#define WANT_STREAM #define WANT_MATH #include "include.h" #include "newran.h" #ifdef use_namespace using namespace NEWRAN; #endif void SortAscending(Real* data, int max); Real KS(Real* data, int n); Real NormalDF(Real x); double invchi95(int N); double invchi99(int N); void ChiSquaredTest(int* Observed, Real* Prob, int N, int n); void TestBinomial(int N, Real p, int n); void TestPoisson(Real mu, int n); void TestNegativeBinomial(Real NX, Real p, int n); void TestDiscreteGen(int N, Real* prob, int n); inline Real square(Real x) { return x*x; } inline Real cube(Real x) { return x*x*x; } void test3(int n) { cout << endl; // Do chi-squared tests to discrete data cout << "ChiSquared tests" << endl; { Real p[] = { 0.05, 0.10, 0.05, 0.5, 0.01, 0.01, 0.03, 0.20, 0.05 }; TestDiscreteGen(9, p, n); } { Real p[] = { 0.4, 0.2, 0.1, 0.05, 0.025, 0.0125, 0.00625, 0.00625, 0.2 }; TestDiscreteGen(9, p, n); } TestNegativeBinomial(200.3, 0.05, n); TestNegativeBinomial(150.3, 0.15, n); TestNegativeBinomial(100.8, 0.18, n); TestNegativeBinomial(100.8, 1.22, n); TestNegativeBinomial(100.8, 9.0, n); TestNegativeBinomial(10.5, 0.18, n); TestNegativeBinomial(10.5, 1.22, n); TestNegativeBinomial(10.5, 9.0, n); TestNegativeBinomial(0.35, 0.18, n); TestNegativeBinomial(0.35, 1.22, n); TestNegativeBinomial(0.35, 9.0, n); TestBinomial(100, 0.45, n); TestBinomial(100, 0.25, n); TestBinomial(100, 0.02, n); TestBinomial(100, 0.01, n); TestBinomial(49, 0.60, n); TestBinomial(21, 0.70, n); TestBinomial(10, 0.90, n); TestBinomial(10, 0.25, n); TestBinomial(10, 0.10, n); TestPoisson(0.75, n); TestPoisson(4.3, n); TestPoisson(10, n); TestPoisson(100, n); Real* data = new Real[n]; if (!data) Throw(Bad_alloc()); // Apply KS test to a variety of continuous distributions // - use cdf transform to convert to uniform cout << endl; cout << "Kolmogorov-Smirnoff tests" << endl; cout << "25%, 5%, 1%, .1% upper points are 1.019, 1.358, 1.628, 1.950" << endl; cout << "5% lower point is 0.520" << endl; { ChiSq X(1, 1.44); for (int i = 0; i < n; i++) { Real x = sqrt(X.Next()); data[i] = NormalDF(x - 1.2) - NormalDF(-x - 1.2); } cout << X.Name() << ": " << KS(data, n) << endl; } { ChiSq X(4); for (int i = 0; i < n; i++) { Real x = 0.5 * X.Next(); data[i] = (1+x)*exp(-x); } cout << X.Name() << ": " << KS(data, n) << endl; } { ChiSq X(2); for (int i = 0; i < n; i++) data[i] = exp(-0.5 * X.Next()); cout << X.Name() << ": " << KS(data, n) << endl; } { Pareto X(0.5); for (int i = 0; i < n; i++) { Real x = X.Next(); data[i] = 1.0 / sqrt(x); } cout << X.Name() << ": " << KS(data, n) << endl; } { Pareto X(1.5); for (int i = 0; i < n; i++) { Real x = X.Next(); data[i] = 1.0 / (x * sqrt(x)); } cout << X.Name() << ": " << KS(data, n) << endl; } { Normal X; for (int i = 0; i < n; i++) { Real x = X.Next(); data[i] = NormalDF(x); } cout << X.Name() << ": " << KS(data, n) << endl; } { Normal N; SumRandom X = 10 + 5 * N; for (int i = 0; i < n; i++) { Real x = X.Next(); data[i] = NormalDF((x-10)/5); } cout << X.Name() << ": " << KS(data, n) << endl; } { Normal N; Cauchy C; MixedRandom X = N(0.9) + C(0.1); for (int i = 0; i < n; i++) { Real x = X.Next(); data[i] = 0.9*NormalDF(x)+0.1*(atan(x)/3.141592654 + 0.5); } cout << X.Name() << ": " << KS(data, n) << endl; } { Normal N; MixedRandom X = N(0.9) + (10*N)(0.1); for (int i = 0; i < n; i++) { Real x = X.Next(); data[i] = 0.9*NormalDF(x)+0.1*NormalDF(x/10); } cout << X.Name() << ": " << KS(data, n) << endl; } { Normal X0; SumRandom X = X0 * 0.6 + X0 * 0.8; for (int i = 0; i < n; i++) { Real x = X.Next(); data[i] = NormalDF(x); } cout << X.Name() << ": " << KS(data, n) << endl; } { Normal X1; MixedRandom X = X1(0.2) + (X1 * 2.5 + 1.1)(0.35) + (X1 + 2.3)(0.45); for (int i = 0; i < n; i++) { Real x = X.Next(); data[i] = 0.20 * NormalDF(x) + 0.35 * NormalDF((x - 1.1) / 2.5) + 0.45 * NormalDF(x - 2.3); } cout << X.Name() << ": " << KS(data, n) << endl; } { Gamma X(0.5); for (int i = 0; i < n; i++) { Real x = X.Next(); data[i] = 2.0 * NormalDF(-sqrt(2 * x)); } cout << X.Name() << ": " << KS(data, n) << endl; } { Gamma X(3); for (int i = 0; i < n; i++) { Real x = X.Next(); data[i] = (1+x+0.5*x*x)*exp(-x); } cout << X.Name() << ": " << KS(data, n) << endl; } { Gamma X1(0.85); Gamma X2(2.15); SumRandom X = X1 + X2; for (int i = 0; i < n; i++) { Real x = X.Next(); data[i] = (1+x+0.5*x*x)*exp(-x); } cout << X.Name() << ": " << KS(data, n) << endl; } { Gamma X1(0.75); Gamma X2(0.25); SumRandom X = X1 + X2; for (int i = 0; i < n; i++) data[i] = exp(-X.Next()); cout << X.Name() << ": " << KS(data, n) << endl; } { Gamma X(2); for (int i = 0; i < n; i++) { Real x = X.Next(); data[i] = (1+x)*exp(-x); } cout << X.Name() << ": " << KS(data, n) << endl; } { Exponential X; for (int i = 0; i < n; i++) data[i] = exp(-X.Next()); cout << X.Name() << ": " << KS(data, n) << endl; } { Cauchy X; for (int i = 0; i < n; i++) data[i] = atan(X.Next())/3.141592654 + 0.5; cout << X.Name() << ": " << KS(data, n) << endl; } { Cauchy X0; SumRandom X = X0 * 0.3 + X0 * 0.7; for (int i = 0; i < n; i++) data[i] = atan(X.Next())/3.141592654 + 0.5; cout << X.Name() << ": " << KS(data, n) << endl; } { Uniform X; for (int i = 0; i < n; i++) data[i] = X.Next(); cout << X.Name() << ": " << KS(data, n) << endl; } delete [] data; } /*************************** Kolmogorov Smirnov Test ************************/ // test the data in the array (length n) for being uniform (0,1) Real KS(Real* data, int n) { SortAscending(data, n); Real D = 0.0; for (int i = 0; i < n; i++) { Real d1 = (Real)(i+1) / (Real)n - data[i]; Real d2 = data[i] - (Real)i / (Real)n; if (D < d1) D = d1; if (D < d2) D = d2; } return D * (sqrt(n) + 0.12 + 0.11 / sqrt(n)); } /******************************** Quick sort ********************************/ // Quicksort. // Essentially the method described in Sedgewick's algorithms in C++ // My version is still partially recursive, unlike Segewick's, but the // smallest segment of each split is used in the recursion, so it should // not overlead the stack. // If the process does not seems to be converging an exception is thrown. #define DoSimpleSort 17 // when to switch to insert sort #define MaxDepth 50 // maximum recursion depth static Real SortThreeDescending(Real* a, Real* b, Real* c); static void MyQuickSortAscending(Real* first, Real* last, int depth); static void InsertionSortAscending(Real* first, const int length, int guard); static Real SortThreeDescending(Real* a, Real* b, Real* c) { // sort *a, *b, *c; return *b; optimise for already sorted if (*a >= *b) { if (*b >= *c) return *b; else if (*a >= *c) { Real x = *c; *c = *b; *b = x; return x; } else { Real x = *a; *a = *c; *c = *b; *b = x; return x; } } else if (*c >= *b) { Real x = *c; *c = *a; *a = x; return *b; } else if (*a >= *c) { Real x = *a; *a = *b; *b = x; return x; } else { Real x = *c; *c = *a; *a = *b; *b = x; return x; } } void SortAscending(Real* data, int max) { if (max > DoSimpleSort) MyQuickSortAscending(data, data + max - 1, 0); InsertionSortAscending(data, max, DoSimpleSort); } static void InsertionSortAscending(Real* first, const int length, int guard) // guard gives the length of the sequence to scan to find first // element (eg guard = length) { if (length <= 1) return; // scan for first element Real* f = first; Real v = *f; Real* h = f; if (guard > length) guard = length; int i = guard - 1; while (i--) if (v > *(++f)) { v = *f; h = f; } *h = *first; *first = v; // do the sort i = length - 1; f = first; while (i--) { Real* g = f++; h = f; v = *h; while (*g > v) *h-- = *g--; *h = v; } } static void MyQuickSortAscending(Real* first, Real* last, int depth) { for (;;) { const int length = last - first + 1; if (length < DoSimpleSort) return; if (depth++ > MaxDepth) Throw(Exception("QuickSortAscending fails")); Real* centre = first + length/2; const Real test = SortThreeDescending(last, centre, first); Real* f = first; Real* l = last; for (;;) { while (*(++f) < test) {} while (*(--l) > test) {} if (l <= f) break; const Real temp = *f; *f = *l; *l = temp; } if (f > centre) { MyQuickSortAscending(l+1, last, depth); last = f-1; } else { MyQuickSortAscending(first, f-1, depth); first = l+1; } } } Real NormalDF(Real x) { // from Abramowitz and Stegun - accuracy 7.5E-8 // accuracy is absolute; not relative // eventually will need a better method // but good enough here Real t = 1.0 / (1.0 + 0.2316419 * fabs(x)); t = ( 0.319381530 + (-0.356563782 + ( 1.781477937 + (-1.821255978 + 1.330274429 * t) * t) * t) * t) * t; t = 0.3989422804014326779399461 * exp(-0.5 * x * x) * t; return (x < 0) ? t : 1.0 - t; } void ChiSquaredTest(int* Observed, Real* Prob, int N, int n) { // go for at least two expected observations per cell // work in from ends if (N <= 0) { cout << "no categories" << endl; return; } if (n <= 0) { cout << "no data" << endl; return; } int O1 = 0; Real E1 = 0.0; int O2 = 0; Real E2 = 0.0; Real CS = 0.0; int df = 0; int i = 0; int Ni = N; Real ToGo = n; for (;;) { O1 += Observed[i]; Real e1 = n * Prob[i]; E1 += e1; ToGo -= e1; if (E1 >= 2.0 && ToGo + E2 >= 2.0) { CS += square(O1 - E1) / E1; df += 1; O1 = 0; E1 = 0.0; } if (i == Ni) break; ++i; O2 += Observed[Ni]; Real e2 = n * Prob[Ni]; E2 += e2; ToGo -= e2; if (E2 >= 2.0 && ToGo + E1 >= 2.0) { CS += square(O2 - E2) / E2; df += 1; O2 = 0; E2 = 0.0; } if (i == Ni) break; --Ni; } E1 += E2; O1 += O2; if (E1 > 0.0) { CS += square(O1 - E1) / E1; df += 1; } if (fabs(ToGo) >= 0.01) cout << "chi-squared program fails - "; cout << "chisq = " << CS << "; df = " << (df-1) << "; 95% pt. = " << invchi95(df-1) << "; 99% pt. = " << invchi99(df-1) << endl; } void TestBinomial(int N, Real p, int n) { Binomial X(N, p); Real q = 1.0 - p; Real ln_p = log(p); Real ln_q = log(q); int* obs = new int [N+1]; if (!obs) Throw(Bad_alloc()); Real* prob = new Real [N+1]; if (!prob) Throw(Bad_alloc()); int i; for (i = 0; i <= N; i++) { obs[i] = 0; prob[i] = exp(ln_gamma(N+1) - ln_gamma(i+1) - ln_gamma(N-i+1) + i * ln_p + (N-i) * ln_q); } for (i = 0; i < n; i++) { int b = (int)X.Next(); if (b < 0 || b > N) Throw(Logic_error("Binomial error")); obs[b]++; } cout << "Binomial: "; ChiSquaredTest(obs, prob, N, n); delete [] obs; delete [] prob; } void TestPoisson(Real mu, int n) { Poisson X(mu); Real ln_mu = log(mu); int N = (int)(20 + mu + 10 * sqrt(mu)); // set upper bound if (N > n) { cout << "Poisson: range too large" << endl; return; } int* obs = new int [N+1]; if (!obs) Throw(Bad_alloc()); Real* prob = new Real [N+1]; if (!prob) Throw(Bad_alloc()); int i; for (i = 0; i <= N; i++) { obs[i] = 0; prob[i] = exp(i * ln_mu - mu - ln_gamma(i+1)); } for (i = 0; i < n; i++) { int b = (int)(X.Next()); if (b < 0 || b > N) Throw(Logic_error("Poisson error")); obs[b]++; } cout << "Poisson: "; ChiSquaredTest(obs, prob, N, n); delete [] obs; delete [] prob; } void TestNegativeBinomial(Real NX, Real P, int n) { NegativeBinomial X(NX, P); Real Q = 1.0 + P; Real p = 1.0 / Q; Real q = 1.0 - p; Real ln_p = log(p); Real ln_q = log(q); Real mean = NX * P; Real var = mean * Q; int N = (int)(20 + mean + 100 * sqrt(var)); // set upper bound // won't be good enough for large P if (N > n) { cout << "NegativeBinomial: range too large" << endl; return; } int* obs = new int [N+1]; if (!obs) Throw(Bad_alloc()); Real* prob = new Real [N+1]; if (!prob) Throw(Bad_alloc()); int i; for (i = 0; i <= N; i++) { obs[i] = 0; prob[i] = exp(ln_gamma(NX+i) - ln_gamma(i+1) - ln_gamma(NX) + NX * ln_p + i * ln_q); } for (i = 0; i < n; i++) { int b = (int)X.Next(); if (b < 0 || b > N) Throw(Logic_error("NegativeBinomial error")); obs[b]++; } cout << "NegativeBinomial: "; ChiSquaredTest(obs, prob, N, n); delete [] obs; delete [] prob; } void TestDiscreteGen(int N, Real* prob, int n) { DiscreteGen X(N, prob); int* obs = new int [N]; if (!obs) Throw(Bad_alloc()); int i; for (i = 0; i < N; i++) obs[i] = 0; for (i = 0; i < n; i++) { int b = (int)X.Next(); if (b < 0 || b >= N) Throw(Logic_error("DiscreteGen error")); obs[b]++; } cout << "DiscreteGen: "; ChiSquaredTest(obs, prob, N-1, n); delete [] obs; } // Calculate 95% point of chi-squared distribution double invchi95(int N) // upper 95% point of chi-squared distribution { if (N < 0) Throw(Logic_error("Error in invchi95 arg")); if (N < 30) { double Q[] = { 0, 3.841459, 5.991465, 7.814728, 9.487729, 11.0705, 12.59159, 14.06714, 15.50731, 16.91898, 18.30704, 19.67506, 21.02601, 22.36199, 23.68475, 24.99576, 26.2962, 27.58709, 28.86928, 30.14351, 31.4104, 32.6705, 33.9244, 35.1725, 36.4151, 37.6525, 38.8852, 40.1133, 41.3372, 42.5569 }; return Q[N]; } else { double A = 1.0/(4.5 * N); double H = (-0.0002 * 60)/N; double Q = N * cube(1 - A + (1.645 - H) * sqrt(A)); return Q; } } // Calculate 99% point of chi-squared distribution double invchi99(int N) // upper 99% point of chi-squared distribution { if (N < 0) Throw(Logic_error("Error in invchi99 arg")); if (N < 30) { double Q[] = { 0, 6.63490, 9.21034, 11.3449, 13.2767, 15.0863, 16.8119, 18.4753, 20.0902, 21.6660, 23.2093, 24.7250, 26.2170, 27.6883, 29.1413, 30.5779, 31.9999, 33.4087, 34.8053, 36.1908, 37.5662, 38.9321, 40.2894, 41.6384, 42.9798, 44.3141, 45.6417, 46.9630, 48.2782, 49.5879 }; return Q[N]; } else { double A = 1.0/(4.5 * N); double H = (0.0008 * 60)/N; double Q = N * cube(1 - A + (2.326 - H) * sqrt(A)); return Q; } }