//Two sample Kolmogorov Smirnov test
//Written by Raymond N. Greenwell, March 2000
#include <iostream.h>
#include <math.h>
#include <stdlib.h>
#include <fstream.h>
#include <iomanip.h>
int test=190000;//number of tests
int n1,n2,//number of data per test
k,
jrand;//random number generator counter
double x[1001],y[1001],//data values
p,//p values
alpha=.10,
correct,
randomseed,//seed for random number generator
oldrand[3218];//array of random numbers
double KS();
double AKSCDF(int, int, double);
double random();
void advancerand();
void warmuprand(double);
double zrand();
double g3(double);
void sort(double a[], int);
double max(double, double);
double pvalue(double);
double trand(int);
double chirand(int);
int lcd(int,int);

int main()
{
int ii,//test number
jj,//data number
kk,n2count,betacount,
nu,//degrees of freedom for t distribution
nuval,
reject,//number of times p<alpha
count;//number of times p=pmin
double T,//test statistic
rejected[5],//total number of times H0 rejected
val,avgrejected[3][5],
crit,//critical KS value
pmin,//minimum p>=alpha
pmax,//maximum p<alpha
pmin1,pmax1;
crit=sqrt(-.5*log(alpha/2));
//n1=400;
randomseed=.6;
warmuprand(randomseed);
for (ii=0; ii<=2; ii++)
{for (jj=0; jj<=4; jj++) avgrejected[ii][jj]=0;}
for (nuval=1; nuval<=1; nuval++)
{switch(nuval)
{case 1: nu=1; break;
case 2: nu=2; break;
case 3: nu=4; break;
case 4: nu=6; break;
case 5: nu=10; break;
default: cout << "error at switch statement" << endl; nu=10;}

//cout << "2 sample test, data from t distribution with " << nu
// << " degrees of freedom." << endl;
cout << "2 sample test, data from standard normal distribution" << endl;
//cout << "n1 " << " n2 " << " uncorrected " << " corrected " << " Kim " << endl;
cout << "n1 " << " n2 " << " exact " << endl;
for (n1=40; n1<=160; n1=n1+40)
{
for (n2count=1; n2count<=5; n2count=n2count+1)
{
switch(n2count)
{case 1: n2=n1/4; break;
case 2: n2=2*n1/5; break;
case 3: n2=n1/2; break;
case 4: n2=3*n1/4; break;
case 5: n2=n1; break;
default: cout << "error at switch statement" << endl; n2=n1;
}//end switch
for (betacount=1; betacount<=4; betacount++)
{
switch(betacount)
{case 1: correct=0; break;
case 2: if (n1%n2==0) correct=n1*0.032/(pow(n2,1.5));
else correct=0.25/(sqrt(n1));
break;
case 3: if (n1>2*n2)
correct=1/(2*sqrt(n1));
else {if (n1%n2==0) correct=2/(3*sqrt(n1));
else correct=2/(5*sqrt(n1));
} break;
case 4: correct=0; break;
default: cout << "error at switch statement" << endl; correct=0;
}//end switch(betacount);
count=0;
reject=0;
val=lcd(n1,n2)*sqrt((n1+n2)/(1.0*n1*n2));
pmax=pvalue(ceil(val*(crit-correct))/val+correct);
pmin=pvalue(floor(val*(crit-correct))/val+correct);
//pmin1=1;
//pmax1=0;
if (betacount==4) {pmin=2; pmax=2;}
k=1;
if (betacount==4) while ((k<=n1*n2) && (pmax>=alpha))
{
pmax=AKSCDF(n2,n1,float(k)/float(n1*n2));
if (pmax>=alpha)
{pmin=pmax;
k++;}
}//end while k
for (ii=1; ii<=test; ii++)
{for (jj=1; jj<=n1; jj++)
{x[jj]=zrand();
}
for (jj=1; jj<=n2; jj++)
{y[jj]=zrand();
}
sort(x,n1);
sort(y,n2);
T=KS();//Call routine to find KS statistic
if (betacount<4) p=pvalue(T*sqrt((1.0*n1*n2)/(n1+n2))+correct);
if (betacount==4) p=AKSCDF(n2,n1,T);
if (p<=alpha) reject++;
if (fabs(p-pmin)<.0001) count++;
}//end for ii loop
rejected[betacount]=(reject+(alpha-pmax)/(pmin-pmax)*count)/test;
//avgrejected[n2count%2][betacount] = avgrejected[n2count%2][betacount] + rejected[betacount];
}//end for betacount
cout << n1 << " & "<< n2 << " & "<< rejected[1] << " & "<< rejected[2] <<" & "<< rejected[3]
<<" & "<< rejected[4]<<" \\\\ "<< endl;
}//end for n2
}//end for n1
//cout << "nonmultiple " << avgrejected[0][1]/8 << " & "<< avgrejected[0][2]/8 <<" & "
// << avgrejected[0][3]/8 << endl;
//cout << "multiple " << avgrejected[1][1]/12 << " & "<< avgrejected[1][2]/12 <<" & "
// << avgrejected[1][3]/12 <<endl;
//cout << "total " << (avgrejected[0][1]+avgrejected[1][1])/20 << " & "<<
// (avgrejected[0][2]+avgrejected[1][2])/20 <<" & "<<
// (avgrejected[0][3]+avgrejected[1][3])/20 << endl;
}//end for nuval
}

double AKSCDF(int m, int n, double d)
//m is smaller sample size
{double u[801],w;
int i,j,k,mult;
mult=lcd(m,n);
k=m*n*(d-1.0/mult)+0.5;
u[1]=1;
for (j=1; j<=n; j++)
{
u[j+1]=1.0;
if(m*j>k) u[j+1]=0.0;
}//end for j
for (i=1; i<=m; i++)
{
w=float(i)/float(i+n);
u[1]=w*u[1];
if(n*i>k) u[1]=0.0;
for (j=1; j<=n; j++)
{
u[j+1]=u[j]+u[j+1]*w;
if(abs(n*i-m*j)>k) u[j+1]=0.0;
}//end for j
}//end for i
return 1-u[n+1];
} // END AKSCDF

double KS()
//Find maximum value of difference between the two
//sample distribution functions.
{
double fmax=0;//largest value found so far
int count1=1,//data number
count2=1;
while ((count1<=n1) && (count2<=n2))
{
if (x[count1]<y[count2])
{fmax=max(fmax, fabs((count1-1.0)/n1-(count2-1.0)/n2));
fmax=max(fmax, fabs(count1/(1.0*n1)-(count2-1.0)/n2));
count1++;
}
else{if (x[count1]>y[count2])
{fmax=max(fmax, fabs((count1-1.0)/n1-(count2-1.0)/n2));
fmax=max(fmax, fabs((count1-1.0)/n1-count2/(1.0*n2)));
count2++;
}
else {fmax=max(fmax, fabs((count1-1.0)/n1-(count2-1.0)/n2));
fmax=max(fmax, fabs(count1/(1.0*n1)-count2/(1.0*n2)));
fmax=max(fmax, fabs((count1-1.0)/n1-(count2)/(1.0*n2)));
fmax=max(fmax, fabs(count1/(1.0*n1)-(count2-1.0)/(1.0*n2)));
count1++;count2++;
}//end second else
}//end first else
}//end while
return fmax;
}

double zrand()
//Generates standard normal random numbers, based on scheme of
//George Marsaglia and T.A. Bray,
//SIAM Review, 1964, vol. 6, p. 260-264.
{double u,u1,u2,u3,x,y,z,v1,v2,v3,v4;
int done, ok;
u=random();
if (u<.8638) {u1=random(); u2=random(); u3=random(); z=2*(u1+u2+u3-1.5);}
else {if (u<.9745) {u1=random(); u2=random(); z=1.5*(u1+u2-1);}
else {if (u<.9973002039)
{done=0;
while (done==0)
{x=6*random()-3;
y=.358*random();
if (y<g3(x))
{z=x; done=1;}
}
}
else {done=0;
while (done==0)
{ok=0;
while (ok==0)
{v1=2*random()-1;
v2=2*random()-1;
v3=v1*v1+v2*v2;
if (v3<1) ok=1;}
v4=sqrt((9-2*log(v3))/v3);
x=v1*v4;
y=v2*v4;
if (x>3) {done=1; z=x;}
else {if (y>3) {done=1; z=y;}}
}//end while done=0
}//end third else
}//end second else
}//end first else
return z;}

double g3(double x)
//used by zrand
{
double y,ax;
ax=fabs(x);
if(ax<1)
y=17.49731196*exp(-.5*x*x)-4.73570326*(3-x*x)-2.15787544*(1.5-ax);
else {if (ax<1.5)
y=17.49731196*exp(-.5*x*x)-2.36785163*(3-ax)*(3-ax)-2.15787544*(1.5-ax);
else {if (ax<3)
y=17.49731196*exp(-.5*x*x)-2.36785163*(3-ax)*(3-ax);
else y=0;}
}
return y;}

void sort (double a[], int num)
// SHELL SORT ALGORITHM CACM JULY 1964
{
int i=1,
m,j,k,l,
endloop;
double xx;
while (i<=num) i=2*i;
m=i-1;
m=m/2;
while (m>0)
{k=num-m;
for (j=1; j<=k; j++)
{l=j;
endloop=0;
while (endloop==0)
{if (l<1) endloop=1;
else {if (a[l+m]>=a[l]) endloop=1;
else {xx=a[l+m];
a[l+m]=a[l];
a[l]=xx;
l=l-m;
}//end else
}//end if
}//end while
}//end for
m=m/2;
}//end while m
}//end sort

double max(double aa, double bb)
{double mm;
mm=aa;
if (bb>aa) mm=bb;
return mm;}

void advancerand()
//Create next batch of 100 random numbers.
//Random number generator from Genetic Algorithms by David Goldberg
//p. 334-335
//Using generator with longer length from Knuth Vol. 2
{int j;
double newrand;
for (j=1; j<=37; j++)
{newrand=oldrand[j]-oldrand[j+63];
if (newrand<0) newrand=newrand+1;
oldrand[j]=newrand;}
for (j=38; j<=100; j++)
{newrand=oldrand[j]-oldrand[j-37];
if (newrand<0) newrand=newrand+1;
oldrand[j]=newrand;}
}

void warmuprand(double randomseed)
//Get random number generator started.
{
int i,j;
double newrand,prevrand;
oldrand[100]=randomseed;
newrand=1.0e-9;
prevrand=randomseed;
for (j=1; j<=99; j++)
{i=(43*j)%100;
oldrand[i]=newrand;
newrand=prevrand-newrand;
if (newrand<0) newrand=newrand+1;
prevrand=oldrand[i];}
advancerand();advancerand();advancerand();
jrand=0;
}

double random()
//Uniform random numbers on [0,1]
{int i;
jrand++;
if (jrand>100)
{jrand=1;
advancerand();}
return oldrand[jrand];
}

double pvalue(double x)
//Calculate pvalue based on KS coefficient
//See Fisz, p. 396
{
return 2*exp(-2*x*x)-2*exp(-8*x*x);
}

double trand(int nu)
//Generates random variables with the t distribution
//with nu degrees of freedom
//See Knuth, Vol. 2, 2nd ed, p. 129-130
//nu=degrees of freedom
{double y1,y2;
y1=zrand();
if (nu==0) cout << "Error; nu=0" << endl;
do
{y2=chirand(nu);}
while (y2==0);
return y1/sqrt(y2/nu);
}//end trand

double chirand(int nu)
//Generates random variables with the chi squared distribution
//with nu degrees of freedom
//See Knuth, Vol. 2, 2nd ed, p. 129-130
{int k;
double x,z,a,prod,u;
double y1,y2;
a=nu/2.0;//a=order of gamma function
if (fabs(a-.5)<.001)
{z=zrand();
x=z*z;}
else{if (fabs(floor(a)-a)<.001)
{prod=1;
for (k=1; k<= floor(a); k++)
{do
{u=random();}
while (u==0);
prod=prod*u;
}//end for k
x=-log(prod);
}
else cout << nu << " is bad value for nu." << endl;
}//end else
return x;}//end chirand

int lcd(int n1, int n2)
{int r,//gcd
m,n;
m=n1; n=n2;
do
{r=m%n;
m=n;
n=r;
}
while (r>0);
return n1*n2/m;
}//end lcd