TangoS.index<-function(dat, geo, lam, Nrep){
#
# -----------------------------------------------------------------
#  An example of execution
#
#    Nrep<-99
#    lam<-c(0.1, (1:20)*5)
#    geo  <-scan("d:/demo/gallblad.geo",list(id=0,y=0,x=0))
#    dat  <-scan("d:/demo/gallblad.dat",list(id=0,d=0,e=0))
#    out<-TangoS.index(dat, geo, lam, Nrep)
#
# ---------------------------------------------------------------------
#
#  ARGUEMENTS
#
#    Nrep    : The number of Monte Carlo replications, e.g., 999, 9999
#    geo$id  : a vector of region id 
#    geo$x   : a vector of x-coordinate of region centroid, e.g., longitude
#    geo$y   : a vector of y-coordinate of region centroid, e.g., latitude
#    dat$id  : (the same id as geo$id)
#    dat$d   : a vector of observed number of cases
#    dat$e   : a vector of expected number of cases
#    lam     : a vector of lambda values used for the profile p-values
#
#  VALUES
#    p.value : Monte Calro simulated (adjusted) p-vallue
#    lamstar : optimal value of lambda
#    id      : (the same id as geo$id)
#    stdcont : a vector of standardized Ui
#    smr     : a vector of standardized mortality (incidence) ratio
#
# ----------------------------------------------------------------------------
#   Explanations of major variables used in the program
#
#   NOTE !!
#   dc : distance matrix calculated from geo$x and geo$y
#       ( Depending on the user's situation, this should be modified !)
#
#   nr : the number of regions
#   nn : the number of cases
#   kkk: the number of lambda values
#   pxx: sorted p-values due to Monte Carlo replications under
#        the null
#   ss : per cent contribution to C 
#   topid  : region id sorted in the descending order of ss
#            (e.g., top[1] denotes the most likely location of cluster, if any.)
#   topstd : standardized per cent contribution sorted in the 
#            descending order of ss
#   pres   : The resultant adjusted p-value based on Monte Carlo hypothesis testing
#
# ----------------------------------------------------------------------------
nloop<-Nrep
par(mfrow=c(1,1))
start<-date()
nr<-length(geo$id)
qdat<-rep(0,nr)
p<-rep(0,nr)
w<-matrix(0,nr,nr)
nn<-sum(dat$d)

kvec<-lam
kkk<- length(kvec)
dc<-matrix(0,nr,nr)
regid<-geo$id
frq<-matrix(0,nloop,nr)
pval<-matrix(0,nloop,kkk)
pxx<-rep(0,nloop)
pcc<-pxx

for (i in 1:nr){
for (j in i:nr){
dij<-sqrt(((geo$x[i]-geo$x[j])*90.15)**2 + ((geo$y[i]-geo$y[j])*110.9)**2) # for Tokyo area
# dij<-sqrt(((geo$x[i]-geo$x[j]))**2 + ((geo$y[i]-geo$y[j]))**2)
dc[j,i]<- dij
dc[i,j]<- dij
}
}
#
# Calculation of (1) null distribution of test statistic Pmin
#
qdat<- dat$d
poo<-dat$e
p<-poo/sum(poo)
pp<-matrix(p)
w<-diag(p)-pp%*%t(pp)
py<-p
vv<-py/sum(py)
cvv<-cumsum(vv)
for (ijk in 1:nloop) {
  r1<-hist(runif(nn),breaks=c(0,cvv),plot=F)
  frq[ijk,]<- frq[ijk,] + r1$count
}

frq<-frq/nn
ori<- qdat
qdat<-qdat/nn
#
# Figure 1: Locations of nr regions and population size 
#
symbols(geo$x,geo$y,circles=sqrt(p),inch=0.50) 
text(geo$x,geo$y,regid,adj= -0.5, col=8)
#
for (k in 1:kkk){
rad<- kvec[k]
ac <- exp( -4 * (dc/rad)^2 )
hh <- ac%*%w
hh2<- hh%*%hh
av <- sum(diag( hh ))
av2<- sum(diag( hh2 ))
av3<- sum(diag( hh2%*%hh ))
skew1<- 2*sqrt(2)*av3/ (av2)^1.5
df1<- 8/skew1/skew1
eg1<-av
vg1<-2*av2
for (ijk in 1:nloop){
q<-frq[ijk,]
gt<-(q-p)%*%ac%*%(q-p)*nn
stat1<-(gt-eg1)/sqrt(vg1)
aprox<-df1+sqrt(2*df1)*stat1
pval[ijk,k]<- 1-pchisq(aprox, df1)
}
}
for (ijk in 1:nloop){
pcc[ijk]<-min( pval[ijk,] )
}
pxx<- sort( pcc )
#
# Calculation of (2) test statistic Pmin and its adjusted p-value
# of observed data
#
ptan<-rep(0,kkk)
gtt<-rep(0,kkk)
jjj<-1:kkk
ep<-p*nn
po<-ppois(ori,ep)
pa<-(po-0.90)/0.10
p1<-ifelse(po > 0.9 & ori>0, pa^3, 0)
#
# Figure 2: Observed number of cases and the significance of O/E ratio 
#
symbols(geo$x,geo$y,circle=sqrt(p1),inch=0.2) 
text(geo$x, geo$y, ori, col=8)
#par(cex=0.5)
#par(mar=c(4,3,3,5))
#par(mfrow=c(1,2))
#par(mgp=c(1.5,0.5,0))
#
for (k in 1:kkk) {
ti<-date()
rad<- kvec[k]
ac <- exp( -4 * (dc/rad)^2 )
hh <- ac%*%w
hh2<- hh%*%hh
av <- sum(diag( hh ))
av2<- sum(diag( hh2 ))
av3<- sum(diag( hh2%*%hh ))
skew1<- 2*sqrt(2)*av3/ (av2)^1.5
df1<- 8/skew1/skew1
eg1<-av
vg1<-2*av2
gt<-(qdat-p)%*%ac%*%(qdat-p)*nn
gtt[k]<-gt
stat1<-(gt-eg1)/sqrt(vg1)
aprox<-df1+sqrt(2*df1)*stat1
ptan[k]<- 1-pchisq(aprox, df1)
}
#
# Figure 3: the profile p-value for the values of lambda
#
p22<-min( ptan )
at<-c(pxx,p22)
pres<-length(at[at<=p22])/(length(pxx)+1)
for (i in 1:kkk){
 if ( abs(p22 - ptan[i] )< 0.0000001 ){pind<-i}
}
#pind<-jjj[ptan==min(ptan)]
plot(kvec,ptan,pch=1,type="b",xlab="cluster size ( lambda values ) ",
ylab=" Unadjusted p-values")
abline(v=kvec[pind])
text((max(kvec)+2*min(kvec))/3,(3*max(ptan)+min(ptan))/4,
"Adjusted p-value = ",col=8)
text((1.5*max(kvec)+min(kvec))/2.5,(3*max(ptan)+min(ptan))/4, pres,col=8)
#
# Figure 4: Percent contribution Ui
#
rad<- kvec[pind]
ac <- exp( -4 * (dc/rad)^2 )
ss<-ac%*%(qdat-p)*nn
ss<-(qdat-p)*ss
ss2<-ss
ss<-ss/sum(ss)*100
plot(regid,ss,pch=1,xlab=" region ID ", ylab=" Percent contribution ")
text(regid+2,ss+1,regid,col=8)
topid<-regid[sort.list(-ss)]
#
# Standardized per cent contribution
z<- (ss-mean(ss)) %*% (1/sqrt(var(ss)))
topstd<-z[sort.list(-ss)]
smr<-ori/ep
topsmr<-smr[sort.list(-ss)]
end<- date()
list(p.value=pres, lamstar= rad, id=topid, stdcont=topstd, smr=topsmr)
}