3.2 Main loop
Loop over the individual sites, doing the following:
- compose the site .csv file name
- read the input data
- discard samples with missing ages
- discard samples with ages after 2020 CE
- initial minimax rescaling (for historical reasons) (minimax)
- set `alpha`, the Box-Cox transformation shift parameter
- maximum likelihood estimation of lambda
- Box-Cox transformation of data
- minimax rescaling (tall)
- calculate mean and standard deviation of data over base period
- calculate z-scores (ztrans)
- write out transformed data for this site
# main loop
# For each site: compose the input file name, read and filter the samples,
# Box-Cox transform the influx values, minimax-rescale, compute z-scores
# relative to the base period, and write the transformed data out.
# Relies on objects defined earlier in the script: sites, nsites,
# sitecsvpath, transcsvpath, debugfile, basebeg, baseend, basename,
# and the per-site summary vectors sn_save, lam_save, lik_save,
# tmean_save, tstdev_save.
for (j in seq_len(nsites)) {
  # 1. site .csv file name (input file)
  sitenum <- sites[j, 1]
  sitename <- as.character(sites[j, 5])
  siteidchar <- as.character(sitenum)
  # zero-pad the site number to four digits; unlike the previous if-cascade,
  # formatC() also yields a defined siteid when sitenum < 1
  siteid <- formatC(sitenum, width = 4, format = "d", flag = "0")
  inputfile <- paste0(sitecsvpath, siteid, "_data.csv")
  print(j); print(sitenum); print(sitename); print(inputfile)

  # 2. read the input data
  sitedata <- read.csv(inputfile)
  nsamp <- nrow(sitedata)
  nsampchar <- as.character(nsamp)
  writeLines(paste("Site", siteidchar, nsampchar, "samples"),
             con = debugfile, sep = "\n")

  # 3. discard samples with missing (-9999) ages
  sitedata <- sitedata[sitedata$est_age != -9999, ]

  # 4. keep only samples with ages > -70 yr BP
  # (discards ages <= -70, i.e. samples dated after 2020 CE)
  sitedata <- sitedata[sitedata$est_age > -70, ]

  # 5. initial minimax rescaling of influx to [0, 1] (for historical reasons)
  # NOTE(review): assumes influx is not constant; a constant series would
  # give a zero range and produce NaN here — confirm upstream screening
  influx_rng <- range(sitedata$influx)
  minimax <- (sitedata$influx - influx_rng[1]) / (influx_rng[2] - influx_rng[1])

  # 6. set `alpha`, the Box-Cox transformation shift parameter
  # (keeps the argument of log()/powers strictly positive)
  alpha <- 0.01  # Box-Cox shift parameter
  # alternative alpha: 0.5 times the smallest nonzero value of influx
  # alpha <- 0.5*min(sitedata$influx[sitedata$influx != 0])

  # 7. maximum likelihood estimation of lambda by profile likelihood over a
  # grid of npts values in [-2, 2]; derived from the boxcox.R function in the
  # Venables and Ripley MASS library included in R 2.6.1
  npts <- 201  # number of estimates of lambda
  y <- minimax + alpha
  n <- length(y)
  logy <- log(y)
  ydot <- exp(mean(logy))  # geometric mean, normalizes the likelihood
  lasave <- numeric(npts)
  liksave <- numeric(npts)
  for (i in seq_len(npts)) {
    la <- -2.0 + (i - 1) * (4 / (npts - 1))
    # series expansion at la == 0 avoids the 0/0 in the power transform
    if (la != 0.0) {
      yt <- (y^la - 1) / la
    } else {
      yt <- logy * (1 + (la * logy) / 2 * (1 + (la * logy) / 3 * (1 + (la * logy) / 4)))
    }
    zt <- yt / ydot^(la - 1)
    lasave[i] <- la
    liksave[i] <- -n / 2 * log(sum((zt - mean(zt))^2))
  }
  # save the maximum likelihood value and the associated lambda
  imax <- which.max(liksave)
  maxlh <- liksave[imax]
  lafit <- lasave[imax]
  print(c(sitenum, maxlh, lafit))

  # 8. Box-Cox transformation of data
  # (exact == comparison is intentional: lafit comes off the grid above)
  if (lafit == 0.0) tall <- log(y) else tall <- (y^lafit - 1) / lafit

  # 9. minimax rescaling of the transformed values
  tall <- (tall - min(tall)) / (max(tall) - min(tall))

  # 10. mean and standard deviation of data over the base period
  inbase <- sitedata$est_age >= basebeg & sitedata$est_age <= baseend
  tmean <- mean(tall[inbase])
  tstdev <- sd(tall[inbase])

  # 11. calculate z-scores relative to the base period
  ztrans <- (tall - tmean) / tstdev

  # 12. write out transformed data for this site
  # (direct data.frame() call: the old data.frame(cbind(...)) coerced all
  # columns through a matrix, forcing a common type)
  siteout <- data.frame(sitedata[, 1], sitedata$id_sample, sitedata$est_age,
    sitedata$depth, sitedata$influx, minimax, tall, ztrans)
  colnames(siteout) <- c("site_sample", "id_sample", "est_age", "depth", "influx", "influxmnx", "tall", "zt")
  outputfile <- paste0(transcsvpath, siteid, "_trans_influx_", basename, ".csv")
  write.table(siteout, outputfile, col.names=TRUE, row.names=FALSE, sep=",")

  # per-site summaries for use after the loop
  sn_save[j] <- sitenum
  lam_save[j] <- lafit
  lik_save[j] <- maxlh
  tmean_save[j] <- tmean
  tstdev_save[j] <- tstdev

}

As the loop executes, one block of information for each site will be printed.
##  [1] 1 
##  [1] 1 
##  [1] "Cygnet     " 
##  [1] "/Projects/GPWG/GPWGv3/data/v3plus/v3plus_sites_csv/0001_data.csv " 
##  [1]    1.0000 -527.6608    0.2200 
##  ...