3.2 Main loop
Loop over the sites, doing the following for each:
- Compose the trans-and-zscore .csv file name
- Read the input data
- Count the number of nonmissing (non-NA) and infinite influx values
- Find bin number of each sample
- Get average zt values (and average ages) for the data in each bin
- Get bin numbers of each bin that had an average (or a single) value
- Write output
Step 3 determines the number of nonmissing (non-NA) values of zt for each site, and the number of those nonmissing values that are infinite (Inf). If there are no nonmissing values, or if all of the nonmissing values are infinite, the site is skipped. Average ages of the points in each bin are calculated, but are not currently used.
# main loop: for each site, read its transformed data, average zt within each
# target-age bin, and write one presampled .csv file
for (j in seq_len(nsites)) {

  # 1. Compose the trans-and-zscore .csv file name
  sitenum <- sites[j, 1]
  sitename <- as.character(sites[j, 5])
  siteidchar <- as.character(sitenum)
  # zero-pad the site number to at least four digits; unlike a chain of
  # threshold tests this always assigns siteid, so a value from the previous
  # site can never leak into the current file name
  siteid <- formatC(sitenum, width = 4, format = "d", flag = "0")
  inputfile <- paste0(transcsvpath, siteid, "_trans_influx_", basename, ".csv")

  # sites without an input file are silently skipped
  if (!file.exists(inputfile)) {
    next
  }

  # 2. Read the input data
  sitedata <- read.csv(inputfile)
  nsamp <- length(sitedata$zt)
  nsampchar <- as.character(nsamp)
  writeLines(paste("Site", siteidchar, nsampchar, "samples", sep = " "),
    con = debugfile, sep = "\n")

  # 3. Count the number of nonmissing (non-NA) and infinite influx values
  nonmiss <- na.omit(sitedata$zt)
  numnonmiss <- length(nonmiss)
  numinf <- sum(is.infinite(nonmiss))

  # skip the site when it has no nonmissing values or all of them are infinite
  if (!(numnonmiss > 0 && numinf < numnonmiss)) {
    next
  }

  # add a column of 1's for counting
  sitedata$one <- rep(1, nrow(sitedata))

  # 4. Find bin number of each sample
  # this definition of bin number seems to match that implicit in presample.f90
  binnum <- as.integer(
    ceiling((sitedata$est_age - targbeg - (targstep / 2)) / targstep)
  ) + 1
  # uncommenting the following reveals how each sample is assigned to a bin
  #head(cbind(sitedata$est_age,sitedata$zt,binnum,targage[binnum]), nsamp)

  # keep only samples whose bin is a valid index into targage; a bin number
  # <= 0 would otherwise drop or negatively index elements of targage and
  # break the output data frame (bins beyond the end were already discarded
  # below through their NA target age)
  inrange <- !is.na(binnum) & binnum >= 1 & binnum <= length(targage)
  if (!any(inrange)) {
    next
  }
  binned <- sitedata[inrange, , drop = FALSE]
  bins <- binnum[inrange]

  # 5. Get average zt values (and average ages) for the data in each bin
  binave <- tapply(binned$zt, bins, mean)
  binaveage <- tapply(binned$est_age, bins, mean)  # not currently used
  bincount <- tapply(binned$one, bins, sum)

  # 6. Get bin numbers of each bin that had an average (or a single) value
  binsub <- as.numeric(unlist(dimnames(binave)))

  # 7. Write output
  presampout <- data.frame(targage[binsub], binave, bincount)
  # drop bins whose average is NA (i.e. bins containing a missing zt value)
  presampout <- na.omit(presampout)
  colnames(presampout) <- c("age", "zt", "np")
  outputfile <- paste0(presampcsvpath, siteid, "_presamp_influx_", basename,
    "_bw", as.character(targstep), ".csv")
  write.table(presampout, outputfile, col.names = TRUE, row.names = FALSE,
    sep = ",")
}
How long did this take?
# Report the time elapsed since the value stored in ptm; relies on top-level
# autoprinting to display the result.
# NOTE(review): ptm is presumably set with proc.time() before the main loop --
# confirm against the earlier part of the file.
proc.time() - ptm
## user system elapsed
## 5.86 0.51 6.42
As the loop executes, one block of information for each site will be printed.
## [1] 1
## [1] 1
## [1] "Cygnet"
## [1] "/Projects/GPWG/GPWGv3/data/v3i/v3i_trans_csv/0001_trans_influx_zt-lme.csv"
## ...