This is a slightly modified version of mdb-to-csv.R that runs under OS X. Unlike the Windows version, where binaries of the RODBC
package are available, and ODBC data-base drivers are built-into the operationg system, under OS X, there several external (to R) components that have to be installed or obtained, and the RODBC
package has to be built from source. The components include:
unixODBC (most easily installed via Homebrew) [http://brew.sh/]. In a terminal window:
brew update
brew install unixodbc
brew link unixodbc
the RODBC
package, built from source. In R, from the Console:
install.packages('RODBC', type='source')
# mdb-to-query_osx.R
# directly reads an Access database and creates individual site .csv files
# database name
dbname <- "GCDv3" # e.g. GCDv03_Marlon_et_al_2015_v2.mdb"
# query label and path to query and query name
datapath <- "/Users/bartlein/Projects/GCDv3/GCDv3Data/v3i_Rscripts/"
querypath <- "/Users/bartlein/Projects/GCDv3/GCDv3Data/v3i_Rscripts/v3i_query/"
# if the query output folder does not exist, create it
dir.create(file.path(querypath), showWarnings=FALSE)
# query file names
querysitename <- "v3i_sites.csv"
querydataname <- "v3i_data.csv"
# path to .csv output
csvpath <- paste(datapath, "v3i_sites_csv_osx/", sep="")
# if output folder does not exist, create it
dir.create(file.path(csvpath), showWarnings=FALSE)
# path to sitelist output
sitelistpath <- paste(datapath, "v3i_sitelists/", sep="")
# if output folder does not exist, create it
dir.create(file.path(sitelistpath), showWarnings=FALSE)
# sitelist output label
sitelistname <- "v3i_all"
# debug/log file
debugpath <- paste(datapath, "v3i_debug/", sep="")
# if debug folder does not exist, create it
dir.create(file.path(debugpath), showWarnings=FALSE)
debugname <- "mdb-to-csv_debug.txt"
# open the debug/log file
debugfile <- file(paste(debugpath, debugname, sep=""), "w")
# setup
maxsites <- 2000
maxsamples <- 9000
miss <- -9999.0
# load RODBC library and connect to the database
library(RODBC)
gcdv3.db <- odbcConnect(dbname)
odbcGetInfo(gcdv3.db)
# check for existence of database site and data views
sqlTables(gcdv3.db, tableName="v3i_sites", tableType="TABLE")
sqlColumns(gcdv3.db, "v3i_sites")[4]
sqlTables(gcdv3.db, tableName="v3i_data", tableType="TABLE")
sqlColumns(gcdv3.db, "v3i_data")[4]
# site query
site_query <- sqlFetch(gcdv3.db, "v3i_sites")
site_query$SITE_NAME <- as.character(site_query$SITE_NAME)
head(site_query)
str(site_query)
# data query
data_query <- sqlFetch(gcdv3.db, "v3i_data")
head(data_query)
str(data_query)
# close the database
odbcClose(gcdv3.db)
# site .csv file
sitecsvpath <- "/Projects/GPWG/GPWGv3/GCDv3Data/v3i/v3i_query/"
write.csv(site_query, paste(sitecsvpath, querysitename, sep=""), row.names=FALSE)
# data .csv file
datacsvpath <- "/Projects/GPWG/GPWGv3/GCDv3Data/v3i/v3i_query/"
write.csv(data_query, paste(datacsvpath, querydataname, sep=""), row.names=FALSE)
z
# rewrite sitefile as .csv file for sorting by region and depositional context
sitelist <- data.frame(site_query$ID_SITE, site_query$LATITUDE, site_query$LONGITUDE, site_query$ELEV,
site_query$ID_DEPO_CONTEXT, site_query$SITE_NAME, stringsAsFactors = FALSE)
names(sitelist) <- c("Site_ID", "Lat", "Lon", "Elev", "depo_context", "Site_Name")
head(sitelist)
str(sitelist)
sitelistfile <- paste(sitelistpath, sitelistname, ".csv", sep="")
write.table(sitelist, sitelistfile, row.names=FALSE, sep=",")
# loop over sites
for (j in 1:maxsites) {
nsamp <- 0
sitedata <- data_query[data_query$ID_SITE == j, ]
nsamp <- length(sitedata$ID_SITE)
head(sitedata)
tail(sitedata)
# local variables
if (nsamp > 0) {
jchar <- as.character(j)
nsampchar <- as.character(nsamp)
writeLines(paste("Site",jchar,nsampchar,"samples", sep=" "), con = debugfile, sep = "\n")
# local variables
depth <- sitedata$DEPTH; age <- sitedata$EST_AGE; quant <- sitedata$QUANTITY
depth[is.na(depth)] <- miss
age[is.na(age)] <- miss
quant[is.na(quant)] <- miss
thickness <- rep(miss, nsamp); dep_time <- rep(miss, nsamp); sed_rate <- rep(miss, nsamp)
unit_dep_time <- rep(miss, nsamp)
xst_level <- as.character(sitedata[1,9])
# sed rate and deposition time
# first (top) sample
if (depth[1] != miss && depth[2] != miss) {
thickness[1] <- (depth[2] - depth[1])*100.0 # meters to cm (depth in m, influx and conc in cm)
dep_time[1] <- age[2] - age[1]
if (dep_time[1] > 0.0) sed_rate[1] <- thickness[1]/dep_time[1]
if (sed_rate[1] != miss) unit_dep_time[1] <- 1.0/sed_rate[1]
}
# samples 2 to nsamp-1
for (i in 2:(nsamp-1)) {
if (depth[1] != miss && depth[2] != miss) {
thickness[i] <- (depth[i+1] - depth[i])*100.0
dep_time[i] <- ((age[i+1] + age[i])/2.0) - ((age[i] + age[i-1])/2.0)
if (dep_time[i] > 0.0) sed_rate[i] <- thickness[i]/dep_time[i]
if (sed_rate[i] != miss) unit_dep_time[i] <- 1.0/sed_rate[i]
}
}
# last (bottom) sample
if (depth[nsamp-1] != miss && depth[nsamp] != miss) {
thickness[nsamp] <- thickness[nsamp-1] # replicate thickness
dep_time[nsamp] <- age[nsamp] - age[nsamp-1]
sed_rate[nsamp] <- sed_rate[nsamp-1] # replicate sed_rate
unit_dep_time[nsamp] <- unit_dep_time[nsamp-1]
}
# counts of missing values
depth_count <- 0; age_count <- 0; quant_count <- 0; sed_rate_count <- 0; sed_rate_flag <- 1
depth_count <- sum(depth != miss)
age_count <- sum(age != miss)
quant_count <- sum(quant != miss)
sed_rate_count <- sum(sed_rate != miss)
if (sed_rate_count != nsamp) sed_rateflag = 0
# check for age or depth reversals, and zero or negative sed rates (in nonmissing data)
depth_reversal <- 0; age_reversal <- 0; sed_rate_zeroneg <- 0
for (i in 2:nsamp) {
if (age[i] != miss && age[i-1] != miss && age[i] <= age[i-1]) age_reversal=1
if (depth[i] != miss && depth[i-1] != miss) {
if (depth[i] <= depth[i-1]) depth_reversal=1
}
}
for (i in 2:nsamp) {
if (sed_rate[i] != miss && sed_rate[i] <= 0.0) sed_rate_zeroneg=1
}
# set and write out various flags
if (depth_count != 0 && depth_count != nsamp) {
writeLines(paste("**** has a missing depth when some are nonmissing", sep=" "), con = debugfile, sep = "\n")
}
if (age_count != 0 && age_count != nsamp) {
writeLines(paste("**** has a missing age when some are nonmissing", sep=" "), con = debugfile, sep = "\n")
}
if (quant_count != 0 && quant_count != nsamp) {
writeLines(paste("**** has a missing quantity when some are nonmissing", sep=" "), con = debugfile, sep = "\n")
}
if (sed_rate_count != 0 && sed_rate_count != nsamp) {
writeLines(paste("**** has a missing sed rate when some are nonmissing", sep=" "), con = debugfile, sep = "\n")
}
if (depth_reversal != 0) {
writeLines(paste("**** has a depth reversal", sep=" "), con = debugfile, sep = "\n")
}
if (age_reversal != 0) {
writeLines(paste("**** has an age reversal", sep=" "), con = debugfile, sep = "\n")
}
if (sed_rate_zeroneg != 0) {
writeLines(paste("**** has zero or negative sed rates", sep=" "), con = debugfile, sep = "\n")
}
# alternative quantities
conc <- rep(miss, nsamp); influx <- rep(miss, nsamp)
influx_source <- rep("none", nsamp) ; conc_source <- rep("none", nsamp)
# select case based on xst_level
if (xst_level == "INFL") # adopt influx values as they are, calculate concentration
{
influx <- quant
influx_source <- "data"
if (influx != miss && unit_dep_time != miss && sed_rate != 0.0) {
conc <- influx * unit_dep_time
conc_source <- "calculated from influx "
} else {
conc <- quant
conc_source <- "copied from quant "
}
writeLines("INFL", con = debugfile, sep = "\n")
}
else if (xst_level == "CONC") # calculate influx, adopt conc values as they are
{
conc <- quant
conc_source <- "data"
if (conc != miss && sed_rate != miss && sed_rate != 0.0) {
influx <- quant * sed_rate
influx_source <- "calculated from conc "
} else {
influx <- quant
influx_source <- "copied from quant "
}
writeLines("CONC", con = debugfile, sep = "\n")
}
else if (xst_level == "C0P0") # assume quantity is concentration like
{
conc <- quant
conc_source <- "C0P0"
if (sed_rate != miss && sed_rate != 0.0) {
influx <- quant * sed_rate
influx_source <- "calculated from C0P0 (conc) "
} else {
influx <- quant
influx_source <- "copied from quant "
}
writeLines("C0P0", con = debugfile, sep = "\n")
}
else if (xst_level == "SOIL") # just copy
{
conc <- quant
conc_source <- "copied from quant "
influx <- quant
influx_source <- "copied from quant "
writeLines("SOIL", con = debugfile, sep = "\n")
}
else if (xst_level == "OTHE") # just copy
{
conc <- quant
conc_source <- "copied from quant "
influx <- quant
influx_source <- "copied from quant "
writeLines("OTHE", con = debugfile, sep = "\n")
}
else
{
conc <- quant
conc_source <- "copied from quant "
influx <- quant
influx_source <- "copied from quant "
writeLines("Unknown", con = debugfile, sep = "\n")
}
}
# check for influx == 0.0 everywhere
nzero <- 0
nzero <- sum(influx != 0.0)
if (nzero == 0) {
writeLines(paste("**** has no non-zero influx values", sep=" "), con = debugfile, sep = "\n")
}
# .csv out
if (nsamp > 0 && nzero > 0) {
# get siteid string
siteidchar <- as.character(j)
if (j >= 1) siteid <- paste("000", siteidchar, sep="")
if (j >= 10) siteid <- paste("00", siteidchar, sep="")
if (j >= 100) siteid <- paste("0", siteidchar, sep="")
if (j >= 1000) siteid <- paste( siteidchar, sep="")
sitehdr <- paste("site", siteid, sep="")
# assemble output data and write it out
samplenum <- seq(1:nsamp)
outdata <- data.frame(samplenum,sitedata$ID_SAMPLE, depth, age, sed_rate, quant, conc,
influx, xst_level, conc_source, influx_source)
names(outdata) <- c(sitehdr, "id_sample", "depth", "est_age", "sed_rate", "quant", "conc",
"influx", "xst_level", "conc_source", "influx_source" )
csvfile <- paste(csvpath,siteid,"_data.csv", sep="")
write.csv(outdata, csvfile, row.names=FALSE)
}
}
close(debugfile)