Currently it is difficult to prospectively estimate human toxicokinetics (particularly for novel chemicals) in a high-throughput manner. The R software package httk has been developed, in part, to address this deficiency, and the aim of this investigation was to develop a generalized inhalation model for httk. The structure of the inhalation model was developed from two previously published physiologically-based models from Jongeneelen et al. (2011) and Clewell et al. (2001) while calculated physicochemical data was obtained from EPA’s CompTox Chemicals Dashboard. In total, 142 exposure scenarios across 41 volatile organic chemicals were modeled and compared to published data. The slope of the regression line of best fit between log-transformed simulated and observed combined measured plasma and blood concentrations was 0.59 with an r2= 0.54 and a Root Mean Square Error (RMSE) of direct comparison between the log-transformed simulated and observed values of 0.87. Approximately 3.6% (n = 73) of the data points analyzed were > 2 orders of magnitude different than expected. The volatile organic chemicals examined in this investigation represent small, generally lipophilic molecules. Ultimately this paper details a generalized inhalation component that integrates with the httk physiologically-based toxicokinetic model to provide high-throughput estimates of inhalation chemical exposures.
knitr::opts_chunk$set(echo = TRUE, fig.width=5, fig.height=4)
library(httk)
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:gdata':
##
## combine
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:gdata':
##
## combine, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
###Get metabolism and concentration data
Identify chemicals currently in our metabolism data that we don’t have good concentration/time data for and remove them from our training dataset
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 32.04 84.93 102.18 110.28 131.38 252.32
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.6059 1.0047 1.9649 2.1152 2.8737 6.1309
# Unsurprisingly then, the chemicals are generally less water-soluble
summary(met_data$WATER_SOLUBILITY_MOL.L_OPERA_PRED)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000000 0.001759 0.013948 2.073901 0.254380 26.122900
##
## Human Rat
## 59.80392 40.19608
##
## ABL BL BL (+W) EB EB (+W) EEB MEB
## 4.2483660 35.3408030 0.6069094 24.8366013 0.7002801 2.1008403 0.4668534
## PL VBL
## 1.4472456 30.2521008
# Create a dataframe with 1 row for each unique external exposure scenario
unique_scenarios <- conc_data[with(conc_data,
order(PREFERRED_NAME,
CONC_SPECIES,
SAMPLING_MATRIX,
as.numeric(as.character(DOSE)),EXP_LENGTH,-TIME)),] %>%
distinct(DTXSID,DOSE,DOSE_U,EXP_LENGTH,CONC_SPECIES,SAMPLING_MATRIX, .keep_all = TRUE)
Create a list of dataframes of observed and predicted concentrations for each unique external exposure scenario
plist <- list()
simlist <- list()
obslist <- list()
for(i in 1:nrow(unique_scenarios)){
#tryCatch({
relconc <- subset(conc_data,conc_data$DTXSID == unique_scenarios$DTXSID[i] &
conc_data$DOSE == unique_scenarios$DOSE[i] &
conc_data$EXP_LENGTH == unique_scenarios$EXP_LENGTH[i] &
conc_data$CONC_SPECIES == unique_scenarios$CONC_SPECIES[i] &
conc_data$SAMPLING_MATRIX == unique_scenarios$SAMPLING_MATRIX[i])
obslist[[i]] <- relconc
name <- paste0("out",i)
if(as.character(unique_scenarios$CONC_SPECIES[i]) == "Human"){
solve <- suppressWarnings(assign(name, solve_gas_pbtk(
chem.cas = unique_scenarios$CASRN[i],
days = (unique_scenarios$TIME[i]+unique_scenarios$EXP_LENGTH[i]),
# Make sure we get conc's at the observed times:
times=signif(obslist[[i]]$TIME,4),
tsteps = 500,
exp.conc = ((as.numeric(unique_scenarios$DOSE[i])*1e20*1000)/24450)/1e20,
exp.duration = unique_scenarios$EXP_LENGTH[i]*24,
period = (unique_scenarios$TIME[i]+unique_scenarios$EXP_LENGTH[i])*24,
species = as.character(unique_scenarios$CONC_SPECIES[i]),
vmax.km = FALSE,
vmax = met_data$VMAX[met_data$CASRN %in% unique_scenarios$CASRN[i] &
met_data$SPECIES == unique_scenarios$CONC_SPECIES[i]],
km = met_data$KM[met_data$CASRN %in% unique_scenarios$CASRN[i] &
met_data$SPECIES == unique_scenarios$CONC_SPECIES[i]],
suppress.messages=TRUE)))
} else {
solve <- suppressWarnings(assign(name, solve_gas_pbtk(
chem.cas = unique_scenarios$CASRN[i],
days = (unique_scenarios$TIME[i]+unique_scenarios$EXP_LENGTH[i]),
# Make sure we get conc's at the observed times:
times=signif(obslist[[i]]$TIME,4),
tsteps = 500,
exp.conc = ((as.numeric(unique_scenarios$DOSE[i])*1e20*1000)/24450)/1e20,
exp.duration = unique_scenarios$EXP_LENGTH[i]*24,
period = (unique_scenarios$TIME[i]+unique_scenarios$EXP_LENGTH[i])*24,
species = as.character(unique_scenarios$CONC_SPECIES[i]),
vmax.km = TRUE,
vmax = met_data$VMAX[met_data$CASRN %in% unique_scenarios$CASRN[i] &
met_data$SPECIES == unique_scenarios$CONC_SPECIES[i]],
km = met_data$KM[met_data$CASRN %in% unique_scenarios$CASRN[i] &
met_data$SPECIES == unique_scenarios$CONC_SPECIES[i]],
suppress.messages=TRUE)))
}
#browser()
solve <- as.data.frame(solve)
# Sets the output units appropriate for the sampling matrix
if (unique_scenarios$SAMPLING_MATRIX[i] == "VBL" |
unique_scenarios$SAMPLING_MATRIX[i] == "BL" |
unique_scenarios$SAMPLING_MATRIX[i] == "BL (+W)")
{
solve$simconc <- solve$Cven
solve$unit <- "uM"
} else if (unique_scenarios$SAMPLING_MATRIX[i] == "ABL") {
solve$simconc <- solve$Cart
solve$unit <- "uM"
} else if (unique_scenarios$SAMPLING_MATRIX[i] == "EB" |
unique_scenarios$SAMPLING_MATRIX[i] == "EEB" |
unique_scenarios$SAMPLING_MATRIX[i] == "EB (+W)")
{
solve$simconc <- solve$Cendexh * 24.45
solve$unit <- "ppm"
} else if (unique_scenarios$SAMPLING_MATRIX[i] == "MEB") {
solve$simconc <- solve$Cmixexh * 24.45
solve$unit <- "ppm"
} else if (unique_scenarios$SAMPLING_MATRIX[i] == "PL"){
solve$simconc <- solve$Cplasma
solve$unit <- "uM"
} else {
solve$simconc <- NA
solve$unit <- NA
}
simlist[[i]] <- solve
plot.data <- solve
name1 <- paste0("c.vs.t",i)
#Right now this is only calculating real concentrations according to mg/L in blood
plots <- assign(name1, ggplot(plot.data, aes(time*24, simconc)) +
geom_line() +
xlab("Time (h)") +
ylab(paste0("Simulated ",
unique_scenarios$SAMPLING_MATRIX[i],
"\nConcentration (" ,
solve$unit, ")")) +
ggtitle(paste0(
unique_scenarios$PREFERRED_NAME[i],
" (",
unique_scenarios$CONC_SPECIES[i],
", ",
round(as.numeric(unique_scenarios$DOSE[i]), digits = 2),
unique_scenarios$DOSE_U[i],
" for ",
round(unique_scenarios$EXP_LENGTH[i]*24, digits = 2),
"h in ",
unique_scenarios$SAMPLING_MATRIX[i], ")")) +
geom_point(data = relconc, aes(TIME*24,CONCENTRATION)) +
theme(text = element_text(size=10))+
theme_bw())
plist[[i]] <- plots
#}, error = function(e){})
}
rm(list=ls(pattern='out'))
rm(list=ls(pattern='c.vs.t'))
Create a list to hold the combined observations and predictions for each scenario:
# Creation of simulated vs. observed concentration dataset
unique_scenarios$RSQD <- 0
unique_scenarios$RMSE <- 0
unique_scenarios$AIC <- 0
simobslist <- list()
obvpredlist <- list()
Merge the simulations and observations on the basis of simualation time:
for(i in 1:length(simlist))
{
obsdata <- as.data.frame(obslist[[i]])
simdata <- as.data.frame(simlist[[i]])
# skips over anything for which there was no observed data or
# insufficient information to run simulation:
if (!is.null(simlist[[i]]) & !is.null(obslist[[i]]))
{
# Make sure we are looking at consistent time points:
simobscomb <- simdata[simdata$time %in% signif(obsdata$TIME,4),]
obsdata <- subset(obsdata,signif(TIME,4) %in% simobscomb$time)
# Merge with obsdata
colnames(obsdata)[colnames(obsdata) ==
"TIME"] <-
"obstime"
# Round to match sim time:
obsdata$time <- signif(obsdata$obstime,4)
colnames(obsdata)[colnames(obsdata) ==
"CONCENTRATION"] <-
"obsconc"
colnames(obsdata)[colnames(obsdata) ==
"PREFERRED_NAME"] <-
"chem"
colnames(obsdata)[colnames(obsdata) ==
"DOSE"] <-
"dose"
colnames(obsdata)[colnames(obsdata) ==
"EXP_LENGTH"] <-
"explen"
colnames(obsdata)[colnames(obsdata) ==
"CONC_SPECIES"] <-
"species"
colnames(obsdata)[colnames(obsdata) ==
"SAMPLING_MATRIX"] <-
"matrix"
colnames(obsdata)[colnames(obsdata) ==
"AVERAGE_MASS"] <-
"mw"
colnames(obsdata)[colnames(obsdata) ==
"ORIG_CONC_U"] <-
"orig_conc_u"
simobscomb <- suppressWarnings(merge(obsdata[,c(
"time",
"obstime",
"obsconc",
"chem",
"dose",
"explen",
"species",
"matrix",
"mw",
"orig_conc_u"
)], simobscomb, by="time", all.x=TRUE))
# Merge with met_data
this.met_data <- subset(met_data,
PREFERRED_NAME == simobscomb[1,"chem"] &
SPECIES == simobscomb[1,"species"])
colnames(this.met_data)[colnames(this.met_data)=="CHEM_CLASS"] <-
"chemclass"
colnames(this.met_data)[colnames(this.met_data) ==
"OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED"] <-
"logp"
colnames(this.met_data)[colnames(this.met_data) ==
"WATER_SOLUBILITY_MOL.L_OPERA_PRED"] <-
"sol"
colnames(this.met_data)[colnames(this.met_data) ==
"HENRYS_LAW_ATM.M3.MOLE_OPERA_PRED"] <-
"henry"
colnames(this.met_data)[colnames(this.met_data) ==
"VMAX"] <-
"vmax"
colnames(this.met_data)[colnames(this.met_data) ==
"KM"] <-
"km"
simobscomb <- suppressWarnings(cbind(simobscomb,this.met_data[c(
"chemclass",
"logp",
"sol",
"henry",
"vmax",
"km")]))
simobslist[[i]] <- simobscomb
}
}
Identify the appropriate matric (for example, exhaled breath) for each observation:
for(i in 1:length(simobslist))
if (nrow(simobslist[[i]])>0)
{
simobscomb <- simobslist[[i]]
# Match the matrix for each observation:
for (j in 1:nrow(simobscomb))
if(!is.na(simobscomb$matrix[j]))
{
if (simobscomb$matrix[j] == "VBL" |
simobscomb$matrix[j] == "BL" |
simobscomb$matrix[j] == "BL (+W)")
{
simobscomb$simconc[j] <- simobscomb$Cven[j]
} else if (simobscomb$matrix[j] == "ABL") {
simobscomb$simconc[j] <- simobscomb$Cart[j]
} else if (simobscomb$matrix[j] == "EB" |
simobscomb$matrix[j] == "EEB" |
simobscomb$matrix[j] == "EB (+W)") {
simobscomb$simconc[j] <- simobscomb$Cendexh[j] * 24.45
} else if (simobscomb$matrix[j] == "MEB") {
simobscomb$simconc[j] <- simobscomb$Cendexh[j] * 24.45
} else if (simobscomb$matrix[j] == "PL") {
simobscomb$simconc[j] <- simobscomb$Cplasma[j]
} else {
simobscomb$simconc[j] <- NA
}
}
simobslist[[i]] <- simobscomb
}
Identify which quartile each observation occured in with respect to the latest (maximum) observed time
for(i in 1:length(simobslist))
if (nrow(simobslist[[i]])>0)
{
simobscomb <- simobslist[[i]]
for (j in 1:nrow(simobscomb))
{
max.time <- max(simobscomb$time,na.rm=TRUE)
if (is.na(max.time)) simobscomb$tquart <- NA
else if (max.time == 0) simobscomb$tquart <- "1"
else if (!is.na(simobscomb$time[j]))
{
simobscomb$tquart[j] <- as.character(1 +
floor(simobscomb$time[j]/max.time/0.25))
simobscomb$tquart[simobscomb$tquart=="5"] <-
"4"
} else simobscomb$tquart[j] >- NA
}
simobslist[[i]] <- simobscomb
}
Calculate the area under the curve (AUC)
for(i in 1:length(simobslist))
if (nrow(simobslist[[i]])>0)
{
simobscomb <- simobslist[[i]]
# Calculat the AUC with the trapezoidal rule:
if (nrow(simobscomb)>1)
{
for (k in 2:max(nrow(simobscomb)-1,2,na.rm=TRUE))
{
simobscomb$obsAUCtrap[1] <- 0
simobscomb$simAUCtrap[1] <- 0
if (min(simobscomb$time) <= (simobscomb$explen[1]*1.03) &
nrow(simobscomb) >=2)
{
simobscomb$obsAUCtrap[k] <- simobscomb$obsAUCtrap[k-1] +
0.5*(simobscomb$time[k] - simobscomb$time[k-1]) *
(simobscomb$obsconc[k] + simobscomb$obsconc[k-1])
simobscomb$simAUCtrap[k] <- simobscomb$simAUCtrap[k-1] +
0.5*(simobscomb$time[k]-simobscomb$time[k-1]) *
(simobscomb$simconc[k] + simobscomb$simconc[k-1])
} else {
simobscomb$obsAUCtrap <- 0
simobscomb$simAUCtrap <- 0
}
}
} else {
simobscomb$obsAUCtrap <- 0
simobscomb$simAUCtrap <- 0
}
simobscomb$AUCobs <- max(simobscomb$obsAUCtrap)
simobscomb$AUCsim <- max(simobscomb$simAUCtrap)
simobscomb$calcAUC <- max(simobscomb$AUC)
if (min(simobscomb$time) <= simobscomb$explen[1]*1.03)
{
simobscomb$Cmaxobs <- max(simobscomb$obsconc)
simobscomb$Cmaxsim <- max(simobscomb$simconc)
} else {
simobscomb$Cmaxobs <- 0
simobscomb$Cmaxsim <- 0
}
simobslist[[i]] <- simobscomb
}
Calculate performance statistics
for(i in 1:length(simobslist))
if (nrow(simobslist[[i]])>0)
{
simobscomb <- simobslist[[i]]
unique_scenarios$RSQD[i] <- 1 - (
sum((simobscomb$obsconc - simobscomb$simconc)^2) /
sum((simobscomb$obsconc-mean(simobscomb$obsconc))^2)
)
unique_scenarios$RMSE[i] <-
sqrt(mean((simobscomb$simconc - simobscomb$obsconc)^2))
unique_scenarios$AIC[i] <-
nrow(simobscomb)*(
log(2*pi) + 1 +
log((sum((simobscomb$obsconc-simobscomb$simconc)^2) /
nrow(simobscomb)))
) + ((44+1)*2) #44 is the number of parameters from inhalation_inits.R
simobslist[[i]] <- simobscomb
}
Make a plot for each scenario
for(i in 1:length(simobslist))
if (nrow(simobslist[[i]])>0)
{
simobscomb <- simobslist[[i]]
obvpredplot <- ggplot(simobscomb, aes(x = simconc, y = obsconc)) +
geom_point() +
geom_abline() +
xlab("Simulated Concentrations (uM)") +
ylab("Observed Concentrations (uM)") +
ggtitle(paste0(
unique_scenarios$PREFERRED_NAME[i],
" (",
unique_scenarios$CONC_SPECIES[i],
", ",
round(as.numeric(unique_scenarios$DOSE[i]), digits = 2),
unique_scenarios$DOSE_U[i],
" for ",
round(unique_scenarios$EXP_LENGTH[i]*24, digits = 2),
"h in ",
unique_scenarios$SAMPLING_MATRIX[i], ")")) +
theme_bw() +
theme(plot.title = element_text(face = 'bold', size = 20),
axis.title.x = element_text(face = 'bold', size = 20),
axis.text.x = element_text(size=16),
axis.title.y = element_text(face = 'bold', size = 20),
axis.text.y = element_text(size = 16),
legend.title = element_text(face = 'bold', size = 16),
legend.text = element_text(face = 'bold',size = 14))
obvpredlist[[i]] <- obvpredplot
}
simobsfull <- do.call("rbind",simobslist)
simobsfullrat <- subset(simobsfull, simobsfull$species == "Rat")
simobsfullhum <- subset(simobsfull, simobsfull$species == "Human")
unique_scenarios <- subset(unique_scenarios,!is.na(unique_scenarios$RSQD))
for (i in 1:length(plist))
{
plist[[i]] <- plist[[i]] +
geom_text(
x = Inf,
y = Inf,
hjust = 1.3,
vjust = 1.3,
# size = 6,
label = paste0(
"RMSE: ",
round(unique_scenarios$RMSE[i],digits = 2),
"\nAIC: ",
round(unique_scenarios$AIC[i],digits = 2)))# +
# theme(
# plot.title = element_text(face = 'bold', size = 15),
# axis.title.x = element_text(face = 'bold', size = 20),
# axis.text.x = element_text(size=16),
# axis.title.y = element_text(face = 'bold', size = 20),
# axis.text.y = element_text(size = 16),
# legend.title = element_text(face = 'bold', size = 16),
# legend.text = element_text(face = 'bold',size = 14))
}
Other analytics including linear regression on overall concentration vs. time observed vs. predicted
##
## Human Rat
## 72 65
nrow(simobsfull) - nrow(simobsfull[
!is.na(simobsfull$simconc) &
simobsfull$simconc > 0 &
simobsfull$obsconc > 0,])
## [1] 575
pmiss <- (nrow(simobsfull) -
nrow(simobsfull[
!is.na(simobsfull$simconc) &
simobsfull$simconc > 0 &
simobsfull$obsconc > 0,])) /
nrow(simobsfull) * 100
missdata <- (simobsfull[
is.na(simobsfull$simconc) |
simobsfull$simconc <= 0 |
simobsfull$obsconc <= 0,])
t0df <- simobsfull[simobsfull$obstime == 0,]
lmall <- lm(
#log transforms:
log10(simobsfull$obsconc[
!is.na(simobsfull$simconc) &
simobsfull$simconc > 0 &
simobsfull$obsconc > 0]) ~
#log transforms:
log10(simobsfull$simconc[
!is.na(simobsfull$simconc) &
simobsfull$simconc > 0 &
simobsfull$obsconc > 0]))
#Linear binned 1
lmsub1 <- lm(
simobsfull$obsconc[
!is.na(simobsfull$simconc) &
simobsfull$simconc > 0 &
simobsfull$obsconc < 0.1] ~
simobsfull$simconc[
!is.na(simobsfull$simconc) &
simobsfull$simconc > 0 &
simobsfull$obsconc < 0.1])
#Linear binned 2
lmsub2 <- lm(
simobsfull$obsconc[
!is.na(simobsfull$simconc) &
simobsfull$simconc > 0 &
simobsfull$obsconc >= 0.1 &
simobsfull$obsconc < 10] ~
simobsfull$simconc[
!is.na(simobsfull$simconc) &
simobsfull$simconc > 0 &
simobsfull$obsconc >= 0.1 &
simobsfull$obsconc < 10])
#Linear binned 3
lmsub3 <- lm(
simobsfull$obsconc[
!is.na(simobsfull$simconc) &
simobsfull$simconc > 0 &
simobsfull$obsconc >= 10] ~
simobsfull$simconc[
!is.na(simobsfull$simconc) &
simobsfull$simconc > 0 &
simobsfull$obsconc >= 10])
lmrat <- lm(
log10(simobsfullrat$obsconc[
!is.na(simobsfullrat$simconc) &
simobsfullrat$simconc > 0 &
simobsfullrat$obsconc > 0]) ~
log10(simobsfullrat$simconc[
!is.na(simobsfullrat$simconc) &
simobsfullrat$simconc > 0 &
simobsfullrat$obsconc > 0]))
unique(simobsfullrat$chem)
## [1] "1,1-Dichloroethylene" "1,2-Dichloroethane"
## [3] "1,2-Dichloropropane" "1,3-Butadiene"
## [5] "2,2-Dichloro-1,1,1-trifluoroethane" "Acrylonitrile"
## [7] "Benzene" "Carbon tetrachloride"
## [9] "Chloroform" "Decane"
## [11] "Ethylbenzene" "Furan"
## [13] "Isopropanol" "Methanol"
## [15] "Nonane" "Octane"
## [17] "Pyrene" "Styrene"
## [19] "Tetrachloroethylene" "Toluene"
## [21] "Trichloroethylene" "n-Hexane"
lmhum <- lm(
log10(simobsfullhum$obsconc[
!is.na(simobsfullhum$simconc) &
simobsfullhum$simconc > 0 &
simobsfullhum$obsconc > 0]) ~
log10(simobsfullhum$simconc[
!is.na(simobsfullhum$simconc) &
simobsfullhum$simconc > 0 &
simobsfullhum$obsconc > 0]))
unique(simobsfullhum$chem)
## [1] "1,1,1,2-Tetrafluoroethane"
## [2] "1,1,1-Trichloroethane"
## [3] "1,1,2-Trichloro-1,2,2-trifluoroethane"
## [4] "1,2,4-Trimethylbenzene"
## [5] "1,3-Butadiene"
## [6] "1,4-Dioxane"
## [7] "2-Butoxyethanol"
## [8] "2H-Perfluoropropane"
## [9] "Benzene"
## [10] "Bromotrifluoromethane"
## [11] "Chlorobenzene"
## [12] "Dichlorodifluoromethane"
## [13] "Dichloromethane"
## [14] "Ethanol"
## [15] "Ethyl T-butyl ether"
## [16] "Ethylbenzene"
## [17] "Isopropanol"
## [18] "Methyl ethyl ketone"
## [19] "Methyl tert-butyl ether"
## [20] "N-Methyl-2-pyrrolidone"
## [21] "Styrene"
## [22] "Tetrachloroethylene"
## [23] "Tetrahydrofuran"
## [24] "Trichloroethylene"
## [25] "Vinyl chloride"
## [26] "tert-Amyl methyl ether"
concregslope <- summary(lmall)$coef[2,1]
concregr2 <- summary(lmall)$r.squared
concregrmse <- sqrt(mean(lmall$residuals^2))
totalrmse <- sqrt(mean((
log10(simobsfull$simconc[
!is.na(simobsfull$simconc) &
simobsfull$simconc > 0 &
simobsfull$obsconc > 0]) -
log10(simobsfull$obsconc[
!is.na(simobsfull$simconc) &
simobsfull$simconc > 0 &
simobsfull$obsconc > 0]))^2,
na.rm = TRUE))
totalmae <- mean(abs(
log10(simobsfull$simconc[
!is.na(simobsfull$simconc) &
simobsfull$simconc > 0 &
simobsfull$obsconc > 0]) -
log10(simobsfull$obsconc[
!is.na(simobsfull$simconc) &
simobsfull$simconc > 0 &
simobsfull$obsconc > 0])),
na.rm = TRUE)
totalaic <- nrow(
simobsfull[
!is.na(simobsfull$simconc) &
simobsfull$simconc >0 &
simobsfull$obsconc >
0,]) *
(log(2*pi) +
1 +
log((sum(
(simobsfull$obsconc[
!is.na(simobsfull$simconc) &
simobsfull$simconc > 0 &
simobsfull$obsconc > 0] -
simobsfull$simconc[
!is.na(simobsfull$simconc) &
simobsfull$simconc > 0 &
simobsfull$obsconc > 0])^2,
na.rm=TRUE) /
nrow(simobsfull[
!is.na(simobsfull$simconc) &
simobsfull$simconc > 0 &
simobsfull$obsconc > 0,])))) +
((44+1)*2) #44 is the number of parameters from inhalation_inits.R
mispred <- table(abs(
log10(simobsfull$simconc) -
log10(simobsfull$obsconc))>2 &
simobsfull$simconc>0)
mispred[2]
## TRUE
## 134
mispred[2] / nrow(simobsfull[
!is.na(simobsfull$simconc) &
simobsfull$simconc >0 &
simobsfull$obsconc > 0,])*100
## TRUE
## 8.328154
overpred <- table(
log10(simobsfull$simconc) -
log10(simobsfull$obsconc)>2 &
simobsfull$simconc>0)
overpred[2]
## TRUE
## 10
overpred[2] / nrow(simobsfull[
!is.na(simobsfull$simconc) &
simobsfull$simconc >0 &
simobsfull$obsconc > 0,])*100
## TRUE
## 0.621504
underpred <- table(
log10(simobsfull$obsconc) -
log10(simobsfull$simconc)>2 &
simobsfull$simconc>0)
underpred[2]
## TRUE
## 124
underpred[2] / nrow(simobsfull[
!is.na(simobsfull$simconc) &
simobsfull$simconc >0 &
simobsfull$obsconc > 0,])*100
## TRUE
## 7.70665
mispredhalf <- table(abs(
log10(simobsfull$simconc) -
log10(simobsfull$obsconc))>0.5 &
simobsfull$simconc>0)
mispredhalf[2]
## TRUE
## 675
mispredhalf[2] / nrow(simobsfull[
!is.na(simobsfull$simconc) &
simobsfull$simconc >0 &
simobsfull$obsconc > 0,])*100
## TRUE
## 41.95152
overpredhalf <- table(
log10(simobsfull$simconc) -
log10(simobsfull$obsconc)>0.5 &
simobsfull$simconc>0)
overpredhalf[2]
## TRUE
## 290
overpredhalf[2] / nrow(simobsfull[
!is.na(simobsfull$simconc) &
simobsfull$simconc >0 &
simobsfull$obsconc > 0,])*100
## TRUE
## 18.02362
underpredhalf <- table(
log10(simobsfull$obsconc) -
log10(simobsfull$simconc)>0.5 &
simobsfull$simconc>0)
underpredhalf[2]
## TRUE
## 385
underpredhalf[2] / nrow(simobsfull[
!is.na(simobsfull$simconc) &
simobsfull$simconc > 0 &
simobsfull$obsconc > 0,])*100
## TRUE
## 23.92791
chemunderpred <- subset(simobsfull,
log10(simobsfull$simconc) -
log10(simobsfull$obsconc) < 0 &
simobsfull$simconc > 0)
table(chemunderpred$chemclass) / table(simobsfull$chemclass)*100
##
## Alcohol Aliphatic hydrocarbon
## 36.666667 16.438356
## Aromatic hydrocarbon Ether
## 43.891403 45.967742
## Fluorinated organic compound Halogenated organic compound
## 9.309309 47.285887
## Other
## 65.236052
fig2 <- ggplot(
data = simobsfull[
simobsfull$simconc > 0 &
simobsfull$obsconc > 0,],
aes(x = log10(simconc), y = log10(obsconc))) +
geom_point(
color = ifelse(
abs(
log10(simobsfull[
simobsfull$simconc > 0 &
simobsfull$obsconc > 0,]$simconc) -
log10(simobsfull[
simobsfull$simconc > 0 &
simobsfull$obsconc > 0,]$obsconc)) >2,
'red',
'black')) +
geom_abline() +
xlab("Log(Simulated Concentrations)") +
ylab("Log(Observed Concentrations)") +
theme_bw() +
geom_smooth(method = 'lm',se = FALSE, aes(color = 'Overall')) +
geom_smooth(method = 'lm', se = FALSE, aes(color = species)) +
geom_text(
x = Inf,
y = -Inf,
hjust = 1.05,
vjust = -0.15,
# size = 6,
label = paste0(
"Regression slope: ",
round(summary(lmall)$coef[2,1],digits = 2),
"\nRegression R^2: ",
round(summary(lmall)$r.squared,digits = 2),
"\nRegression RMSE: ",
round(sqrt(mean(lmall$residuals^2)),digits = 2),
"\nRMSE (Identity): ",
round(totalrmse,digits = 2),
"\n% Missing:",
round(pmiss, digits = 2), "%")) +
geom_text(
data = simobsfull[
abs(log10(simobsfull$simconc) - log10(simobsfull$obsconc))>7 &
simobsfull$simconc>0 & simobsfull$obsconc > 0,],
aes(label = paste(chem,species,matrix)),
fontface = 'bold',
check_overlap = TRUE,
# size = 3.5,
hjust = 0.5,
vjust = -0.8) +
scale_color_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) #+
# theme(
# plot.title = element_text(face = 'bold', size = 15),
# axis.title.x = element_text(face = 'bold', size = 20),
# axis.text.x = element_text(size=16),
# axis.title.y = element_text(face = 'bold', size = 20),
# axis.text.y = element_text(size = 16),
# legend.title = element_text(face = 'bold', size = 16),
# legend.text = element_text(face = 'bold',size = 14))
fig2 #Display plot in R
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 141 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 141 rows containing non-finite values (stat_smooth).
## Warning: Removed 141 rows containing missing values (geom_point).
## Warning: Removed 141 rows containing missing values (geom_text).
# Create data and run calculations for populating plots
cmaxfull <- subset(simobsfull, !duplicated(simobsfull$AUCobs) & simobsfull$Cmaxobs != 0)
cmaxobs <- cmaxfull$Cmaxobs
cmaxsim <- cmaxfull$Cmaxsim
cmaxobs <- cmaxobs[!is.nan(cmaxsim)]
cmaxsim <- cmaxsim[!is.nan(cmaxsim)]
cmaxsim[!is.finite(log10(cmaxsim))] <- NA
cmaxlm <- lm(log10(cmaxobs)~log10(cmaxsim), na.action = na.exclude)
cmaxvcbkg <- subset(cmaxfull,
paste(
cmaxfull$chem,
cmaxfull$dose,
cmaxfull$explen,
cmaxfull$species,
cmaxfull$matrix) %in%
paste(
t0df$chem,
t0df$dose,
t0df$explen,
t0df$species,
t0df$matrix))
cmaxvcbkg$cmaxcbkgratio <- cmaxvcbkg$Cmaxobs / cmaxvcbkg$obsconc
cmaxvcbkg$adjustedCmaxsim <- cmaxvcbkg$Cmaxsim - cmaxvcbkg$obsconc
aucfull <-subset(simobsfull,
!duplicated(simobsfull$AUCobs) &
simobsfull$AUCobs != 0)
aucobs <- aucfull$AUCobs
aucsim <- aucfull$AUCsim
aucobs <- aucobs[!is.nan(aucsim)]
aucsim <- aucsim[!is.nan(aucsim)]
aucsim[!is.finite(log10(aucsim))] <- NA
auclm <- lm(log10(aucobs)~log10(aucsim), na.action = na.exclude)
cmaxslope <- summary(cmaxlm)$coef[2,1]
cmaxrsq <- summary(cmaxlm)$r.squared
totalrmsecmax <- sqrt(mean((log10(cmaxfull$Cmaxsim) -
log10(cmaxfull$Cmaxobs))^2, na.rm = TRUE))
cmaxmiss <- nrow(cmaxfull[
abs(log10(cmaxfull$Cmaxsim) -
log10(cmaxfull$Cmaxobs)) > 1,])
cmaxmissp <- nrow(cmaxfull[
abs(log10(cmaxfull$Cmaxsim) -
log10(cmaxfull$Cmaxobs)) > 1,]) /
nrow(cmaxfull) * 100
cmaxmisschem <- table(cmaxfull[
abs(log10(cmaxfull$Cmaxsim) -
log10(cmaxfull$Cmaxobs)) > 1,]$chem)
aucslope <- summary(auclm)$coef[2,1]
aucrsq <- summary(auclm)$r.squared
totalrmseauc <- sqrt(mean((
log10(aucfull$AUCsim) -
log10(aucfull$AUCobs))^2, na.rm = TRUE))
aucmiss <- nrow(aucfull[
abs(log10(aucfull$AUCsim) -
log10(aucfull$AUCobs)) > 1,])
aucmissp <- nrow(aucfull[
abs(log10(aucfull$AUCsim) -
log10(aucfull$AUCobs)) > 1,]) /
nrow(aucfull) * 100
aucmisschem <- table(aucfull[
abs(log10(aucfull$AUCsim) -
log10(aucfull$AUCobs)) > 1,]$chem)
cmaxp <- ggplot(data = cmaxfull, aes(x = log10(Cmaxsim), y = log10(Cmaxobs))) +
geom_point(color =
ifelse(abs(log10(cmaxfull$Cmaxsim) -log10(cmaxfull$Cmaxobs))>=1, "red","black")) +
geom_abline() +
xlab("Log(Simulated Max Concentration)") +
ylab("Log(Observed Max Concentration)") +
theme_bw() +
geom_smooth(method = 'lm', se = FALSE, aes(color = 'Overall')) +
geom_smooth(method = 'lm', se = FALSE, aes(color = species)) +
geom_text(x = Inf,
y = -Inf,
hjust = 1.05,
vjust = -0.15,
# size = 6,
label = paste0("Regression slope: ",
round(summary(cmaxlm)$coef[2,1],digits = 2),
"\nRegression R^2: ",
round(summary(cmaxlm)$r.squared,digits = 2))) +
geom_text_repel(
data = cmaxfull[
(log10(cmaxfull$Cmaxsim)-log10(cmaxfull$Cmaxobs))>=1 &
log10(cmaxfull$Cmaxsim) > 2,],
aes(label = paste(chem,species,matrix)),
force = 2,
# size = 5.3,
fontface = 'bold',
color = 'black',
hjust = -0.05,
vjust = 0.5) +
scale_y_continuous(lim = c (-1,5)) +
scale_x_continuous(lim = c(-1,5)) +
geom_text_repel(
data = cmaxfull[
(log10(cmaxfull$Cmaxsim)-log10(cmaxfull$Cmaxobs))>=1 &
log10(cmaxfull$Cmaxsim) <= 2,],
aes(label = paste(chem,species,matrix)),
nudge_x = 0.0,
nudge_y = -0.2,
force = 2,
# size = 5.3,
fontface = 'bold',
color = 'black',
hjust = -0.05,
vjust = 0.5) +
geom_text(
data = cmaxfull[
(log10(cmaxfull$Cmaxsim)-log10(cmaxfull$Cmaxobs))<=-1,],
aes(label = paste(chem,species,matrix)),
# size = 5.3,
fontface = 'bold',
color = 'black',
hjust = 0.5,
vjust = -0.7) +
scale_color_discrete(
name = 'Species',
breaks = c("Overall","Human","Rat")) #+
# theme(plot.title = element_text(face = 'bold', size = 10),
# axis.title.x = element_text(face = 'bold', size = 10),
# axis.text.x = element_text(size=8),
# axis.title.y = element_text(face = 'bold', size = 10),
# axis.text.y = element_text(size = 8),
# legend.title = element_text(face = 'bold', size = 8),
# legend.text = element_text(face = 'bold',size = 8))
cmaxp
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 26 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 26 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_text_repel).
## Warning: Removed 5 rows containing missing values (geom_text_repel).
## Warning: Removed 6 rows containing missing values (geom_text).
aucp <- ggplot(
data = aucfull,
aes(x = log10(AUCsim), y = log10(AUCobs))) +
geom_point(color =
ifelse(abs(log10(aucfull$AUCsim)-log10(aucfull$AUCobs))>=1, "red","black")) +
geom_abline() +
xlab("Log(Simulated AUC)") +
ylab("Log(Observed AUC)") +
theme_bw() +
geom_smooth(method = 'lm', se = FALSE, aes(color = "Overall")) +
geom_smooth(method = 'lm', se = FALSE, aes(color = species)) +
geom_text(
x = Inf,
y = -Inf,
hjust = 1.05,
vjust = -0.15,
# size = 6,
label = paste0(
"Regression slope: ",
round(summary(auclm)$coef[2,1],digits = 2),
"\nRegression R^2: ",
round(summary(auclm)$r.squared,digits = 2))) +
geom_text_repel(
data = aucfull[(log10(aucfull$AUCsim)-log10(aucfull$AUCobs))>=1,],
aes(label = paste(chem,species,matrix)),
# size = 5.3,
fontface = 'bold',
color = 'black',
hjust = -0.05,
vjust = 0.5) +
scale_y_continuous(lim = c (-2,4)) +
scale_x_continuous(lim = c(-2,4)) +
geom_text(
data = aucfull[(log10(aucfull$AUCsim)-log10(aucfull$AUCobs))<=-1,],
aes(label = paste(chem,species,matrix)),
# size = 5.3,
fontface = 'bold',
color = 'black',
hjust = 0.5,
vjust = -0.8) +
scale_color_discrete(name = 'Species', breaks = c("Overall","Human","Rat")) #+
# theme(
# plot.title = element_text(face = 'bold', size = 15),
# axis.title.x = element_text(face = 'bold', size = 20),
# axis.text.x = element_text(size=16),
# axis.title.y = element_text(face = 'bold', size = 20),
# axis.text.y = element_text(size = 16),
# legend.title = element_text(face = 'bold', size = 16),
# legend.text = element_text(face = 'bold',size = 14))
aucp
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 26 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 26 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_text_repel).
## Warning: Removed 7 rows containing missing values (geom_text).
simobsfull$aggscen <- as.factor(paste(simobsfull$chem,
simobsfull$species,
simobsfull$matrix))
chem.lm <- lm(
log10(simconc) - log10(obsconc) ~
aggscen,
data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,])
chem.res <- resid(chem.lm)
# Number of observations per chemical class
numpt <- simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,] %>%
group_by(chemclass) %>% summarize(n = paste("n =", length(simconc)))
fig3 <- ggplot(
data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
aes(x = aggscen, y = log10(simconc)-log10(obsconc), fill = chemclass)) +
geom_boxplot() +
geom_hline(yintercept = 0) +
geom_hline(yintercept = 2, linetype = 2)+
geom_hline(yintercept = -2, linetype = 2)+
xlab("Exposure Scenario") +
ylab("Log(Simulated Concentration)-\nLog(Observed Concentration)\n") +
facet_wrap(vars(chemclass), scales = 'free_x', nrow = 1) + #35 by 12 for poster
theme_bw() +
geom_text(
data = numpt,
aes(x = Inf, y = -Inf, hjust = 1.05, vjust = -0.5, label = n),
size = 10,
colour = 'black',
inherit.aes = FALSE,
parse = FALSE) +
theme(
axis.text.x = element_text(angle = 90, hjust = 1,vjust=0.5,size = 20, face = 'bold'),
strip.text.x = element_text(face = 'bold', size = 24),
legend.position = 'none',
axis.title.x = element_text(face = 'bold', size = 30),
axis.title.y = element_text(face = 'bold', size = 30),
axis.text.y = element_text(face = 'bold',size = 25, color = 'black'))
fig3
#pdf("Figure3.pdf", width = 40, height = 13)
#print(fig3)
#dev.off()
figs1a <- ggplot(
data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
aes(x = tquart, y = log10(simconc)-log10(obsconc))) +
geom_boxplot()+
geom_hline(yintercept = 0)+
geom_hline(yintercept = 2, linetype = 2)+
geom_hline(yintercept = -2, linetype = 2)+
xlab("\nTime Quartile\n") +
ylab("Log(Simulated Concentration)-\nLog(Observed Concentration)\n") +
theme_bw()+
theme(
axis.text.x = element_text(size = 20, face = 'bold'),
strip.text.x = element_text(face = 'bold', size = 20),
legend.position = 'none',
axis.title.x = element_text(face = 'bold', size = 20),
axis.title.y = element_text(face = 'bold', size = 20),
axis.text.y = element_text(size = 20, face = 'bold'))
figs1a
figs1b <- ggplot(
data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
aes(x = mw, y = log10(simconc)-log10(obsconc))) +
geom_point()+
geom_hline(yintercept = 0)+
geom_hline(yintercept = 2, linetype = 2)+
geom_hline(yintercept = -2, linetype = 2)+
xlab("\nMolecular Weight (g/mol)\n") +
ylab("\nLog(Simulated Concentration)-\nLog(Observed Concentration)\n") +
theme_bw()+
theme(
axis.text.x = element_text(size = 20, face = 'bold'),
strip.text.x = element_text(face = 'bold', size = 20),
legend.position = 'none',
axis.title.x = element_text(face = 'bold', size = 20),
axis.title.y = element_text(face = 'bold', size = 20),
axis.text.y = element_text(size = 20, face = 'bold'))
figs1b
figs1c <- ggplot(
data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
aes(x = logp, y = log10(simconc)-log10(obsconc))) +
geom_point()+
geom_hline(yintercept = 0)+
geom_hline(yintercept = 2, linetype = 2)+
geom_hline(yintercept = -2, linetype = 2)+
xlab("\nLog P") +
ylab("Log(Simulated Concentration)-\nLog(Observed Concentration)\n") +
theme_bw() +
theme(
axis.text.x = element_text(size = 20, face = 'bold'),
strip.text.x = element_text(face = 'bold', size = 20),
legend.position = 'none',
axis.title.x = element_text(face = 'bold', size = 20),
axis.title.y = element_text(face = 'bold', size = 20),
axis.text.y = element_text(size = 20, face = 'bold'))
figs1c
figs1d <- ggplot(
data = simobsfull[simobsfull$simconc >0 & simobsfull$obsconc > 0,],
aes(x = sol, y = log10(simconc)-log10(obsconc))) +
geom_point()+
geom_hline(yintercept = 0)+
geom_hline(yintercept = 2, linetype = 2)+
geom_hline(yintercept = -2, linetype = 2)+
xlab("\nSolubility (mol/L)") +
ylab("\nLog(Simulated Concentration)-\nLog(Observed Concentration)\n") +
scale_x_log10()+
theme_bw() +
theme(
axis.text.x = element_text(size = 20, face = 'bold'),
strip.text.x = element_text(face = 'bold', size = 20),
legend.position = 'none',
axis.title.x = element_text(face = 'bold', size = 20),
axis.title.y = element_text(face = 'bold', size = 20),
axis.text.y = element_text(size = 20, face = 'bold'))
figs1d
#pdf("FigureS1.pdf", width = 20, height = 20)
plot_grid(figs1a,figs1b,figs1c,figs1d,nrow = 2, labels = c('A','B','C','D'), label_size = 30)
#dev.off()
senschem <- list()
sensslope <- list()
sensrsq <- list()
sensregrmse <- list()
senstotalrmse <- list()
senspmiss <- list()
senscmaxslope <- list()
senscmaxrsq <- list()
senstotalrmsecmax <- list()
sensaucslope <- list()
sensaucrsq <- list()
senstotalrmseauc <- list()
for (i in 1:nrow(simobsfull))
{
simobsfullsens <- subset(simobsfull,simobsfull$chem != simobsfull$chem[i])
senschem[i] <- as.character(simobsfull$chem[i])
senslm <- lm(
log10(simobsfullsens$obsconc[
!is.na(simobsfullsens$simconc) &
simobsfullsens$simconc > 0 &
simobsfullsens$obsconc > 0])
log10(simobsfullsens$simconc[
!is.na(simobsfullsens$simconc) &
simobsfullsens$simconc >0 &
simobsfullsens$obsconc > 0]))
sensslope[i] <- round(summary(senslm)$coef[2,1],digits = 2)
sensrsq[i] <- round(summary(senslm)$r.squared,digits = 2)
sensregrmse[i] <- round(sqrt(mean(lm$residuals^2)),digits = 2)
senstotalrmse[i] <- round(sqrt(mean((
log10(simobsfullsens$simconc[
!is.na(simobsfullsens$simconc) &
simobsfullsens$simconc >0 &
simobsfullsens$obsconc > 0]) -
log10(simobsfullsens$obsconc[
!is.na(simobsfullsens$simconc) &
simobsfullsens$simconc >0 &
simobsfullsens$obsconc > 0]))^2,
na.rm = TRUE)), digits = 2)
senspmiss[i] <- round((nrow(simobsfullsens) -
nrow(simobsfullsens[
!is.na(simobsfullsens$simconc) &
simobsfullsens$simconc >0 &
simobsfullsens$obsconc > 0,])) / nrow(simobsfullsens) * 100,
digits = 2)
senscmaxfull <- subset(simobsfullsens, !duplicated(simobsfullsens$Cmaxobs))
senscmaxlm <- lm(
log10(senscmaxfull$Cmaxobs[senscmaxfull$Cmaxobs>0]) ~
log10(senscmaxfull$Cmaxsim[senscmaxfull$Cmaxobs>0]),
na.action = na.exclude)
sensaucfull <-subset(simobsfullsens, !duplicated(simobsfullsens$AUCobs))
sensauclm <- lm(
log10(aucfull$AUCobs[aucfull$AUCobs>0]) ~
log10(aucfull$AUCsim[aucfull$AUCobs>0]),
na.action = na.exclude)
senscmaxslope[i] <- round(summary(senscmaxlm)$coef[2,1],digits = 2)
senscmaxrsq[i] <- round(summary(senscmaxlm)$r.squared,digits = 2)
senstotalrmsecmax[i] <- sqrt(mean((log10(senscmaxfull$Cmaxsim[senscmaxfull$Cmaxobs>0]) - log10(senscmaxfull$Cmaxobs[senscmaxfull$Cmaxobs>0]))^2, na.rm = TRUE))
sensaucslope[i] <- round(summary(sensauclm)$coef[2,1],digits = 2)
sensaucrsq[i] <- round(summary(sensauclm)$r.squared,digits = 2)
senstotalrmseauc[i] <- sqrt(mean((log10(sensaucfull$AUCsim[sensaucfull$AUCobs>0]) - log10(sensaucfull$AUCobs[sensaucfull$AUCobs>0]))^2, na.rm = TRUE))
}
sensitivitydf <- data.frame(Chemical <- as.character(senschem),
sensSlope <- as.numeric(sensslope),
sensRsq <- as.numeric(sensrsq),
sensRegrmse <- as.numeric(sensregrmse),
sensTotrmse <- as.numeric(senstotalrmse),
sensPmiss <- as.numeric(senspmiss),
sensCmaxslope <- as.numeric(senscmaxslope),
sensCmaxrsq <- as.numeric(senscmaxrsq),
sensCmaxrmse <- as.numeric(senstotalrmsecmax),
sensAUCslope <- as.numeric(sensaucslope),
sensAUCrsq <- as.numeric(sensaucrsq),
sensAUCrmse <- as.numeric(senstotalrmseauc),
stringsAsFactors=FALSE)
sensitivitydf <- subset(sensitivitydf,!duplicated(sensitivitydf$Chemical....as.character.senschem.))
names(sensitivitydf) <- c('Chemical Dropped','Regression Slope','Regression R^2','Regression RMSE','Orthogonal RMSE', 'Percent Data Censored','Cmax Regression Slope','Cmax Regression R^2','Cmax Orthogonal RMSE','AUC Regression Slope','AUC Regression R^2','AUC Orthogonal RMSE')
notdropped <- c('None',concregslope,concregr2,concregrmse,totalrmse,pmiss,cmaxslope,cmaxrsq,totalrmsecmax,aucslope,aucrsq,totalrmseauc)
sensitivitydf <- rbind(notdropped, sensitivitydf)
sensitivitydf[,-1] <- sapply(sensitivitydf[,-1],as.numeric)
sensitivitydf[,-1] <- round(sensitivitydf[,-1],2)
# Clean up and write file
rm(chem.lm, obvpredplot, p, pdata1, plot.data, plots, relconc, sensaucfull, sensauclm, sensaucrsq, sensaucslope, senschem, senscmaxfull, senscmaxlm, senscmaxrsq, senscmaxslope, senslm, senspmiss, sensregrmse, sensrsq, sensslope, senstotalrmse, senstotalrmseauc, senstotalrmsecmax, solve, AUCrmse, AUCrsq, AUCslope, chem.res, Chemical, Cmaxrmse, Cmaxrsq, Cmaxslope, colors, count, i, j, k, met_col, name, name1, Pmiss, Regrmse, Rsq, Slope, rem, Totrmse)
write.csv(sensitivitydf, 'supptab2.csv',row.names = FALSE)
supptab1 <- subset(unique_scenarios, !duplicated(unique_scenarios$PREFERRED_NAME) | !duplicated(unique_scenarios$SOURCE_CVT) | !duplicated(unique_scenarios$CONC_SPECIES))
for(i in 1:nrow(supptab1)){
tryCatch({
supptab1$Metabolism_Source[i] <- met_data$SOURCE_MET[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
supptab1$Log_P[i] <- met_data$OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED[met_data$DTXSID %in% supptab1$DTXSID[i]& met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
supptab1$Solubility[i] <- met_data$WATER_SOLUBILITY_MOL.L_OPERA_PRED[met_data$DTXSID %in% supptab1$DTXSID[i]& met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
supptab1$Blood_Air_Partition_Coefficient[i] <- met_data$CALC_PBA[met_data$DTXSID %in% supptab1$DTXSID[i]& met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
supptab1$Chem_Class[i] <- met_data$CHEM_CLASS[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
supptab1$Species[i] <- met_data$SPECIES[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
supptab1$Vmax[i] <- met_data$VMAX[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
supptab1$Km[i] <- met_data$KM[met_data$DTXSID %in% supptab1$DTXSID[i] & met_data$SPECIES %in% supptab1$CONC_SPECIES[i]]
}, error = function(e){})
}
supptab1 <- supptab1[c('PREFERRED_NAME','DTXSID','CASRN','Chem_Class','AVERAGE_MASS','Log_P','Solubility','Blood_Air_Partition_Coefficient','Species','Vmax','Km','Metabolism_Source','SOURCE_CVT')]
names(supptab1) <- c('Chemical','DTXSID','CASRN','CAMEO Chemical Class','Molecular Weight (g/mol)','Log P','Solubility (mol/L)','Blood Air Partition Coefficient','Available Species Data','Vmax (pmol/min/10^6 cells)','KM (uM)','Metabolism Data Source','Concentration-Time Data Source')
write.csv(supptab1, "supptab1.csv", row.names = FALSE)