Overview

The R programming environment provides many ways to visualise and analyse DEB parameters. This ‘work-in-progress’ document shows how to bring the AmP collection into R in two ways, by ‘web-scraping’ and by working with the allStat.mat file. The latter is most powerful but the former can be useful in some situations.

Web scraping

Obtaining data directly from the web is a useful skill and can be done in R through a variety of packages and approaches. Here we use ‘rvest’ package.

library(rvest)
## Loading required package: xml2
library(knitr)

Included here are three example functions that will obtain data from different pages of the AmP web site.

The first one, ‘getDEB.species’, goes to the ‘species_list’ page on the AmP site and obtains all the classification, model fit and completeness information. The correct nodes to obtain the relevant data were determined using selectorgadget which is a plug-in for your browser allowing you to click on different parts of a webpage and get the node names.

getDEB.species <- function() {
    require(rvest)
    library(rvest)
    url <- "https://www.bio.vu.nl/thb/deb/deblab/add_my_pet/species_list.html"
    d1 <- read_html(url)
    
    phylum <- d1 %>% html_nodes("td:nth-child(1)") %>% html_text()
    class <- d1 %>% html_nodes("td:nth-child(2)") %>% html_text()
    order <- d1 %>% html_nodes("td:nth-child(3)") %>% html_text()
    family <- d1 %>% html_nodes("td:nth-child(4)") %>% html_text()
    species <- d1 %>% html_nodes("td:nth-child(5)") %>% html_text()
    common <- d1 %>% html_nodes("td:nth-child(6)") %>% html_text()
    type <- d1 %>% html_nodes("td:nth-child(7)") %>% html_text()
    mre <- d1 %>% html_nodes("td:nth-child(8)") %>% html_text()
    smre <- d1 %>% html_nodes("td:nth-child(9)") %>% html_text()
    complete <- d1 %>% html_nodes("td:nth-child(10)") %>% html_text()
    all.species <- as.data.frame(cbind(phylum, class, order, 
        family, species, common, type, mre, smre, complete), 
        stringsAsFactors = FALSE)
    all.species$species <- gsub(" ", "_", all.species$species)
    all.species$mre <- as.numeric(mre)
    all.species$smre <- as.numeric(smre)
    all.species$complete <- as.numeric(complete)
    return(all.species)
}

The next one, ‘getDEB.pars’ gets all the parameters from the pars page of a species of interest from the list of species obtained with ‘getDEB.species’. Due to the structure of the tables for the chemical composition parameters the code is a little less straight-forward.

getDEB.pars <- function(species) {
    require(rvest)
    library(rvest)
    baseurl <- "https://www.bio.vu.nl/thb/deb/deblab/add_my_pet/entries_web/"
    d1 <- read_html(paste0(baseurl, species, "/", species, "_par.html"))
    symbol1 <- d1 %>% html_nodes("td:nth-child(1)") %>% html_text()
    
    value1 <- d1 %>% html_nodes("td:nth-child(2)") %>% html_text()
    
    units1 <- d1 %>% html_nodes("td:nth-child(3)") %>% html_text()
    
    description1 <- d1 %>% html_nodes("td:nth-child(4)") %>% 
        html_text()
    
    extra1 <- d1 %>% html_nodes("td:nth-child(5)") %>% html_text()
    
    extra2 <- d1 %>% html_nodes("td:nth-child(6)") %>% html_text()
    end <- which(symbol1 == "T_ref")
    symbol <- symbol1[1:end]
    value <- value1[1:end]
    units <- units1[1:end]
    description <- description1[1:end]
    
    pars <- as.data.frame(cbind(symbol, value, units, description))
    pars$symbol <- as.character(symbol)
    pars$value <- as.numeric(value)
    pars$units <- as.character(units)
    pars$description <- as.character(description)
    
    chempot <- c(value1[end + 1], units1[end + 1], description1[end + 
        1], extra1[1])
    dens <- c(value1[end + 2], units1[end + 2], description1[end + 
        2], extra1[2])
    org.C <- c(units1[end + 3], description1[end + 3], extra1[3], 
        extra2[1])
    org.H <- c(value1[end + 4], units1[end + 4], description1[end + 
        4], extra1[4])
    org.O <- c(value1[end + 5], units1[end + 5], description1[end + 
        5], extra1[5])
    org.N <- c(value1[end + 6], units1[end + 6], description1[end + 
        6], extra1[6])
    min.C <- c(units1[end + 7], description1[end + 7], extra1[7], 
        extra2[2])
    min.H <- c(value1[end + 8], units1[end + 8], description1[end + 
        8], extra1[8])
    min.O <- c(value1[end + 9], units1[end + 9], description1[end + 
        9], extra1[9])
    min.N <- c(value1[end + 10], units1[end + 10], description1[end + 
        10], extra1[10])
    
    organics <- rbind(org.C, org.H, org.O, org.N)
    minerals <- rbind(min.C, min.H, min.O, min.N)
    colnames(organics) <- c("X", "V", "E", "P")
    colnames(minerals) <- c("CO2", "H2O", "O2", "N-waste")
    rownames(organics) <- c("C", "H", "O", "N")
    rownames(minerals) <- c("C", "H", "O", "N")
    class(chempot) <- "numeric"
    class(dens) <- "numeric"
    class(organics) <- "numeric"
    class(minerals) <- "numeric"
    
    return(list(pars = pars, chempot = chempot, dens = dens, 
        organics = organics, minerals = minerals))
}

Finally, ‘getDEB.implied.R’ obtains all the implied properties.

getDEB.implied <- function(species) {
    require(rvest)
    library(rvest)
    baseurl <- "https://www.bio.vu.nl/thb/deb/deblab/add_my_pet/entries_web/"
    d1 <- read_html(paste0(baseurl, species, "/", species, "_stat.html"))
    symbol <- d1 %>% html_nodes("td:nth-child(1)") %>% html_text()
    
    value <- d1 %>% html_nodes("td:nth-child(2)") %>% html_text()
    
    units <- d1 %>% html_nodes("td:nth-child(3)") %>% html_text()
    
    description <- d1 %>% html_nodes("td:nth-child(4)") %>% html_text()
    
    final <- as.data.frame(cbind(symbol, value, units, description))
    final$symbol <- as.character(symbol)
    final$value <- as.numeric(value)
    final$units <- as.character(units)
    final$description <- as.character(description)
    return(final)
}

Here are all the Squamata currently in the collection on the web, as returned by ‘allDEB.species’.

allDEB.species <- getDEB.species()
head(allDEB.species)
kable(subset(allDEB.species, order == "Squamata"))
phylum class order family species common type mre smre complete
547 Chordata Reptilia Squamata Gekkonidae Heteronotia_binoei Bynoe’s gecko CA6 std 0.142 0.175 2.8
548 Chordata Reptilia Squamata Gekkonidae Heteronotia_binoei_3N1A Bynoe’s gecko 3N1A std 0.117 0.146 2.8
549 Chordata Reptilia Squamata Gekkonidae Heteronotia_binoei_3N1B Bynoe’s gecko 3N1B std 0.130 0.157 2.8
550 Chordata Reptilia Squamata Gekkonidae Heteronotia_binoei_EA6 Bynoe’s gecko EA6 std 0.135 0.147 2.8
551 Chordata Reptilia Squamata Gekkonidae Heteronotia_binoei_SM6 Bynoe’s gecko SM6 std 0.153 0.180 2.8
552 Chordata Reptilia Squamata Scincidae Eulamprus_quoyii Eastern Water Skink std 0.146 0.147 2.5
553 Chordata Reptilia Squamata Scincidae Tiliqua_rugosa Sleepy Lizard std 0.109 0.118 2.7
554 Chordata Reptilia Squamata Scincidae Egernia_cunninghami Cunningham’s Skink std 0.232 0.247 2.5
555 Chordata Reptilia Squamata Scincidae Egernia_striolata Tree Skink std 0.204 0.239 2.5
556 Chordata Reptilia Squamata Scincidae Liopholis_striata Night Skink std 0.151 0.160 2.5
557 Chordata Reptilia Squamata Scincidae Liopholis_inornata Desert Skink std 0.122 0.126 2.5
558 Chordata Reptilia Squamata Amphisbaenidae Amphisbaena_alba Red worm lizard std 0.002 0.003 1.5
559 Chordata Reptilia Squamata Lacertidae Lacerta_agilis Sand lizard std 0.045 0.047 2.8
560 Chordata Reptilia Squamata Lacertidae Lacerta_strigata Caucusus Emerald Lizard std 0.049 0.053 2.8
561 Chordata Reptilia Squamata Lacertidae Takydromus_hsuehshanensis Snow Mountain grasslizard std 0.165 0.180 2.8
562 Chordata Reptilia Squamata Varanidae Varanus_komodoensis Komodo dragon std 0.070 0.073 2.5
563 Chordata Reptilia Squamata Anguidae Anguis_fragilis Slow worm std 0.106 0.100 2.5
564 Chordata Reptilia Squamata Chamaeleonidae Furcifer_labordi Labords chameleon std 0.109 0.112 2.5
565 Chordata Reptilia Squamata Agamidae Ctenophorus_ornatus Ornate dragon std 0.134 0.134 2.6
566 Chordata Reptilia Squamata Iguanidae Cyclura_pinguis Anegada ground iguana std 0.062 0.068 2.5
567 Chordata Reptilia Squamata Phrynosomatidae Sceloporus_undulatus Eastern fence lizard std 0.088 0.132 1.7
568 Chordata Reptilia Squamata Phrynosomatidae Sceloporus_occidentalis Western fence lizard std 0.096 0.095 2.5
569 Chordata Reptilia Squamata Phrynosomatidae Phrynosoma_ditmarsi Rock horned lizard std 0.033 0.060 2.8
570 Chordata Reptilia Squamata Phrynosomatidae Phrynosoma_douglasii Pygmy short-horned lizard std 0.014 0.024 2.5
571 Chordata Reptilia Squamata Pythonidae Python_regius Ball python std 0.246 0.171 3.8
572 Chordata Reptilia Squamata Pythonidae Apodora_papuana Irian python std 0.068 0.073 2.5
573 Chordata Reptilia Squamata Boidae Eunectes_murinus Green anaconda std 0.112 0.129 2.6
574 Chordata Reptilia Squamata Boidae Eunectes_notaeus Yellow anaconda std 0.105 0.100 2.6
575 Chordata Reptilia Squamata Boidae Boa_constrictor Columbian boa std 0.241 0.220 2.6
576 Chordata Reptilia Squamata Viperidae Vipera_berus Common European adder std 0.073 0.072 2.7
577 Chordata Reptilia Squamata Viperidae Crotalus_horridus Timber rattlesnake std 0.048 0.055 2.8
578 Chordata Reptilia Squamata Colubridae Coronella_austriaca Smooth snake std 0.090 0.101 2.5
579 Chordata Reptilia Squamata Colubridae Natrix_natrix Grass snake std 0.147 0.150 2.6

Now you can extract data for a particular species - the sand lizard Lacerta agilis. First, here are the parameters as extracted by ‘getDEB.pars’.

allpars <- getDEB.pars("Lacerta_agilis")
pars <- allpars$pars
chempot <- allpars$chempot
dens <- allpars$dens
organics <- allpars$organics
minerals <- allpars$minerals

kable(pars)
symbol value units description
p_Am 155.91200 J/d.cm^2 {p_Am}, spec assimilation flux
F_m 6.50000 l/d.cm^2 {F_m}, max spec searching rate
kap_X 0.80000 - digestion efficiency of food to reserve
kap_P 0.10000 - faecation efficiency of food to faeces
v 0.02887 cm/d energy conductance
kap 0.79300 - allocation fraction to soma
kap_R 0.95000 - reproduction efficiency
p_M 65.73000 J/d.cm^3 [p_M], vol-spec somatic maint
p_T 0.00000 J/d.cm^2 {p_T}, surf-spec somatic maint
k_J 0.00200 1/d maturity maint rate coefficient
E_G 7832.00000 J/cm^3 [E_G], spec cost for structure
E_Hb 390.80000 J maturity at birth
E_Hp 18970.00000 J maturity at puberty
h_a 0.00000 1/d^2 Weibull aging acceleration
s_G 0.00010 - Gompertz stress coefficient
del_M 0.17650 - shape coefficient
f 1.00000 - scaled functional response for 0-var data
f_Kh 0.95120 - scaled functional response at Khuchni
f_Ko 0.90610 - scaled functional response at Kostek
f_Ku 1.00000 - scaled functional response at Kuli
f_Se 0.85700 - scaled functional response at Sergokala
f_Te 0.91460 - scaled functional response at Termenlik
z_m 1.96100 - zoom factor for males
T_A 8000.00000 K Arrhenius temperature
T_ref 293.15000 K Reference temperature
kable(chempot, align = "l")
x
525000
500000
550000
480000
kable(dens, align = "l")
x
0.3
0.3
0.3
0.3
kable(organics)
X V E P
C 1.00 1.00 1.00 1.00
H 1.80 1.80 1.80 1.80
O 0.50 0.50 0.50 0.50
N 0.15 0.15 0.15 0.15
kable(minerals)
CO2 H2O O2 N-waste
C 1 0 0 1.0
H 0 2 0 0.8
O 2 1 2 0.6
N 0 0 0 0.8

And here are the implied parameters for this species.

implied <- getDEB.implied("Lacerta_agilis")
kable(implied)
symbol value units description
z 1.88100e+00 - zoom factor
c_T 1.72866e+00 - Temperature Correction factor
s_Hbp 2.06009e-02 - maturity ratio
s_HLbp 4.59333e-01 - maturity density ratio at f=1
s_s 4.32501e-02 - supply stress
E_0 2.87037e+03 J initial reserve
Wd_0 1.24731e-01 g initial dry weight
a_b 4.35333e+01 d age at birth
a_p 4.80482e+02 d age at puberty
a_99 1.40652e+03 d age at length 0.99 * L_i
Wd_b 8.77178e-02 g dry weight at birth
Wd_p 1.95582e+00 g dry weight at puberty
Wd_i 3.55842e+00 g ultimate dry weight
L_b 5.47435e-01 cm structural length at birth
L_p 1.54080e+00 cm structural length at puberty
L_i 1.88100e+00 cm ultimate structural length
W_dWm 3.51448e+00 g wet weight at maximum growth
dWm 4.88340e-03 g/d maximum growth in wet weight
R_i 4.36248e-02 1/d ultimate reproduction rate
N_i 5.58396e+01 # life time reproductive output
del_Wb 2.46508e-02 - birth weight as fraction of maximum weight
del_Wp 5.49631e-01 - puberty weight as fraction of maximum weight
del_V 5.61088e-01 - fraction of max weight that is structure
r_B 3.12640e-03 1/d von Bertalanffy growth rate
E_m 5.40048e+03 J/cm^3 [E_m], reserve capacity
t_starve 4.75291e+01 d maximum survival time when starved
t_E 3.76905e+01 d maximum reserve residence time
xi_WE 2.18387e+01 kJ/ g whole-body energy density of dry biomass (no reprod buffer)
eb_min_G 2.83278e-01 - scaled reserve density whereby growth ceases at birth
eb_min_R 8.35218e-02 - scaled reserve density whereby maturation ceases at birth
J_Ob 9.02000e-05 mol/d O2 flux at birth
J_Op 1.22450e-03 mol/d O2 flux at puberty
J_Oi 1.85060e-03 mol/d ultimate O2 flux

Working with the allStat.mat file in R

The AmPtool github repository houses the file ‘allStat.mat’, which includes a larger array of parameters and implied properties for all species in the AmP collection. This file can be read into R with the ‘R.matlab’ package and saved as an R data object. The initial read of the .mat file takes a while (5-10 minutes), but the ‘.Rda’ file to which it is converted loads much faster (seconds).

Here is the code to read and convert the ‘allStat.mat’ file.

library(R.matlab)
allStat <- readMat("allStat.mat")  # this will take a while, approx. 6 mins on a DELL PRECISION M4800
save(allStat, file = "allstat.Rda")  # save it as an R data file for faster future loading

Now this file can be read into memory in R and manipulated to extract parameter values for different species or taxa. The file is a list of lists.

For example, you can get a list and count of all the species in the collection (in this case, as of 18th August, 2018):

load("allStat.Rda")

allDEB.species <- unlist(labels(allStat$allStat))  # get all the species names
allDEB.species <- allDEB.species[1:(length(allDEB.species) - 
    2)]  # last two elements are not species names
kable(head(allDEB.species))
x
Haliclona.oculata
Ptilosarcus.gurneyi
Chironex.fleckeri
Hydra.viridissima
Pelagia.noctiluca
Cyanea.capillata
Nspecies <- length(allStat$allStat)
Nspecies
## [1] 1194

And you can extract all parameters for a given species. Here it is done by using the ‘assign’ function First, the slot in the top-level list of species is found which matches the species name chosen. Then, the parameter names for that entry are extracted. Finally, all elements of the lists of data for that species are extracted and assigned names corresponding to the parameter names just extracted, using the ‘assign’ function, using a for loop. Once it has run, all the parameters, implied properties and details about the entry are thus loaded into the memory with appropriate names, ready for use.

species <- "Lacerta.agilis"
species.slot <- which(allDEB.species == species)
par.names <- unlist(labels(allStat$allStat[[species.slot]]))

for (i in 1:length(par.names)) {
    assign(par.names[i], unlist(allStat$allStat[[species.slot]][i]))
}

Here’s the full list of par names extracted for Lacerta agilis.

par.names
##   [1] "species"    "units"      "label"      "species.en" "family"    
##   [6] "order"      "class"      "phylum"     "model"      "MRE"       
##  [11] "SMSE"       "COMPLETE"   "data"       "author"     "date.subm" 
##  [16] "author.mod" "date.mod"   "date.acc"   "T.typical"  "z"         
##  [21] "F.m"        "kap.X"      "kap.P"      "v"          "kap"       
##  [26] "kap.R"      "p.M"        "p.T"        "k.J"        "E.G"       
##  [31] "E.Hb"       "E.Hp"       "h.a"        "s.G"        "T.A"       
##  [36] "T.ref"      "del.M"      "f"          "f.Kh"       "f.Ko"      
##  [41] "f.Ku"       "f.Se"       "f.Te"       "z.m"        "d.X"       
##  [46] "d.V"        "d.E"        "d.P"        "mu.X"       "mu.V"      
##  [51] "mu.E"       "mu.P"       "mu.C"       "mu.H"       "mu.O"      
##  [56] "mu.N"       "n.CX"       "n.HX"       "n.OX"       "n.NX"      
##  [61] "n.CV"       "n.HV"       "n.OV"       "n.NV"       "n.CE"      
##  [66] "n.HE"       "n.OE"       "n.NE"       "n.CP"       "n.HP"      
##  [71] "n.OP"       "n.NP"       "n.CC"       "n.HC"       "n.OC"      
##  [76] "n.NC"       "n.CH"       "n.HH"       "n.OH"       "n.NH"      
##  [81] "n.CO"       "n.HO"       "n.OO"       "n.NO"       "n.CN"      
##  [86] "n.HN"       "n.ON"       "n.NN"       "T"          "c.T"       
##  [91] "p.Am"       "w.X"        "w.V"        "w.E"        "w.P"       
##  [96] "M.V"        "y.V.E"      "y.E.V"      "k.M"        "k"         
## [101] "E.m"        "m.Em"       "g"          "L.m"        "L.T"       
## [106] "l.T"        "w"          "J.E.Am"     "y.E.X"      "y.X.E"     
## [111] "p.Xm"       "J.X.Am"     "y.P.X"      "y.X.P"      "y.P.E"     
## [116] "eta.XA"     "eta.PA"     "eta.VG"     "J.E.M"      "J.E.T"     
## [121] "j.E.M"      "j.E.J"      "kap.G"      "E.V"        "K"         
## [126] "M.Hb"       "U.Hb"       "V.Hb"       "u.Hb"       "v.Hb"      
## [131] "M.Hp"       "U.Hp"       "V.Hp"       "u.Hp"       "v.Hp"      
## [136] "t.E"        "t.starve"   "s.M"        "r.B"        "W.dWm"     
## [141] "dWm"        "U.E0"       "E.0"        "M.E0"       "Wd.0"      
## [146] "Ww.0"       "l.b"        "L.b"        "M.Vb"       "del.Ub"    
## [151] "Ww.b"       "Wd.b"       "E.Wb"       "a.b"        "g.Hb"      
## [156] "s.Hbp"      "s.HLbp"     "l.p"        "L.p"        "M.Vp"      
## [161] "M.Ep"       "Ww.p"       "Wd.p"       "E.Wp"       "a.p"       
## [166] "g.Hp"       "s.s"        "l.i"        "L.i"        "M.Vi"      
## [171] "M.Ei"       "Ww.i"       "Wd.i"       "E.Wi"       "xi.WE"     
## [176] "del.Wb"     "del.Wp"     "del.V"      "h.W"        "h.G"       
## [181] "a.m"        "S.b"        "S.p"        "a.99"       "R.i"       
## [186] "N.i"        "M.E0.min.G" "M.E0.min.R" "eb.min.G"   "eb.min.R"  
## [191] "ep.min"     "sM.min"     "p.Xb"       "J.Xb"       "F.mb"      
## [196] "p.Xp"       "J.Xp"       "F.mp"       "p.Xi"       "J.Xi"      
## [201] "F.mi"       "p.Ab"       "p.Cb"       "p.Sb"       "p.Jb"      
## [206] "p.Gb"       "p.Rb"       "p.Db"       "p.Ap"       "p.Cp"      
## [211] "p.Sp"       "p.Jp"       "p.Gp"       "p.Rp"       "p.Dp"      
## [216] "p.Ai"       "p.Ci"       "p.Si"       "p.Ji"       "p.Gi"      
## [221] "p.Ri"       "p.Di"       "J.Cb"       "J.Cp"       "J.Ci"      
## [226] "J.Ob"       "J.Op"       "J.Oi"       "J.Nb"       "J.Np"      
## [231] "J.Ni"       "RQ.b"       "UQ.b"       "WQ.b"       "SDA.b"     
## [236] "VO.b"       "p.Tb"       "RQ.p"       "UQ.p"       "WQ.p"      
## [241] "SDA.p"      "VO.p"       "p.Tp"       "RQ.i"       "UQ.i"      
## [246] "WQ.i"       "SDA.i"      "VO.i"       "p.Ti"       "1"         
## [251] "1"

And here’s an example of plotting the implied mass-specific oxygen consumption rate as a function of dry mass for all the squamata (lizards and snakes) in the collection.

pars.wanted <- c("Wd.i", "VO.i")  # choose parameters of interest

# make an empty matrix to put the values in
all.VOi <- matrix(NA, nrow = length(allDEB.species), ncol = length(pars.wanted))

# loop through all species, get the parameters
for (i in 1:length(allDEB.species)) {
    par.names <- unlist(labels(allStat$allStat[[i]]))
    for (j in 1:length(pars.wanted)) {
        par.slot <- which(par.names == pars.wanted[j])
        all.VOi[i, j] <- unlist(allStat$allStat[[i]][par.slot])
    }
}

# get just the values for a given order
order.slot <- which(par.names == "order")  # find which item contains the taxonomic order
order.wanted <- "Squamata"  # choose order of interest

# loop through all species and find those matching the chosen
# order
all.orders <- vector()
for (i in 1:length(allDEB.species)) {
    all.orders[i] <- unlist(allStat$allStat[[c(i, order.slot)]])
}

# subset the extracted parameters for just those species in
# the chosen order
squam.voi <- all.VOi[which(all.orders %in% order.wanted), ]

plot(log10(squam.voi[, 1]), log10(squam.voi[, 2] * -1), ylab = "log10(O2 consumption rate, mol/g/day)", 
    xlab = "log10(dry mass, g)")

# estimate the slope and plot it
estWO2 <- coef(summary(lm(log10(squam.voi[, 2] * -1) ~ log10(squam.voi[, 
    1]))))
slp <- estWO2[2]
abline(estWO2[1], estWO2[2])
text(3, -3, paste0("slope = ", round(slp, 2)))