# Ancestral state reconstruction (continuous)
# Once we have our phylogeny and trait data paired, we can reconstruct character evolution by
# inferring ancestral character states.

# 1) Make a working directory for this exercise and set it as your working directory:

setwd("/Users/jcsantos/Desktop/R_class_winter_2015_home/0_Bio559R_course_final_files/week_8_office/data")

# 2) We need to download these files from the course website into our working directory:
#      bird_data_pruned.txt
#      bird_pruned_MB.newick
#      tyrannidae_reduced_data.txt
#      tyrannidae_reduced_tree.newick

# 3) Several packages and functions are available for this purpose.

library(ape)
library(geiger)
library(phytools)

# 4) Load our trees and data from last class and check concordance:

bird_pruned_tree <- read.tree("bird_pruned_MB.newick")
bird_pruned_tree

birds_traits_data <- read.table("bird_data_pruned.txt", header = TRUE, sep = "\t")
birds_traits_data

# Use the 'name.check' function to check the concordance between the data file and the phylogenetic tree
name.check(bird_pruned_tree, birds_traits_data)
# [1] "OK"   # we have concordance (i.e., the same number of tips and one data row per tip)

# This is your exercise:
tyrannidae_reduced_tree <- read.tree("tyrannidae_reduced_tree.newick")
tyrannidae_reduced_data <- read.table("tyrannidae_reduced_data.txt", header = TRUE, sep = "\t")

name.check(tyrannidae_reduced_tree, tyrannidae_reduced_data)
# [1] "OK"   # we have concordance

# 5) We need to make sure that our tree is ultrametric and, preferably, bifurcating.

is.ultrametric(bird_pruned_tree)          # TRUE
is.binary.tree(bird_pruned_tree)          # TRUE: all nodes (including the root) have exactly two descendants

is.ultrametric(tyrannidae_reduced_tree)   # TRUE
is.binary.tree(tyrannidae_reduced_tree)   # TRUE: all nodes (including the root) have exactly two descendants

# 6) Let's explore our continuous data for all birds, grouped by Order

library(ggplot2)

## No log10
ggplot(birds_traits_data, aes(x = Order, y = Mass_g/1000, fill = Order)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(x = "Aves -- Order", y = "Body Mass (kg)", fill = NULL)

## log10
ggplot(birds_traits_data, aes(x = Order, y = log10(Mass_g), fill = Order)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(x = "Aves -- Order", y = "Log10 Body Mass (g)", fill = NULL)

# 7) Let's explore our continuous data for all birds, grouped by Family

## No log10
ggplot(birds_traits_data, aes(x = Family, y = Mass_g/1000, fill = Family)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(x = "Aves -- Family", y = "Body Mass (kg)", fill = NULL)

## log10
ggplot(birds_traits_data, aes(x = Family, y = log10(Mass_g), fill = Family)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(x = "Aves -- Family", y = "Log10 Body Mass (g)", fill = NULL)

# 8) It is apparent that at the Aves level the data are not normal; within a single family
#    such as Tyrannidae, a transformation might not be necessary.
#    Let's test for normality.

shapiro.test(birds_traits_data$Mass_g)   # Aves level
#  Shapiro-Wilk normality test
#  data:  birds_traits_data$Mass_g
#  W = 0.1023, p-value < 2.2e-16

# Let's create a variable that holds the log10-transformed body mass data
birds_traits_data$log10_Mass_g <- log10(birds_traits_data$Mass_g)
head(birds_traits_data)

shapiro.test(birds_traits_data$log10_Mass_g)   # Aves level
#  Shapiro-Wilk normality test
#  data:  birds_traits_data$log10_Mass_g
#  W = 0.9479, p-value = 1.69e-11
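# Optional side check (not part of the original exercise): the remark above suggests that within
# a single family the transformation may matter less. A quick way to see this is to run
# Shapiro-Wilk tests family by family; the length check and tryCatch() simply skip groups that
# are too small or otherwise unsuitable for the test.
by_family_p <- tapply(birds_traits_data$log10_Mass_g, birds_traits_data$Family,
                      function(x) {
                        if (length(x) >= 3) {
                          tryCatch(shapiro.test(x)$p.value, error = function(e) NA)
                        } else NA
                      })
head(sort(by_family_p))   # families with the strongest departures from normality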
# The Shapiro-Wilk test is very conservative, but we can also explore Q-Q (quantile-quantile) plots.

# 9) Let's draw the Q-Q plots

install.packages("nortest")   # additional tests for normality
library(nortest)

par(mfrow = c(1, 2))
qqnorm(birds_traits_data$Mass_g, main = "Normal Q-Q Plot -- Aves -- Body Mass")
qqnorm(birds_traits_data$log10_Mass_g, main = "Normal Q-Q Plot -- Aves -- log10 Body Mass")

# The log transformation has improved the distribution. It is still not normal, but given the
# large sample size this is not a major concern for regression analyses.
# Other transformations are possible, but interpreting such transformed data becomes even
# harder, and it will not change the results if we use robust statistical methods.
# See the presentation for more information about transformations.

# 10) Plot the continuous variable next to the bird tree

# We need to sort the trait values so they match the order of the tip labels in the phylogeny
match_birds_data  <- match(bird_pruned_tree$tip.label, rownames(birds_traits_data))   # use the match function
sorted_birds_data <- as.data.frame(birds_traits_data[match_birds_data, ])
sorted_birds_data

# Print the phylogeny next to the transformed data
matrix_graph <- matrix(c(1, 2), 1)   # matrix to locate plots: plot 1 -- left; plot 2 -- right
layout(matrix_graph, c(0.6, 0.4))

par(mar = c(4.1, 1, 1, 0))   # margins as c(bottom, left, top, right), in lines
plot(bird_pruned_tree, show.tip.label = FALSE)   # plot the tree, leaving room next to the tips for the trait bars

par(mar = c(4.1, 0, 1, 1))   # margins as c(bottom, left, top, right), in lines
barplot(sorted_birds_data$log10_Mass_g, horiz = TRUE, width = 1, space = 0, names.arg = "")
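# Optional sanity check (an extra step, not in the original script): the barplot above is only
# meaningful if the reordered rows line up one-to-one with the tip labels, so it is worth
# confirming that explicitly before interpreting the figure.
stopifnot(identical(rownames(sorted_birds_data), bird_pruned_tree$tip.label))
layout(1)   # restore the default single-panel layout for the plots that follow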
##############################################################################################################
##############################################################################################################

# Using 'ape'

# 11) We will use the ace (Ancestral Character Estimation) function of 'ape' to reconstruct
#     ancestral character states using maximum likelihood:
#
# ace(x, phy, type = "continuous",
#     method = "REML",    # first estimates the ancestral value at the root (a.k.a. the phylogenetic mean),
#                         # then the variance of the Brownian motion process is estimated by optimizing
#                         # the residual log-likelihood; the ancestral values are finally inferred from
#                         # the likelihood function given these two parameters
#     method = "pic",     # the confidence intervals are computed using the expected variances under the
#                         # model, so they depend only on the tree
#     method = "GLS",     # similar to "pic"
#     CI = TRUE,          # 95% confidence intervals
#     model = "BM",       # Brownian motion is the only model option for continuous characters
#     scaled = TRUE,
#     kappa = 1,          # if branch lengths need transformation, a positive exponent for that transformation
#     corStruct = NULL,
#     ip = 0.1,
#     use.expm = FALSE,
#     use.eigen = TRUE,
#     marginal = FALSE)   # if TRUE, returns the marginal reconstruction (likelihood values) at each node
#
# Note: ace() assumes that the trait values are in the same order as the tip labels unless the
# vector is named; here the rows of birds_traits_data are assumed to be in tip-label order.
# A named vector, e.g. setNames(birds_traits_data$log10_Mass_g, rownames(birds_traits_data)),
# is the safer option for your own data.

# 12) Let's explore method = "REML":
# This is the Brownian motion-based maximum likelihood (ML) estimator of Schluter et al. (1997).
# We use REML because it provides unbiased estimates (compared to "ML") of the variance of the
# Brownian motion process.

log10_Mass_bird_ace_REML <- ace(birds_traits_data$log10_Mass_g, bird_pruned_tree,
                                type = "continuous", method = "REML", marginal = TRUE)
log10_Mass_bird_ace_REML

#  Ancestral Character Estimation
#
#  Call: ace(x = birds_traits_data$log10_Mass_g, phy = bird_pruned_tree,
#            type = "continuous", method = "REML", marginal = TRUE)
#
#  Residual log-likelihood: 1757.884   <--- the log-likelihood of the most likely reconstruction
#
#  $ace   <--- a vector of reconstructed node values
#        452       453       454       455       456       457       458       459       460
#  3.0204793 3.2499122 3.0810056 3.3392881 3.7115866 3.5490073 2.9503006 2.9305053 2.6328168
#        461       462       463       464       465       466       467       468
#  2.0877089 2.6112151 2.5544724 2.6542317 2.6849797 1.8522521 2.0165004 2.6543825
#  ...
#
#  $sigma2   <--- a two-element vector: the rate estimate and its standard error
#  [1] 0.0094210721 0.0006407525
#
#  $CI95   <--- the 95% confidence intervals around the reconstructed node values
#           [,1]     [,2]
#  452 2.1130802 3.927878

# 13) Let's explore method = "pic":
# This is also a Brownian motion-based approach, using Felsenstein's (1985) phylogenetic
# independent contrasts (PIC). However, it only takes the descendants of each node into account
# when reconstructing the state at that node; more basal nodes are ignored.

log10_Mass_bird_ace_pic <- ace(birds_traits_data$log10_Mass_g, bird_pruned_tree,
                               type = "continuous", method = "pic", marginal = TRUE)
log10_Mass_bird_ace_pic

#  Ancestral Character Estimation
#
#  Call: ace(x = birds_traits_data$log10_Mass_g, phy = bird_pruned_tree,
#            type = "continuous", method = "pic", marginal = TRUE)
#
#  Residual log-likelihood: 1757.884   <--- the log-likelihood of the most likely reconstruction
#
#  $ace   <--- a vector of reconstructed node values
#        452       453       454       455       456       457       458       459       460
#  3.0204793 3.2499122 3.0810056 3.3392881 3.7115866 3.5490073 2.9503006 2.9305053 2.6328168
#        461       462       463       464       465       466       467       468
#  2.0877089 2.6112151 2.5544724 2.6542317 2.6849797 1.8522521 2.0165004 2.6543825
#  ...
#
#  $sigma2   <--- a two-element vector: the rate estimate and its standard error
#  [1] 0.0094210721 0.0006407525
#
#  $CI95   <--- the 95% confidence intervals around the reconstructed node values
#           [,1]     [,2]
#  452 2.1130802 3.927878

# Combined vector of tip values followed by the reconstructed node values
birds_log10_Mass_g <- c(birds_traits_data$log10_Mass_g, log10_Mass_bird_ace_pic$ace)

# 14) Let's plot our reconstructions on our tree

plot(bird_pruned_tree, type = "fan", show.tip.label = FALSE)
tiplabels(pch = 21, cex = sorted_birds_data$log10_Mass_g/2)   # tip symbols must be given in tip-label order
nodelabels(pch = 21, cex = log10_Mass_bird_ace_pic$ace/2)
axisPhylo()
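# Optional comparison (not part of the original exercise): the two ace() methods should give
# broadly similar node estimates, which is easy to eyeball by plotting one set against the other.
plot(log10_Mass_bird_ace_REML$ace, log10_Mass_bird_ace_pic$ace,
     xlab = "REML node estimates", ylab = "pic node estimates")
abline(0, 1, lty = 2)   # 1:1 line; points far from it flag nodes where the methods disagree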
#######################################  Phytools  #################################################

# 15) For this package, we need to isolate our trait of interest and then give it the
#     corresponding names from the phylogeny

birds_traits_data_log10_Mass_g <- birds_traits_data$log10_Mass_g
names(birds_traits_data_log10_Mass_g) <- birds_traits_data$Genus_Species

# 16) Let's try the reconstructions using phytools.

# fastAnc (from the manual):
# This function performs fast estimation of the ML ancestral states for a continuous trait
# by taking advantage of the fact that the state computed for the root node of the tree
# during Felsenstein's (1985) contrasts algorithm is also the maximum-likelihood estimate (MLE)
# of the root node. Thus, the function re-roots the tree at all internal nodes and computes
# the contrasts state at the root each time.

log10_Mass_bird_fastAnc <- fastAnc(bird_pruned_tree, birds_traits_data_log10_Mass_g, CI = TRUE)
log10_Mass_bird_fastAnc

#  $ace   <--- ancestral state estimates
#        452       453       454       455       456       457       458       459       460
#  2.8144783 3.2564965 3.8268806 3.8844212 3.3854237 3.3461974 2.4683799 2.5775496 2.4386633
#        461       462       463       464       465       466       467       468
#  2.1295003 2.5220832 2.4198142 2.1598356 2.0405282 2.0243548 2.7157835 2.7011164
#  ...
#
#  $CI95   <--- lower and upper 95% confidence intervals
#           [,1]     [,2]
#  452 2.0892972 3.539659
#  453 2.5456000 3.967393
#  454 3.1860535 4.467708
#  ...

# anc.ML (from the manual):
# This function estimates the evolutionary parameters and ancestral states for Brownian
# evolution using likelihood. It is also possible (for model = "BM") to allow missing data
# for some tip taxa.

log10_Mass_bird_ancML_BM <- anc.ML(bird_pruned_tree, birds_traits_data_log10_Mass_g, model = "BM")
log10_Mass_bird_ancML_BM

# The results should be similar to those from fastAnc
#  $sig2
#  0.003024751
#
#  $ace
#        452       453       454       455       456       457       458       459       460
#  2.8144783 3.2564965 3.8268806 3.8844212 3.3854237 3.3461974 2.4683799 2.5775496 2.4386633
#        461       462       463       464       465       466       467       468
#  2.1295003 2.5220832 2.4198142 2.1598356 2.0405282 2.0243548 2.7157835 2.7011164
#  ...
#
#  $logLik
#  [1] 522.9801
#
#  $counts
#  function gradient
#        21       21
#
#  $convergence
#  [1] 52
#
#  $message
#  [1] "ERROR: ABNORMAL_TERMINATION_IN_LNSRCH"
#
#  $model
#  [1] "BM"
#
#  attr(,"class")
#  [1] "anc.ML"
#
# Note the non-zero $convergence code and the optimizer message: the likelihood search did not
# converge cleanly in this run, so these estimates should be checked against the fastAnc results.

# The same function applies for the OU model:
# log10_Mass_bird_ancML_OU <- anc.ML(bird_pruned_tree, birds_traits_data_log10_Mass_g, model = "OU")
# log10_Mass_bird_ancML_OU

# In mathematics, the Ornstein-Uhlenbeck process (named after Leonard Ornstein and George Eugene
# Uhlenbeck) is a stochastic process that, roughly speaking, describes the velocity of a massive
# Brownian particle under the influence of friction. The process is stationary, Gaussian, and
# Markovian, and is the only nontrivial process that satisfies these three conditions, up to
# allowing linear transformations of the space and time variables. Over time, the process tends
# to drift towards its long-term mean: such a process is called mean-reverting. The process can
# be considered a modification of the random walk in continuous time (the Wiener process), in
# which the properties of the process have been changed so that there is a tendency for the walk
# to move back towards a central location, with a greater attraction when the process is further
# away from the centre. The Ornstein-Uhlenbeck process can also be considered the continuous-time
# analogue of the discrete-time AR(1) process.
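# A minimal simulation sketch (not part of the exercise; every parameter value below is an
# arbitrary choice) to illustrate the mean reversion described above: a Brownian-motion path
# wanders freely, while an Ornstein-Uhlenbeck path is pulled back toward its long-term mean.
set.seed(1)
n_steps <- 1000
dt      <- 0.01
theta   <- 2      # OU: strength of the pull toward the mean
mu      <- 0      # OU: long-term mean
sigma   <- 1      # diffusion rate shared by both processes

bm <- cumsum(rnorm(n_steps, mean = 0, sd = sigma * sqrt(dt)))   # Brownian motion

ou <- numeric(n_steps)                                          # Ornstein-Uhlenbeck (Euler scheme)
for (i in 2:n_steps) {
  ou[i] <- ou[i - 1] + theta * (mu - ou[i - 1]) * dt + sigma * sqrt(dt) * rnorm(1)
}

plot(bm, type = "l", ylim = range(bm, ou), xlab = "time step", ylab = "trait value")
lines(ou, col = "red")   # the OU path (red) stays near mu; the BM path drifts away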
# 17) Plot our reconstruction on our tree (from the phytools examples)

log10_Mass_bird_fastAnc_contMap <- contMap(bird_pruned_tree, birds_traits_data_log10_Mass_g,
                                           spread.labels = TRUE)
plot(log10_Mass_bird_fastAnc_contMap, type = "fan", show.tip.label = FALSE)

##########################################################################################
##########################################################################################
######################################  MARCH 3rd  #######################################
##########################################################################################
##########################################################################################

# Continuation of last class on continuous character reconstruction

# working directory
setwd("/Users/jcsantos/Desktop/R_class_winter_2015_home/0_Bio559R_course_final_files/week_9/data")

# libraries
library(ape)
library(geiger)
library(phytools)

# Load our trees and data from last class and check concordance:
bird_pruned_tree <- read.tree("bird_pruned_MB.newick")
bird_pruned_tree

birds_traits_data <- read.table("bird_data_pruned.txt", header = TRUE, sep = "\t")
birds_traits_data

# Data transformation:
birds_traits_data$log10_Mass_g <- log10(birds_traits_data$Mass_g)
head(birds_traits_data)

#########################################  Geiger  #########################################

# Similar to fitDiscrete, fitContinuous is a very flexible function of 'geiger'.
# It provides a set of statistics that allows us to decide which model best describes
# the evolution of the trait that we want to reconstruct.

# 18) First, we need to prepare our data using the function 'treedata'

log10_Mass_g_birds_geiger <- treedata(bird_pruned_tree, birds_traits_data)
log10_Mass_g_birds_geiger$phy    # an object of class "phylo"
log10_Mass_g_birds_geiger$data   # a matrix

# 19) We are going to prepare our data as before, but we are going to use the modified code
#     for the function that is available on the course website:

phy     <- log10_Mass_g_birds_geiger$phy
all_dat <- log10_Mass_g_birds_geiger$data
dat     <- as.numeric(all_dat[, 15])   # column 15 is assumed to hold log10_Mass_g; check the index for your own data
names(dat) <- all_dat[, 1]             # first column assumed to hold the species names (alternatively, rownames(all_dat))

############ Compare_models_fitContinuous: copy the function exactly as indicated below ############

results_fitContinuous_one_trait <- function(phy, dat) {

  ##### test concordance ####
  print(name.check(phy, dat))

  #### COMPARE MODELS ####
  models      <- c("BM", "OU", "EB", "white", "lambda", "kappa", "delta")   # set of models to compare
  summaries   <- c("Brownian Motion -- BM", "Ornstein-Uhlenbeck -- OU", "Early Burst -- EB",
                   "white noise -- non-phylogenetic", "Pagel's lambda", "Pagel's kappa", "Pagel's delta")
  summaries_2 <- c("Brownian Motion with no ME -- BM", "Ornstein-Uhlenbeck with no ME -- OU",
                   "Early Burst with no ME -- EB", "white noise with no ME -- non-phylogenetic",
                   "Pagel's lambda with no ME", "Pagel's kappa with no ME", "Pagel's delta with no ME")

  ## ESTIMATING measurement error (SE) ##
  # numeric vectors (one element per model) to store the fit statistics
  aic.se        <- numeric(length(models))
  aicc.se       <- numeric(length(models))
  lnl.se        <- numeric(length(models))
  parameters.se <- numeric(length(models))
  alpha.se      <- numeric(length(models))
  eb_a.se       <- numeric(length(models))
  lambda_est.se <- numeric(length(models))
  kappa_est.se  <- numeric(length(models))
  delta_est.se  <- numeric(length(models))
  for (m in 1:length(models)) {   # m indexes the models vector (1 to 7)
    cat("\n\n\t*** ", paste(toupper(summaries[m]), ": fitting ", sep = ""), models[m],
        " with SE *** \n", sep = "")   # print this title before fitting
    tmp <- fitContinuous(phy, dat, SE = NA, model = models[m], bounds = list(SE = c(0, 0.5)))
    print(tmp)
    aic.se[m]        <- round(tmp$opt$aic, 4)
    aicc.se[m]       <- round(tmp$opt$aicc, 4)
    lnl.se[m]        <- round(tmp$opt$lnL, 4)
    parameters.se[m] <- tmp$opt$k
    # model-specific parameters: report the estimate if the model has it, otherwise "------"
    alpha.se[m]      <- ifelse(!is.null(tmp$opt$alpha),  round(tmp$opt$alpha, 4),  "------")
    eb_a.se[m]       <- ifelse(!is.null(tmp$opt$a),      round(tmp$opt$a, 4),      "------")
    lambda_est.se[m] <- ifelse(!is.null(tmp$opt$lambda), round(tmp$opt$lambda, 4), "------")
    kappa_est.se[m]  <- ifelse(!is.null(tmp$opt$kappa),  round(tmp$opt$kappa, 4),  "------")
    delta_est.se[m]  <- ifelse(!is.null(tmp$opt$delta),  round(tmp$opt$delta, 4),  "------")
  }

  ### Get list of parameters (with SE) ###
  parameter_matrix.se   <- matrix(, length(models), 5)
  parameter_matrix.se[] <- c(alpha.se, eb_a.se, lambda_est.se, kappa_est.se, delta_est.se)
  parameter.se_df       <- as.data.frame(parameter_matrix.se)
  row.names(parameter.se_df) <- models
  names(parameter.se_df)     <- c("alpha", "EB-a", "lambda", "kappa", "delta")
  parameter.se_df

  ## ASSUMING no measurement error ##
  aic        <- numeric(length(models))
  aicc       <- numeric(length(models))
  lnl        <- numeric(length(models))
  parameters <- numeric(length(models))
  alpha      <- numeric(length(models))
  eb_a       <- numeric(length(models))
  lambda_est <- numeric(length(models))
  kappa_est  <- numeric(length(models))
  delta_est  <- numeric(length(models))

  for (m in 1:length(models)) {
    cat("\n\n\n\n\t*** ", paste(toupper(summaries_2[m]), ": fitting ", sep = ""), models[m], " *** \n", sep = "")
    tmp <- fitContinuous(phy, dat, SE = 0, model = models[m])
    print(tmp)
    aic[m]        <- round(tmp$opt$aic, 4)
    aicc[m]       <- round(tmp$opt$aicc, 4)
    lnl[m]        <- round(tmp$opt$lnL, 4)
    parameters[m] <- tmp$opt$k
    alpha[m]      <- ifelse(!is.null(tmp$opt$alpha),  round(tmp$opt$alpha, 4),  "------")
    eb_a[m]       <- ifelse(!is.null(tmp$opt$a),      round(tmp$opt$a, 4),      "------")
    lambda_est[m] <- ifelse(!is.null(tmp$opt$lambda), round(tmp$opt$lambda, 4), "------")
    kappa_est[m]  <- ifelse(!is.null(tmp$opt$kappa),  round(tmp$opt$kappa, 4),  "------")
    delta_est[m]  <- ifelse(!is.null(tmp$opt$delta),  round(tmp$opt$delta, 4),  "------")
  }

  ### Get list of parameters (no SE) ###
  parameter_matrix   <- matrix(, length(models), 5)
  parameter_matrix[] <- c(alpha, eb_a, lambda_est, kappa_est, delta_est)
  parameter_df       <- as.data.frame(parameter_matrix)
  row.names(parameter_df) <- models
  names(parameter_df)     <- c("alpha", "EB-a", "lambda", "kappa", "delta")
  parameter_df

  ## COMPARE AIC ##
  names(aic.se) <- names(lnl.se) <- names(aic) <- names(lnl) <- models   # label the stored values by model
  delta_aic <- function(x) x - x[which(x == min(x))]   # delta-AIC: subtract the lowest (best) score

  # no measurement error
  daic     <- round(delta_aic(aicc), 3)
  daic_sig <- matrix(, 1, length(daic))
  for (tt in 1:length(daic)) {
    # rough support categories: <= 3 "ns"; 3-6 "*"; 6-9 "**"; > 9 "***"
    ifelse(daic[tt] <= 3, daic_sig[1, tt] <- "ns",
           ifelse(daic[tt] > 3 & daic[tt] <= 6, daic_sig[1, tt] <- "*",
                  ifelse(daic[tt] > 6 & daic[tt] <= 9, daic_sig[1, tt] <- "**",
                         daic_sig[1, tt] <- "***")))
  }
  cat("\n\n\t\t\t\t*** MODEL COMPARISON no measurement error -- log10_Mass_g *** \n", sep = "")
  cat("\tdelta-AIC values for models assuming no measurement error \t\t\t\t zero indicates the best model\n\n")
  bb <- as.data.frame(rbind(daic, c(daic_sig)))
  names(bb)     <- models
  row.names(bb) <- c("delta_aicc", "significance")
  print(bb)
  # with measurement error
  daic.se     <- round(delta_aic(aicc.se), 3)
  daic.se_sig <- matrix(, 1, length(daic.se))
  for (tt in 1:length(daic.se)) {
    ifelse(daic.se[tt] <= 3, daic.se_sig[1, tt] <- "ns",
           ifelse(daic.se[tt] > 3 & daic.se[tt] <= 6, daic.se_sig[1, tt] <- "*",
                  ifelse(daic.se[tt] > 6 & daic.se[tt] <= 9, daic.se_sig[1, tt] <- "**",
                         daic.se_sig[1, tt] <- "***")))
  }
  cat("\n\n\t\t\t\t*** MODEL COMPARISON with measurement error -- log10_Mass_g ***\n", sep = "")
  cat("\t\t delta-AIC values for models estimating SE \t\t\t\t zero indicates the best model\n\n")
  cc <- as.data.frame(rbind(daic.se, c(daic.se_sig)))
  names(cc)     <- models
  row.names(cc) <- c("delta_aicc.se", "significance_SE")
  print(cc)
  cat("\n\n")

  # summary table combining both sets of results
  res_aicc <- rbind(aic, daic, daic_sig, aic.se, daic.se, daic.se_sig)
  rownames(res_aicc) <- c("AICc", "dAICc", "significance", "AICc_SE", "dAICc_SE", "significance_SE")
  res_aicc_df <- as.data.frame(res_aicc)
  print(res_aicc_df)

  cat("\n\n\t\t\t\t*** MODEL COMPARISON PARAMETERS -- without SE ***\n", sep = "")
  print(parameter_df)
  cat("\n\n\t\t\t\t*** MODEL COMPARISON PARAMETERS -- with SE ***\n", sep = "")
  print(parameter.se_df)
}

######### run the function #########
results_fitContinuous_one_trait(phy, dat)

# 20) This is the type of output that you might get:

#  [1] "OK"   <--- the data and tree are concordant
#
#  *** BROWNIAN MOTION -- BM: fitting BM with SE ***
#
#  GEIGER-fitted comparative model of continuous data
#   fitted 'BM' model parameters:
#     sigsq = 0.006036
#     SE = 0.000000
#     z0 = 2.814478
#
#   model summary:
#     log-likelihood = -183.527424
#     AIC = 373.054848
#     AICc = 373.108539
#     free parameters = 3
#
#  Convergence diagnostics:
#     optimization iterations = 100
#     failed iterations = 0
#     frequency of best fit = 0.31
#
#   object summary:
#     'lik' -- likelihood function
#     'bnd' -- bounds for likelihood search
#     'res' -- optimization iteration summary
#     'opt' -- maximum likelihood parameter estimates

# 21) The model comparison based on AIC

#  *** MODEL COMPARISON no measurement error -- log10_Mass_g ***
#  delta-AIC values for models assuming no measurement error; zero indicates the best model
#
#                    BM     OU    EB   white lambda kappa  delta
#  delta_aicc    59.282 59.092 61.31 719.142      0 6.169 60.808
#  significance     ***    ***   ***     ***     ns    **    ***
#
#  *** MODEL COMPARISON with measurement error -- log10_Mass_g ***
#  delta-AIC values for models estimating SE; zero indicates the best model
#
#                      BM    OU    EB   white lambda kappa delta
#  delta_aicc.se    0.623 2.659 0.141 721.792  2.659 0.634     0
#  significance_SE     ns    ns    ns     ***     ns    ns    ns
#
#                         BM       OU       EB     white   lambda    kappa    delta
#  AICc             371.0548 370.8381 373.0556 1030.9143 311.7458 317.9148 372.5542
#  dAICc              59.282   59.092    61.31   719.142        0    6.169   60.808
#  significance          ***      ***      ***       ***       ns       **      ***
#  AICc_SE          311.7458 313.7458 311.2271 1032.9143 313.7458 311.7201 311.0864
#  dAICc_SE            0.623    2.659    0.141   721.792    2.659    0.634        0
#  significance_SE        ns       ns       ns       ***       ns       ns       ns

# 22) The model comparison parameters (summary statistics):

#  *** MODEL COMPARISON PARAMETERS -- without SE ***
#          alpha   EB-a  lambda  kappa  delta
#  BM     ------ ------  ------ ------ ------
#  OU     0.0031 ------  ------ ------ ------
#  EB     ------      0  ------ ------ ------
#  white  ------ ------  ------ ------ ------
#  lambda ------ ------  0.9903 ------ ------
#  kappa  ------ ------  ------ 0.6987 ------
#  delta  ------ ------  ------ ------ 1.2214
#
#  *** MODEL COMPARISON PARAMETERS -- with SE ***
#          alpha    EB-a  lambda  kappa  delta
#  BM     ------  ------  ------ ------ ------
#  OU          0  ------  ------ ------ ------
#  EB     ------ -0.0082  ------ ------ ------
#  white  ------  ------  ------ ------ ------
#  lambda ------  ------  0.9903 ------ ------
#  kappa  ------  ------  ------ 0.8636 ------
#  delta  ------  ------  ------ ------ 0.4981
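# Optional follow-up (not produced by the course function): Akaike weights turn the AICc values
# into relative model support. The numbers below are the no-measurement-error values reported
# in the comparison table above.
aicc_vals <- c(BM = 371.0548, OU = 370.8381, EB = 373.0556, white = 1030.9143,
               lambda = 311.7458, kappa = 317.9148, delta = 372.5542)
delta_aicc <- aicc_vals - min(aicc_vals)
akaike_wts <- exp(-0.5 * delta_aicc) / sum(exp(-0.5 * delta_aicc))
round(akaike_wts, 3)   # lambda should carry most of the weight (~0.96), kappa nearly all of the rest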
# 23) The best-fitting model was lambda, assuming no measurement error

#  *** PAGEL'S LAMBDA WITH NO ME: fitting lambda ***
#
#  GEIGER-fitted comparative model of continuous data
#   fitted 'lambda' model parameters:
#     lambda = 0.990334   <--- notice the high lambda value
#     sigsq = 0.004739
#     z0 = 2.812349
#
#   model summary:
#     log-likelihood = -152.872911
#     AIC = 311.745822
#     AICc = 311.799513
#     free parameters = 3
#
#  Convergence diagnostics:
#     optimization iterations = 100
#     failed iterations = 0
#     frequency of best fit = 0.12
#
#   object summary:
#     'lik' -- likelihood function
#     'bnd' -- bounds for likelihood search
#     'res' -- optimization iteration summary
#     'opt' -- maximum likelihood parameter estimates

####################  Phylogenetic Signal for Continuous Characters  #########################

# 24) Let's test for phylogenetic signal
# We can use Pagel's lambda to assess phylogenetic signal (the tendency of closely related
# species to resemble each other for a specific trait). Lambda is a tree-transformation
# parameter that multiplies the internal branches of the tree by values between 0 and 1.
#   A lambda of 1 indicates strong signal.
#   A lambda of 0 indicates no signal.
# We can determine the magnitude of the signal by estimating lambda on our tree and then
# comparing its log-likelihood with that of a tree transformed into a star phylogeny
# (i.e., internal branches multiplied by lambda = 0).
#
# Pagel, M. 1994. Detecting correlated evolution on phylogenies: a general method for the
# comparative analysis of discrete characters. Proceedings of the Royal Society of London B
# 255:37-45.

# 25) Let's build our lambda = 0 tree

bird_pruned_tree_lambda_0 <- rescale(bird_pruned_tree, model = "lambda", 0)

# Let's plot this tree next to the original bird tree
par(mfrow = c(1, 2))
plot(bird_pruned_tree, edge.width = 1, show.tip.label = FALSE)            # original tree
add.scale.bar(cex = 0.7, font = 2, col = "red")
plot(bird_pruned_tree_lambda_0, edge.width = 1, show.tip.label = FALSE)   # lambda = 0 (star-like) tree
add.scale.bar(cex = 0.7, font = 2, col = "red")

# 26) Let's fit the lambda transformation to determine whether there is signal

# Fit lambda to our tree
our_data_fitContinuous <- fitContinuous(phy, dat, SE = 0, model = "lambda", bounds = list(SE = c(0, 0.5)))
our_data_fitContinuous

#  GEIGER-fitted comparative model of continuous data
#   fitted 'lambda' model parameters:
#     lambda = 0.990334
#     sigsq = 0.004739
#     z0 = 2.812349
#
#   model summary:
#     log-likelihood = -152.872911
#     AIC = 311.745822
#     AICc = 311.799513
#     free parameters = 3
#
#  Convergence diagnostics:
#     optimization iterations = 100
#     failed iterations = 0
#     frequency of best fit = 0.14
#
#   object summary:
#     'lik' -- likelihood function
#     'bnd' -- bounds for likelihood search
#     'res' -- optimization iteration summary
#     'opt' -- maximum likelihood parameter estimates

# 27) Fit the lambda = 0 tree (no signal)

our_data_fitContinuous_lambda_0 <- fitContinuous(bird_pruned_tree_lambda_0, dat, SE = 0,
                                                 model = "lambda", bounds = list(SE = c(0, 0.5)))
our_data_fitContinuous_lambda_0

#  GEIGER-fitted comparative model of continuous data
#   fitted 'lambda' model parameters:
#     lambda = 0.000000
#     sigsq = 0.004776
#     z0 = 1.938157
#
#   model summary:
#     log-likelihood = -513.457139
#     AIC = 1034.914278
#     AICc = 1035.003965
#     free parameters = 4
#
#  Convergence diagnostics:
#     optimization iterations = 100
#     failed iterations = 0
#     frequency of best fit = 0.69
#
#   object summary:
#     'lik' -- likelihood function
#     'bnd' -- bounds for likelihood search
#     'res' -- optimization iteration summary
#     'opt' -- maximum likelihood parameter estimates
# 28) For model comparison, the model with the lowest AICc score is preferred

our_tree_versus_lambda_0_aicc <- abs(our_data_fitContinuous_lambda_0$opt$aicc -
                                     our_data_fitContinuous$opt$aicc)
our_tree_versus_lambda_0_aicc
# [1] 723.2045 -- the lambda = 0 (no signal) tree is very unlikely

##### our tree versus the lambda = 0 tree: likelihood-ratio test
d_our_tree_versus_lambda_0 <- abs(2 * (our_data_fitContinuous_lambda_0$opt$lnL -
                                       our_data_fitContinuous$opt$lnL))
d_our_tree_versus_lambda_0
# [1] 721.1685

p_our_tree_versus_lambda_0 <- pchisq(d_our_tree_versus_lambda_0, df = 1, lower.tail = FALSE)
p_our_tree_versus_lambda_0
# [1] 7.457298e-159 -- rejects the no-signal model

##############################################################################################################
##############################################################################################################
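# Optional aside (not part of the original script): phytools' phylosig() gives an independent
# estimate of Pagel's lambda for the same named trait vector; with test = TRUE it also runs a
# likelihood-ratio test against lambda = 0, which should lead to the same conclusion as the
# comparison above.
phylosig(phy, dat, method = "lambda", test = TRUE)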