# Character evolution usinf BiSSE # 1) Make a working directory (e.g., BiSSE analyses) and select it as your working. setwd("/Users/corytophanes/Desktop/week_10/") # 2) Several packages and functions are available for this purpose. library(ape) install.packages("diversitree") #only works in mac-Snow Leopard and Windows library(geiger) library(diversitree) ##### To install a source with no binary in Mavericks ##### # from webpage: https://gist.github.com/jonchang/f7206f2e66dd665ec2b3 # open your working directory in terminal # copy: curl -O https://dl.dropboxusercontent.com/u/8859543/diversitree_0.9-7.tgz # In R, find the location or path to your R-libraries: .libPaths() # [1] "/Library/Frameworks/R.framework/Versions/3.1/Resources/library" # copy the unziped 'diversitree' folder to that library path # load library normally: library(diversitree) # Notice: If you want to install many related packages at once use: # install.views("Phylogenetics") # 3) Load our trees and data from last class and check concordance: bird_pruned_tree <- read.tree("bird_pruned_MB.newick") bird_pruned_tree birds_traits_data <- read.table("bird_data_pruned.txt", header = TRUE, sep = "\t") # 4) select only a binary variable # only migration birds_migration_temp <- as.data.frame(birds_traits_data$Migration) # Select this binary data and make it a vector birds_migration_temp$species <- birds_traits_data$Genus_Species birds_migration_temp # eliminate incomplete cases and make data frame birds_migration_data <- birds_migration_temp[complete.cases(birds_migration_temp),] row.names(birds_migration_data) <- birds_migration_data$species names(birds_migration_data) <- c("migration", "genus_species") birds_migration_data # make factors to characters birds_migration_data$migration <- as.character(birds_migration_data$migration) # make data truly binary (0-no migratory and 1-migratory) birds_migration_data$migration[birds_migration_data$migration == "No"] <- "0" birds_migration_data$migration[birds_migration_data$migration == "Yes"] <- "1" birds_migration_data # 5) use the 'name.check' function to determine if we have concordance name.check (bird_pruned_tree, birds_migration_data) # we check the concordance between a data file and a phylogenetic tree. # NOT CONCORDANT # Provides a list of species in tree with no data ($tree_not_data) # 6) We make concordant need to prepare our data using the function 'treedata' birds_migration <- treedata(bird_pruned_tree, birds_migration_data) birds_migration$phy # a class phylo birds_migration$data # a matrix # 7) We are going to prepare our data as before, but we are going to use the modified code # for the function that is present in the website: birds_migration_phy <- birds_migration$phy birds_migration_dat <- as.numeric(birds_migration$data[,1]) names(birds_migration_dat) <- birds_migration$data[,2] birds_migration_dat birds_migration_dat_matrix <- as.matrix(birds_migration_dat) # 9) Let's start with the most simple BiSSE analyses. A constant rate of speciation and extinction (i.e., fitting # a birth-death model) through the chronogram. This model assumes that the net diversification (birth - death) # does not change over the lineage history or it is associated with phenotype. # As in most comparative analyses, we need to estimate the most complex model and then constraint parameters. Then, # compare model fit between the complex and simple models, which will allow us to determine its significance. # in this case, as we are looking at constant rates, that means estimating both a speciation and an extinction rate for the phylogeny. Let's first make the likelihood function: #### Running diversification models in diversitree # 10) Step 1: Define a likelihood function for the phylogeny. This function takes parameter values as arguments and returns the log-likelihood associated with those parameter values. birds_migration_bd_full.lik <- make.bd(birds_migration_phy) birds_migration_bd_full.lik # Constant rate birth-death likelihood function: # * Parameter vector takes 2 elements: # - lambda, mu # * Function takes arguments (with defaults) # - pars: Parameter vector # - condition.surv [TRUE]: Condition likelihood on survial? # - intermediates [FALSE]: Also return intermediate values? # * Phylogeny with 399 tips and 398 nodes # - Taxa: Nothoprocta_perdicaria, Apteryx_australis, Apteryx_haastii, Apteryx_owenii, ... # * Reference: # - Nee et al. (1994) doi:10.1098/rstb.1994.0068 # R definition: # function (pars, condition.surv = TRUE, intermediates = FALSE) argnames(birds_migration_bd_full.lik) # function of diversitree that gives the name of parameters # [1] "lambda" "mu" --> these are the speciation-lambda and extinction-mu rates # 11) Step 2: we can estimate the likelihood for a given set of parameter values using the likelihood function that we estimated for our tree. # pure-birth: c(0.5, 0) --> lambda = 0.5 and extinction = 0 (Yule process) birds_migration_bd_full.lik(c(0.5, 0)) # [1] -3178.799 # birth-death values: c(0.2, 0.1) birds_migration_bd_full.lik(c(0.2, 0.1)) # [1] -63.90782 # We can compare both likelihood scores and we want the highest (less negative) score. # Then, birth-death model with lambda = 0.2 and extinction = 0.1 is much better. # 12) Step 3: We can find the MLE (maximum likelihood estimator) that will select the best values # for lambda and mu that maximize the likelihood score using find.mle(). # you can also use to get starting points for estation: starting.point.bd(birds_migration_phy, yule=FALSE) # lambda mu # 0.04057479 0.00000000 parameters_bd <- c(0.2, 0.1) birds_migration_bd_fit_mle <- find.mle(birds_migration_bd_full.lik, parameters_bd) birds_migration_bd_fit_mle # $par # lambda mu # $ 4.836804e-02 4.944044e-06 # $lnLik # [1] 312.7964 # $counts # [1] 21 # $code # [1] 2 # $gradient # [1] 1575.9634 -451.2177 # $method # [1] "nlm" # $func.class # [1] "bd" "dtlik" "function" # attr(,"func") # Constant rate birth-death likelihood function: # * Parameter vector takes 2 elements: # - lambda, mu # * Function takes arguments (with defaults) # - pars: Parameter vector # - condition.surv [TRUE]: Condition likelihood on survial? # - intermediates [FALSE]: Also return intermediate values? # * Phylogeny with 399 tips and 398 nodes # - Taxa: Nothoprocta_perdicaria, Apteryx_australis, Apteryx_haastii, Apteryx_owenii, ... # * Reference: # - Nee et al. (1994) doi:10.1098/rstb.1994.0068 # R definition: # function (pars, condition.surv = TRUE, intermediates = FALSE) # attr(,"class") # [1] "fit.mle.bd" "fit.mle" # 13) you can get some of the specific results of the function names(birds_migration_bd_fit_mle) # [1] "par" "lnLik" "counts" "code" "gradient" "method" "func.class" birds_migration_bd_fit_mle$par # gives lambda and mu birds_migration_bd_fit_mle$lnLik # gives the lnLik (score) birds_migration_bd_fit_mle$count # The number of function evaluations performed during the search. birds_migration_bd_fit_mle$code # Convergence code and 0 is preferred coef(birds_migration_bd_fit_mle) # gives lambda and mu logLik(birds_migration_bd_fit_mle) # gives the log Lik.' 312.7964 (df=2) AIC(birds_migration_bd_fit_mle) # -621.5928 score # 14) Estimate the net diversification rate (i.e., r = lambda - mu) birds_migration_bd_fit_mle_r <- birds_migration_bd_fit_mle$par[1] - birds_migration_bd_fit_mle$par[2] birds_migration_bd_fit_mle_r <- birds_migration_bd_fit_mle$par['lambda'] - birds_migration_bd_fit_mle$par['mu'] # the same process # lambda # 0.0483631 birds_migration_bd_fit_mle$par[1]/birds_migration_bd_fit_mle$par[2] # lambda # 9783.093 # Notice that under this birth-death model, the value of lambda is very large if compared to mu (almost 10000 times larger) # 15) Defining constrain (simpler) models is necessary before we compare our full model that # will allow us to determine interesting trends in our data. We use the function constrain to # create new function that satisfies the condition # constrain (lik function, parameters to constrain to be equal defined by parameter_A '~' parameter_B or value) bird_mu_0.lik <- constrain(birds_migration_bd_full.lik, mu ~ 0) # pure birth argnames(bird_mu_0.lik) # [1] "lambda" --> only lambda is a free parameter bird_lambda_equal_mu.lik <- constrain(birds_migration_bd_full.lik, lambda ~ mu) argnames(bird_lambda_equal_mu.lik) # [1] "mu" --> only mu is a free parameter # 16) Let's evaluate constrain functions birds_migration_mu_0_mle <- find.mle(bird_mu_0.lik, 0.1) # 0.1 is an starting parameter for lambda birds_migration_mu_0_mle # $par # lambda # 0.04057479 # $lnLik # [1] 319.2956 # $counts # [1] 8 # $code # [1] 1 logLik(birds_migration_mu_0_mle) # 'log Lik.' 319.2956 (df=1) ##### birds_migration_lambda_equal_mu_mle <- find.mle(bird_lambda_equal_mu.lik, 0.1) # 0.1 is an starting parameter for mu birds_migration_lambda_equal_mu_mle # $par # mu # 0.1 # $lnLik # [1] 221.9229 # $counts # [1] 0 # $code # [1] 1 logLik(birds_migration_lambda_equal_mu_mle) # 'log Lik.' 221.9229 (df=1) # 17) Model comparison using ANOVA # extinction constraint is null anova(birds_migration_bd_fit_mle, mu_0=birds_migration_mu_0_mle) # Df lnLik AIC ChiSq Pr(>|Chi|) # full 2 312.8 -621.59 # mu_0 1 319.3 -636.59 -12.998 1 # Warning message: # In anova.fit.mle(birds_migration_bd_fit_mle, mu_0 = birds_migration_mu_0_mle) : # Impossible chi-square values: probable convergence failures # Notice: that the chi-square is not valid. However, see the AIC difference # > 10 and the best model (full) has lowest AIC. Therefore, speciation is higher than extinction # speciation and extinction equal anova(birds_migration_bd_fit_mle, lambda_equal_mu=birds_migration_lambda_equal_mu_mle) # Df lnLik AIC ChiSq Pr(>|Chi|) # full 2 312.80 -621.59 # lambda_equal_mu 1 221.92 -441.85 181.75 < 2.2e-16 *** # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # We reject that lambda and mu are equal # 18) Let's try character evolution using diversitree. We will use the basic Mk2 and # Mk-n Models of character evolution. This fits the Pagel 1994 model, duplicating the # ace function in ape. Differences with that function include # (1) alternative root treatments are possible, # (2) easier to tweak parameter combinations through constrain # (3) run both MCMC and MLE fits to parameters. # Three flavors: # binary: make.mk2(tree, states, strict=TRUE, control=list()) # k states: make.mkn(tree, states, k, strict=TRUE, control=list()) # k states ordered: make.mkn.meristic(tree, states, k, control=list()) # Preparing data for binary birds_migration_phy birds_migration_dat_df <- as.data.frame(birds_migration_dat) # make data frame birds_migration_dat_df$species_name <- rownames(birds_migration_dat_df) # crate column with species names birds_migration_dat_ordered <- birds_migration_dat_df[birds_migration_phy$tip.label,] # order data based on tree label order birds_migration_dat_bin <- birds_migration_dat_ordered[,1] # get vector of binary trait names(birds_migration_dat_bin) <- rownames(birds_migration_dat_ordered) # name elements of binary trait by species birds_migration_dat_bin # 19) Prepare Mk-2 model comparison # get the likelihood function birds_migration_mk2_full.lik <- make.mk2(birds_migration_phy, birds_migration_dat_bin) argnames(birds_migration_mk2_full.lik) # [1] "q01" "q10" this indicated the free parameters q01 (non-migratory to migratory) # q10 (migratory to non-migratory) # define constrain equal (q01 = q10) versus different rates migratory_contratin.lik <- constrain(birds_migration_mk2_full.lik, q01 ~ q10) argnames(migratory_contratin.lik) # [1] "q10" # evaluate models: birds_migration_mke_full_mle <- find.mle(birds_migration_mk2_full.lik, c(0.2, 0.1)) # q01 = 0.2, q10 = 0.1 are starting value birds_migration_mke_full_mle # $par # q01 q10 # 0.00645963 0.01744694 # # $lnLik # [1] -193.3988 # # $counts # [1] 82 # # $convergence # [1] 0 <-- did converge code "0" is usually good (more info ?find.mle) birds_migration_mke_constrain_mle <- find.mle(migratory_contratin.lik, 0.1) # q01 = q10 = 0.1 is a starting value birds_migration_mke_constrain_mle # $par # q10 # 0.008105469 # $lnLik # [1] -199.2046 # $counts # [1] 36 # $convergence # [1] 0 <-- did converge code "0" is usually good (more info ?find.mle) # 20) Compare models anova(birds_migration_mke_full_mle, q01_equal_q10=birds_migration_mke_constrain_mle) # Df lnLik AIC ChiSq Pr(>|Chi|) # full 2 -193.40 390.80 # q01_equal_q10 1 -199.21 400.41 11.612 0.0006554 *** # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # We conclude that q01 and q10 are different and q10 > q01 (it is easier to become # a non-migratory bird from a migratory state. # 21) We can estimate the likelihood that the states at internal nodes in the phylogeny # using the function: asr.marginal ?asr.marginal birds_migration_mke_anc.states <- asr.marginal(birds_migration_mk2_full.lik, birds_migration_mke_full_mle$par) birds_migration_mke_anc.states_t <- t(birds_migration_mke_anc.states) # Transposition for plotting #22) Plot reconstructions on tree using ape plot(birds_migration_phy, no.margin=TRUE, cex=0.5, label.offset=4, type = "fan") tip_colors <- rep('blue', times=length(birds_migration_dat_bin)) # we are going to create a vector and consider blue -- no migratory tip_colors[birds_migration_dat_bin == 1] <- 'red' # assign color red to migratory birds tiplabels(bg=tip_colors, pch=21, adj=2.5) # Plot the tip labels on tree nodelabels(pie=birds_migration_mke_anc.states_t, piecol=c("blue","red"), cex=0.12) ####################### BiSSE Model ###################################################### #23) Prepare BiSSE model comparison. Remember BiSSE represent the parameters: # Based on a single binary trait here are the following parameters: # λ0 --> speciation rate associated to state 0 -- no migration # λ1 --> speciation rate associated to state 1 -- migration # μ0 --> extinction rate associated to state 0 -- no migration # μ1 --> extinction rate associated to state 1 -- migration # q01 --> transition from 0 -- no migration to 1 -- migration # q10 --> transition from 1 -- migration to 0 -- no migration # 24) Let's define the full likelihood BiSSE model and we can provide starting values for # the likelihood search for more complicated analyses (like this one). # full BiSSE Model birds_migration_BiSSE_full.lik <- make.bisse(birds_migration_phy, birds_migration_dat_bin) argnames(birds_migration_BiSSE_full.lik) # [1] "lambda0" "lambda1" "mu0" "mu1" "q01" "q10" --> we can use these to define constrain models # get the starting values starting_values_birds_migration <- starting.point.bisse(birds_migration_phy) starting_values_birds_migration # lambda0 lambda1 mu0 mu1 q01 q10 # 0.040574788 0.040574788 0.000000000 0.000000000 0.008114958 0.008114958 # Find the MLE for full model birds_migration_BiSSE_full_mle <- find.mle(birds_migration_BiSSE_full.lik, starting_values_birds_migration) birds_migration_BiSSE_full_mle # $par # lambda0 lambda1 mu0 mu1 q01 q10 # 0.032968551 0.052087911 0.000000000 0.000000000 0.003888446 0.025049772 # $lnLik # [1] -1856.445 # $counts # [1] 304 # $convergence # [1] 0 <-- evidence of convergence # 25) Define different constrain models # same speciation rate equal (lambda0 = lambda1) starting_values_birds_migration[2:6] # less starting parameters # lambda1 mu0 mu1 q01 q10 # 0.040574788 0.000000000 0.000000000 0.008114958 0.008114958 migratory_constrain_speciation_rate_BiSSE.lik <- constrain(birds_migration_BiSSE_full.lik, lambda0 ~ lambda1) constratin_speciation_rate_BiSSE_mle <- find.mle(migratory_constrain_speciation_rate_BiSSE.lik, starting_values_birds_migration[2:6]) constratin_speciation_rate_BiSSE_mle # $par # lambda1 mu0 mu1 q01 q10 # 4.059865e-02 3.272882e-05 8.947200e-06 6.441251e-03 1.749788e-02 # $lnLik # [1] -1862.631 # $counts # [1] 366 # $convergence # [1] 0 # define constrain equal (q01 = q10) starting_values_birds_migration[1:5] migratory_constrain_trasition_rate_BiSSE.lik <- constrain(birds_migration_BiSSE_full.lik, q01 ~ q10) constratin_trasition_rate_BiSSE_mle <- find.mle(migratory_constrain_trasition_rate_BiSSE.lik, starting_values_birds_migration[1:5]) constratin_trasition_rate_BiSSE_mle # $par # lambda0 lambda1 mu0 mu1 q01 # 3.941650e-02 5.175425e-02 7.375324e-07 1.484734e-02 8.733015e-03 # $lnLik # [1] -1866.894 # $counts # [1] 343 # $convergence # [1] 0 # Make a list of coefficients for full and constrain BiSSE models install.packages("gtools") library(gtools) migration_full <- coef(birds_migration_BiSSE_full_mle) speciation_cons <- coef(constratin_speciation_rate_BiSSE_mle) transition_cons <- coef(constratin_trasition_rate_BiSSE_mle) all_models <- smartbind(migration_full, speciation_cons, transition_cons) rownames(all_models) <- c("full", "equal_lambda", "equal_transition") all_models # lambda0 lambda1 mu0 mu1 q01 q10 # full 0.03296855 0.05208791 0.000000e+00 0.0000000000 0.003888446 0.02504977 # equal_lambda NA 0.04059865 3.272882e-05 0.0000089472 0.006441251 0.01749788 # equal_transition 0.03941650 0.05175425 7.375324e-07 0.0148473383 0.008733015 NA # 26) Compare models anova(birds_migration_BiSSE_full_mle, equal_speciation = constratin_speciation_rate_BiSSE_mle) # Df lnLik AIC ChiSq Pr(>|Chi|) # full 6 -1856.4 3724.9 # equal_speciation 5 -1862.6 3735.3 12.373 0.0004356 *** # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # Rejects: equal speciation then lambda1 > lambda0 (migratory speciate more) anova(birds_migration_BiSSE_full_mle, equal_transition =constratin_trasition_rate_BiSSE_mle) # Df lnLik AIC ChiSq Pr(>|Chi|) # full 6 -1856.4 3724.9 # equal_transition 5 -1866.9 3743.8 20.898 4.843e-06 *** # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # Rejects: equal rates then q10 > q01 (higher rate of transition from migratory to non-migratory) # 27) Let's reconstruct ancestral states using the BiSSE model birds_migration_bisse_anc.states <- asr.marginal(birds_migration_BiSSE_full.lik, birds_migration_BiSSE_full_mle$par) birds_migration_bisse_anc.states_t <- t(birds_migration_bisse_anc.states) # 28) Plot reconstructions on tree using ape plot(birds_migration_phy, no.margin=TRUE, cex=0.5, label.offset=4, type = "fan") tip_colors <- rep('blue', times=length(birds_migration_dat_bin)) # we are going to create a vector and consider blue -- no migratory tip_colors[birds_migration_dat_bin == 1] <- 'red' # assign color red to migratory birds tiplabels(bg=tip_colors, pch=21, adj=2.5) # Plot the tip labels on tree nodelabels(pie=birds_migration_bisse_anc.states_t, piecol=c("blue","red"), cex=0.12) ##########################################################################################