# Tree simulations # 1) Make a working directory (e.g., tree simulations) and select it as your working. setwd("/Users/jcsantos/Desktop/R_class_winter_2015_home/0_Bio559R_course_final_files/week_9_office/data") # 2) Several packages and functions are available for this purpose. library(ape) library(geiger) install.packages("TreeSim") library(TreeSim) install.packages("phyclust") library(phyclust) # 3) Load our trees and data from last class and check concordance: bird_pruned_tree <- read.tree("bird_pruned_MB.newick") bird_pruned_tree birds_traits_data <- read.table("bird_data_pruned.txt", header = TRUE, sep = "\t") birds_traits_data # 4) use the 'name.check' function to determine if we have concordance name.check (bird_pruned_tree, birds_traits_data) # we check the concordance between a data file and a phylogenetic tree. # [1] "OK" # we have concordance (i.e., we the same number of tips and data per tip) ###################### Tree simulations in ape ############################### # 5) Generates Random Trees. We are going to use our pruned tree as reference bird_pruned_tree # Phylogenetic tree with 451 tips and 450 internal nodes. # # Tip labels: # Nothoprocta_perdicaria, Apteryx_australis, Apteryx_haastii, Apteryx_owenii, Dromaius_novaehollandiae, Struthio_camelus, ... #Rooted; includes branch lengths. # 6) We will use the following functions: rtree # basic random trees: # n is number of tips same number of our tree: length(bird_pruned_tree$tip.label) # rooted a logical (TRUE or FALSE) # tip.label provide a list of species names: bird_pruned_tree$tip.label bird_pruned_random_tree <- rtree(n = length(bird_pruned_tree$tip.label), rooted = TRUE, tip.label = bird_pruned_tree$tip.label) is.ultrametric(bird_pruned_random_tree) # [1] FALSE # 7) We will use the following functions: rcoal # Random coalescent tree: branching times determined by coalescent process (i.e., ultrametric and nodes pushed towards tips) bird_pruned_random_coalescent_tree <- rcoal(n = length(bird_pruned_tree$tip.label), rooted = TRUE, tip.label = bird_pruned_tree$tip.label) is.ultrametric(bird_pruned_random_coalescent_tree) # [1] TRUE # generate for random ultrametric trees: for (i in 1:4) { # generate 4 random trees, save and append to tree file tree <- rcoal(n = length(bird_pruned_tree$tip.label), rooted = TRUE, tip.label = bird_pruned_tree$tip.label) write.tree(tree, file = "rcol_random_trees.tre", append = TRUE, digits = 10, tree.names = FALSE) } # 8) Plot trees simulated with rtree and rcoal par(mfrow=c(1,2)) plot(bird_pruned_random_tree, show.tip.label = FALSE) title("Random tree") add.scale.bar(cex = 0.7, font = 2, col = "red") plot(bird_pruned_random_coalescent_tree, show.tip.label = FALSE) title("Random coalescent tree") add.scale.bar(cex = 0.7, font = 2, col = "red") # 9) Generates Systematic Regular Trees using: 'stree' # tip.label provide a list of species names: bird_pruned_tree$tip.label # n is number of tips same number of our tree: length(bird_pruned_tree$tip.label) # type: “star”a star (or comb) tree with a single internal node. # “balanced”a fully balanced dichotomous rooted tree; n must be a power of 2 # “left”a fully unbalanced rooted tree where the largest clade is on the left-hand side # “right”a fully unbalanced rooted tree where the largest clade is on the right-hand side random_fan_tree <- stree(length(bird_pruned_tree$tip.label), type = "star", tip.label = bird_pruned_tree$tip.label) random_left_tree <- stree(length(bird_pruned_tree$tip.label), type = "left", tip.label = bird_pruned_tree$tip.label) par(mfrow=c(1,2)) plot(random_fan_tree, show.tip.label = FALSE) title("Random fan tree") add.scale.bar(cex = 0.7, font = 2, col = "red") plot(random_left_tree, show.tip.label = FALSE) title("Random left tree") add.scale.bar(cex = 0.7, font = 2, col = "red") ######################################### Geiger ######################################### # 10) Tree Simulations Under the Time-Dependent Birth–Death Models # functions: sim.bdtree # sim.bdtree(b=1, d=0, stop=c("taxa", "time"), n=100, t=4, seed=0, extinct=TRUE) # b per-lineage birth (speciation) rate # d per-lineage death (extinction) rate # stop stopping criterion # n maximum number of taxa in simulation # t maximum time steps of simulation # seed random number seed (default is to seed based on the clock) # extinct whether to return trees where all lineages have gone extinct (see Details) # We will use 'drop.fossil' function from ape to drop extinct tips is a utility function # to remove the extinct species. # 11) Plot tree simulations and plots # Yule process (pure-birth i.e. no extinction) # birth rate: 0.1 # death rate: 0 simul_yule_process_lambda_0 <- sim.bdtree(b=0.1, d=0, stop=c("taxa"), n=length(bird_pruned_tree$tip.label), t=4, seed=0, extinct=TRUE) simul_yule_process_lambda_0$tip.label <- bird_pruned_tree$tip.label # give the same names as in the bird_pruned_tree plot(simul_yule_process_lambda_0, show.tip.label = FALSE) title("Tree simulated under the pure-birth (Yule process)") simul_yule_process_lambda_0 #Phylogenetic tree with 451 tips and 450 internal nodes. <--- This is the same number of tips # simple birth-death process # birth rate: 0.1 # death rate: 0.05 simul_birth_0_1_death_0_05 <- sim.bdtree(b=0.1, d=0.05, stop=c("taxa"), n=length(bird_pruned_tree$tip.label), t=4, seed=0, extinct=TRUE) simul_birth_0_1_death_0_05 # Phylogenetic tree with 883 tips and 882 internal nodes. <-- this way more that the tips that we have in our tree simul_birth_0_1_death_0_05_no_extict <- drop.fossil(simul_birth_0_1_death_0_05, tol = 1e-8) simul_birth_0_1_death_0_05_no_extict # Phylogenetic tree with 160 tips and 159 internal nodes. <-- number of extant taxa (no extinct), way lower par(mfrow=c(1,2)) plot(simul_birth_0_1_death_0_05, show.tip.label = FALSE) title("Tree simulated under the birth-death process") add.scale.bar(cex = 0.7, font = 2, col = "red") plot(simul_birth_0_1_death_0_05_no_extict, show.tip.label = FALSE) title("The same tree without extinct taxa") add.scale.bar(cex = 0.7, font = 2, col = "red") ######################################### TreeSim ######################################### # 12) Tree Simulations Under the Time-Dependent Birth–Death Models with a given age on a #fixed number of extant taxa. # sim.bd.taxa.age(n, numbsim, lambda, mu, frac = 1, age, mrca = FALSE) # n Number of extant sampled tips. # numbsim Number of trees to simulate. # lambda Speciation rate. # mu Extinction rate. # frac Each tip is included into the final tree with probability frac. # age The time since origin / most recent common ancestor. # mrca If mrca = FALSE: The time since the origin of the process. # If mrca = TRUE: The time since the most recent common ancestor of the sampled species. # 13) Estimate tree height install.packages("phyclust") library(phyclust) tree_height <- get.rooted.tree.height(bird_pruned_tree) tree_height # [1] 119.4931 # We can use this value to rescale our tree later # Tree simulation using: sim.bd.taxa.age tree_sim_bd <- sim.bd.taxa.age(length(bird_pruned_tree$tip.label), numbsim = 1, lambda = 0.1, mu = 0.05, frac = 1, age = tree_height, mrca = FALSE) # It will return a list simul_birth_0_1_death_0_05_tree_sim <- tree_sim_bd [[1]] simul_birth_0_1_death_0_05_tree_sim$tip.label <- bird_pruned_tree$tip.label # give the same names as in the bird_pruned_tree simul_birth_0_1_death_0_05_tree_sim # Phylogenetic tree with 451 tips and 450 internal nodes. # 14) Simulate multiple trees # generate for random ultrametric trees: for (i in 1:4) { # generate 4 random trees, save and append to tree file tree_sim_bd <- sim.bd.taxa.age(length(bird_pruned_tree$tip.label), numbsim = 1, lambda = 0.1, mu = 0.05, frac = 1, age = tree_height, mrca = FALSE) simul_tree_sim <- tree_sim_bd [[1]] write.tree(tree, file = "simul_birth_0_1_death_0_05_tree_sim_trees.tre", append = TRUE, digits = 10, tree.names = FALSE) } # 15) Read the trees and plot the tree bird_pruned_tree <- read.tree("bird_pruned_MB.newick") simul_birth_0_1_death_0_05_tree_sim_trees <- read.tree("simul_birth_0_1_death_0_05_tree_sim_trees.tre") # Plot the trees par(mfrow=c(1,2)) plot(bird_pruned_tree, show.tip.label = FALSE) title("Our bird_pruned_tree") add.scale.bar(cex = 0.7, font = 2, col = "red") plot(simul_birth_0_1_death_0_05_tree_sim_trees[[1]], show.tip.label = FALSE) title("One of simulated under the birth-death process with TreeSim") add.scale.bar(cex = 0.7, font = 2, col = "red")