#Supertrees and data concordance #1) set working directory setwd("/Users/jcsantos/Desktop/R_class_winter_2015_home/8_tree_retrival/supertree/") #2) read 'ape' package library(ape) library(geiger) library(phytools) #3) read some bird trees repository from: http://birdtree.org/ using 'read.tree' function #http://www.nature.com/nature/journal/v491/n7424/full/nature11631.html bird_trees <-read.tree(file = "bird_tree.tree") bird_trees #5 trees in loaded bird_trees [[1]]# first tree a very large tree bird_tree_1 <- bird_trees [[1]]# assign first tree to object class phylo # Phylogenetic tree with 9993 tips and 9992 internal nodes. #plot(bird_tree_1, edge.width = 0.5, show.tip.label = FALSE, type = "fan") #It will take a while for a laptop #add.scale.bar(cex = 0.7, font = 2, col = "red") #4) check if our tree is ultrametric and binary is.ultrametric(bird_tree_1) # [1] TRUE is.binary.tree(bird_tree_1) # [1] TRUE-- rooted tree is considered binary if all nodes (including the root node) have exactly two descendant nodes #5) check if all our trees are ultrametric and binary lapply(1:5, function (x) {is.ultrametric (bird_trees [[x]])}) # if we want to check our 5 trees, create a function and run in loop lapply(1:5, function (x) {is.binary.tree (bird_trees [[x]])}) # similarly for is.binary #6) read our table of bird metabolic rates and other traits birds_traits_MR <- read.table("Birds_mass_BM_McNab_2009_updated_tab.txt", header = TRUE, sep = " ") #7) we need to assign row names as the species rownames(birds_traits_MR) <- birds_traits_MR$Genus_Species head(birds_traits_MR) #8) drop and create a common tree with both data and phylogeny bird_phylogeny_data <- treedata(bird_tree_1, birds_traits_MR) #long list of warnings, those are taxa or data that are not matched bird_phylogeny_data #we got a decent size tree #Phylogenetic tree with 451 tips and 450 internal nodes. #9) We can save our tree and data that is a new pruned tree bird_pruned <- bird_phylogeny_data$phy write.tree(bird_pruned, file ="bird_pruned_MB.newick") #10) We can do the same for the data bird_data_pruned <- bird_phylogeny_data$data class(bird_data_pruned)# is a matrix write.table(bird_data_pruned, file = "bird_data_pruned.txt", sep ="\t") #11) Let's read back our reduced tree bird_pruned_tree <- read.tree("bird_pruned_MB.newick") bird_pruned_tree plot(bird_pruned_tree, edge.width = 0.5, show.tip.label = FALSE, type = "fan") add.scale.bar(cex = 0.7, font = 2, col = "red") #12) Let's do the same for our reduced data birds_traits_MR <- read.table("bird_data_pruned.txt", header = TRUE, sep = "\t") birds_traits_MR #13) To plot, we need to sort variable values to match phylogeny names order match_birds_traits_MR <- match(bird_pruned_tree$tip.label, rownames(birds_traits_MR)) #use match function sorted_birds_traits_MR <- as.data.frame(birds_traits_MR[match_birds_traits_MR,]) sorted_birds_traits_MR #14) Prepare data for plotting. Climate and Food. We are going to use colored label for discrete traits #Climate Climate_birds_label <- character(length(sorted_birds_traits_MR$Climate)) names(Climate_birds_label) <- rownames(sorted_birds_traits_MR) #get the unique states for Climate unique(sorted_birds_traits_MR$Climate) #[1] temperate tropical polar temperate_tropical #label states with colors for plot Climate_birds_label[sorted_birds_traits_MR$Climate=="polar"] <- "blue" #assign light blue for tropical Climate_birds_label[sorted_birds_traits_MR$Climate=="temperate"] <- "light blue" #assign light blue for tropical Climate_birds_label[sorted_birds_traits_MR$Climate=="temperate_tropical"] <- "orange" #assign yellow for tropical Climate_birds_label[sorted_birds_traits_MR$Climate=="tropical"] <- "yellow" Climate_birds_label #Food Food_birds_label <- character(length(sorted_birds_traits_MR$Food)) names(Food_birds_label) <- rownames(sorted_birds_traits_MR) #get the unique states for Time unique(sorted_birds_traits_MR$Food) #[1] vegetarian carnivore omnivore #label states with colors for plot Food_birds_label[sorted_birds_traits_MR$Food=="vegetarian"] <- "green3" #assign color Food_birds_label[sorted_birds_traits_MR$Food=="carnivore"] <- "red" #assign color Food_birds_label[sorted_birds_traits_MR$Food=="omnivore"] <- "darkorchid" #assign color Food_birds_label #15) plot data plot(bird_pruned_tree, cex=1, show.tip.label = FALSE) #Plot a tree that leaves some room between the tree tips and taxon labels so that we can plot habitat use in this space points(rep(120, nrow(sorted_birds_traits_MR)), 1:nrow(sorted_birds_traits_MR), pch=22, bg=Climate_birds_label[bird_pruned_tree$tip.label], lwd = 0.05, col="white", cex=1.5) points(rep(122.5, nrow(sorted_birds_traits_MR)), 1:nrow(sorted_birds_traits_MR), pch=22, bg=Food_birds_label[bird_pruned_tree$tip.label], lwd = 0.05, col="white", cex=1.5) #16) Let's concentrate in an smaller clade Tyrannidae or Columbidae trees. We are going to read a specific tree for this family, that is included in the large tree tyrannidae_trees <- read.tree("tyrannidae.tree") tyrannidae_trees #5 trees tyrannidae_1<- tyrannidae_trees [[1]] # pick only 1 #large tree with 460 tips and 459 internal nodes. #17) Let's match this tree with our data tyrannidae_phylogeny_data <- treedata(tyrannidae_1, birds_traits_MR) #long list of warnings, those are taxa or data that are not matched tyrannidae_phylogeny_data #we got a much smaller tree, but usueful for exploring #18) Let's our reduced tyrannidae phylogeny and its data and write the corresponding files tyrannidae_reduced_tree <- tyrannidae_phylogeny_data$phy tyrannidae_reduced_data <- tyrannidae_phylogeny_data$data write.tree(tyrannidae_reduced_tree, file ="tyrannidae_reduced_tree.newick") write.table(tyrannidae_reduced_data, file = "tyrannidae_reduced_data.txt", sep ="\t") tyrannidae_reduced_data <- read.table("tyrannidae_reduced_data.txt", header = TRUE, sep = "\t")# read data to avoid being considered as matrix plot(tyrannidae_reduced_tree, edge.width = 2, label.offset = 0.005) #smaller tree add.scale.bar(cex = 1, font = 2, col = "red") #19) Let's plot some variables in the Tyrannidae tree. #We need to sort variable values to match phylogeny names order match_tyrannidae_traits <- match(tyrannidae_reduced_tree$tip.label, rownames(tyrannidae_reduced_data)) #use match function sorted_tyrannidae_traits <- as.data.frame(tyrannidae_reduced_data[match_tyrannidae_traits,]) sorted_tyrannidae_traits #20) Most discrete traits are fixed (1 state for all species) with the exception of # Climate, Migration and Food. We are going to use colored label for discrete traits Climate_label <- character(length(sorted_tyrannidae_traits$Climate)) names(Climate_label) <- rownames(sorted_tyrannidae_traits) Climate_label[sorted_tyrannidae_traits$Climate=="temperate"] <- "light blue" #assign light blue for tropical Climate_label[sorted_tyrannidae_traits$Climate=="tropical"] <- "yellow" #assign yellow for tropical Climate_label #21) Let's plot Body mass, BMR and Climate plot(tyrannidae_reduced_tree, cex=1, y.lim=c(0,13), x.lim=c(0,1.3)) #Plot a tree that leaves some room between the tree tips and taxon labels so that we can plot habitat use in this space points(rep(1, nrow(sorted_tyrannidae_traits)), 1:nrow(sorted_tyrannidae_traits), pch=21, bg="red", col="white", lwd=0.25, cex=sorted_tyrannidae_traits$Mass_g/5) points(rep(1.1, nrow(sorted_tyrannidae_traits)), 1:nrow(sorted_tyrannidae_traits), pch=21, bg="green", col="white", lwd=0.25, cex=sorted_tyrannidae_traits$BMR_kJ_per_h*5) points(rep(1.2, nrow(sorted_tyrannidae_traits)), 1:nrow(sorted_tyrannidae_traits), pch=22, bg=Climate_label[tyrannidae_reduced_tree$tip.label], lwd=0.25, cex=5) text(1, nrow(sorted_tyrannidae_traits)+1, "Mass (g)", pos=1, cex=1) text(1.1, nrow(sorted_tyrannidae_traits)+1, "BMR (kJ/h)", pos=1, cex=1) text(1.2, nrow(sorted_tyrannidae_traits)+1, "Climate", pos=1, cex=1)