# Correlation analyses Discrete data # 1) Make a working directory (e.g., Discrete correlation analyses) and select it as your working. setwd("/Users/jcsantos/Desktop/R_class_winter_2015_home/0_Bio559R_course_final_files/week_11/data") # 2) Several packages and functions are available for this purpose. library(ape) library(geiger) library(phytools) install.packages("corHMM") install.packages("caper") library(corHMM) library(caper) ############### Prepare our two binary data (Migration and Flightless) ######################### # 3) Load our trees and data from last class and check concordance: bird_pruned_tree <- read.tree("bird_pruned_MB.newick") bird_pruned_tree birds_traits_data <- read.table("bird_data_pruned.txt", header = TRUE, sep = "\t") # 4) select only a binary variable # A data frame for migration and flightless birds_migration_flight_temp <- subset(birds_traits_data, select = c(Migration, Flightless)) # Select this binary data and make it a vector birds_migration_flight_data <- birds_migration_flight_temp[complete.cases(birds_migration_flight_temp),] # only complete cases birds_migration_flight_data # determine the structure of the data str(birds_migration_flight_data) # 'data.frame': 399 obs. of 2 variables: # $ Migration : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ... # $ Flightless: Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 1 1 1 1 ... # make factors to characters birds_migration_flight_data$Migration <- as.character(birds_migration_flight_data$Migration) birds_migration_flight_data$Flightless <- as.character(birds_migration_flight_data$Flightless) birds_migration_flight_data <- cbind(species = rownames(birds_migration_flight_data), birds_migration_flight_data) #row names as column birds_migration_flight_data$species <- as.character(birds_migration_flight_data$species) str(birds_migration_flight_data) # 'data.frame': 399 obs. of 3 variables: # $ species : chr "Struthio_camelus" "Dromaius_novaehollandiae" "Apteryx_owenii" "Apteryx_haastii" ... # $ Migration : chr "No" "No" "No" "No" ... # $ Flightless: chr "Yes" "Yes" "Yes" "Yes" ... # make data truly binary (0-NO and 1-YES) birds_migration_flight_data$Migration[birds_migration_flight_data$Migration == "No"] <- "0" birds_migration_flight_data$Migration[birds_migration_flight_data$Migration == "Yes"] <- "1" birds_migration_flight_data$Flightless[birds_migration_flight_data$Flightless == "No"] <- "0" birds_migration_flight_data$Flightless[birds_migration_flight_data$Flightless == "Yes"] <- "1" head(birds_migration_flight_data) # 5) use the 'name.check' function to determine if we have concordance name.check (bird_pruned_tree, birds_migration_flight_data) # we check the concordance between a data file and a phylogenetic tree. # NOT CONCORDANT # Provides a list of species in tree with no data ($tree_not_data) # 6) We can make concordant need to prepare our data using the function 'treedata' birds_migration_flight <- treedata(bird_pruned_tree, birds_migration_flight_data) birds_migration_flight$phy # a class phylo birds_migration_flight$data # a matrix # 7) We are going to assign the data to specific objects to run functions birds_migration_flight_phy <- birds_migration_flight$phy birds_migration_flight_dat <- birds_migration_flight$data birds_migration_flight_dat_df <- as.data.frame(birds_migration_flight_dat) str(birds_migration_flight_dat_df) ######################## Using Phylotools -- Pagel (1994) ######################### # 8) Assign our data to objects birds_migration_flight_phy <- birds_migration_flight$phy birds_migration_flight_dat <- birds_migration_flight$data # 9) Make vectors of the binary traits with species names: # migration vector birds_migration_dat <- as.numeric(birds_migration_flight_dat[,2]) names(birds_migration_dat) <- birds_migration_flight_dat[,1] birds_migration_dat # flightless vector birds_flightless_dat <- as.numeric(birds_migration_flight_dat[,3]) names(birds_flightless_dat) <- birds_migration_flight_dat[,1] birds_flightless_dat # 10) Fit Pagel's (1994) function to test for correlated evolution of binary traits birds_migration_flight_binary_phytools_results <- fitPagel(birds_migration_flight_phy,x=birds_migration_dat,y=birds_flightless_dat) birds_migration_flight_binary_phytools_results # Pagel's binary character correlation test: # Independent model rate matrix: # 0|0 0|1 1|0 1|1 # 0|0 -0.006793975 0.0004214451 0.00637253 0.0000000000 # 0|1 0.015329598 -0.0217021274 0.00000000 0.0063725297 # 1|0 0.017867243 0.0000000000 -0.01828869 0.0004214451 # 1|1 0.000000000 0.0178672432 0.01532960 -0.0331968409 # Dependent model rate matrix: # 0|0 0|1 1|0 1|1 # 0|0 -0.006727236 0.00000000 0.006727236 0.000000000 # 0|1 0.022455373 -0.02245537 0.000000000 0.000000000 # 1|0 0.013897977 0.00000000 -0.015156642 0.001258666 # 1|1 0.000000000 0.03517540 0.000000000 -0.035175396 # Model fit: # log-likelihood # independent -237.2414 # dependent -232.9522 # Hypothesis test result: # likelihood-ratio: 8.578462 # p-value: 0.07254428 # Conclusion: migration and flightless are not correlated ################################## Using corHMM ############################### # 11) Assign our data to objects birds_migration_flight_phy <- birds_migration_flight$phy birds_migration_flight_dat_corHMM <- as.data.frame(birds_migration_flight$data) # 12) Make vectors of binary traits with species names in corHMM format: birds_migration_flight_dat_corHMM$species <- as.factor(birds_migration_flight_dat_corHMM$species) # as factor birds_migration_flight_dat_corHMM$Migration <- as.integer(birds_migration_flight_dat_corHMM$Migration) # as integer birds_migration_flight_dat_corHMM$Flightless <- as.integer(birds_migration_flight_dat_corHMM$Flightless) # as integer # Run the function: ARD -- All rates different birds_migration_flight_dat_corHMM_results<-corDISC(birds_migration_flight_phy,birds_migration_flight_dat_corHMM,ntraits=2,model="ARD", node.states="marginal", diagn=FALSE) # State distribution in data: # States: 1 2 3 4 # Counts: 6 124 15 254 # Initializing... # Finished. Beginning thorough search... # Finished. Inferring ancestral states using marginal reconstruction. # Finished. Performing diagnostic tests. birds_migration_flight_dat_corHMM_results # Fit # -lnL AIC AICc N.traits ntax # -1.386294 18.77259 19.14182 2 399 # Rates # (0,0) (0,1) (1,0) (1,1) # (0,0) NA 0 4.074536e-12 NA # (0,1) 2.546585e-12 NA NA 4.074536e-12 # (1,0) 2.764864e-12 NA NA 2.546585e-12 # (1,1) NA 0 0.000000e+00 NA # Arrived at a reliable solution ################################## Using BayesTraits ############################### # 13) our starting data bird_pruned_tree <- read.tree("bird_pruned_MB.newick") bird_pruned_tree birds_traits_data <- read.table("bird_data_pruned.txt", header = TRUE, sep = "\t") # 14) Transform body mass, metabolic rate, migration, flight birds_BayesTraits_temp <- subset(birds_traits_data, select = c(Genus_Species,Mass_g,BMR_kJ_per_h,Migration, Flightless)) # Select this binary data and make it a vector birds_BayesTraits_data <- birds_BayesTraits_temp[complete.cases(birds_BayesTraits_temp),] # only complete cases birds_BayesTraits_data$log_mass <- log10(birds_BayesTraits_data$Mass_g) birds_BayesTraits_data$log_BMR <- log10(birds_BayesTraits_data$BMR_kJ_per_h) birds_BayesTraits_data$Migration <- as.numeric(birds_BayesTraits_data$Migration) birds_BayesTraits_data$Migration[birds_BayesTraits_data$Migration == 1] <- "0" birds_BayesTraits_data$Migration[birds_BayesTraits_data$Migration == 2] <- "1" birds_BayesTraits_data$Flightless <- as.numeric(birds_BayesTraits_data$Flightless) birds_BayesTraits_data$Flightless[birds_BayesTraits_data$Flightless == 1] <- "0" birds_BayesTraits_data$Flightless[birds_BayesTraits_data$Flightless == 2] <- "1" birds_BayesTraits_data_end <- subset(birds_BayesTraits_data, select = c(Genus_Species,Migration, Flightless, log_mass, log_BMR)) head(birds_BayesTraits_data_end) # Genus_Species Migration Flightless log_mass log_BMR # Struthio_camelus Struthio_camelus 0 1 4.965672 2.3215156 # Dromaius_novaehollandiae Dromaius_novaehollandiae 0 1 4.589950 2.1005084 # Apteryx_owenii Apteryx_owenii 0 1 3.138934 1.1525941 # Apteryx_haastii Apteryx_haastii 0 1 3.402949 1.2792105 # Apteryx_australis Apteryx_australis 0 1 3.496515 1.2201081 # Nothoprocta_perdicaria Nothoprocta_perdicaria 0 0 2.660865 0.8027737 # 15) use the 'name.check' function to determine if we have concordance name.check (bird_pruned_tree, birds_BayesTraits_data_end) # we check the concordance between a data file and a phylogenetic tree. # NOT CONCORDANT # Provides a list of species in tree with no data ($tree_not_data) # 16) We make concordant data using the function 'treedata' birds_BayesTraits <- treedata(bird_pruned_tree, birds_BayesTraits_data_end) birds_BayesTraits$phy # a class phylo birds_BayesTraits$data # a matrix # 17) We need to save the resulting data and tree in the working directory birds_BayesTraits_phy <- birds_BayesTraits$phy write.nexus(birds_BayesTraits_phy, file="birds_BayesTraits_phy.nex") birds_BayesTraits_dat <- birds_BayesTraits$data birds_BayesTraits_dat_df <- as.data.frame(birds_BayesTraits$data) write.table(birds_BayesTraits_dat_df, file ="birds_BayesTraits_data.txt", sep ="\t") # 18) Make a folder to run Bayes Traits and copy the program, tree and the formatted data #./BayesTraits TreeFile DataFile # ./BayesTraits_B birds_BayesTraits_phy.nex birds_BayesTraits_data_formated.txt # 19) The following menu will appear: # 1) MultiState # 2) Discrete: Independent # 3) Discrete: Dependant # 4) Continuous: Random Walk (Model A) # 5) Continuous: Directional (Model B) # 6) Continuous: Regression # 7) Independent contrast # 20) select the option 2 (Discrete: Independent) # 2 # 21) Select the type of analyses: ML (you can also use the MCMC, please refer to the manual) # Please Select the analysis method to use. # 1) Maximum Likelihood. # 2) MCMC # 1 # Options: # Model: Discete Independant # Tree File Name: birds_BayesTraits_phy.nex # Data File Name: birds_BayesTraits_data_formated.txt # Log File Name: birds_BayesTraits_data_formated.txt.log.txt # Summary: False # Seed 603255648 # Analsis Type: Maximum Likelihood # ML attempt per tree: 10 # Precision: 64 bits # Cores: 1 # No of Rates: 4 # Base frequency (PI's) None # Character Symbols: 00,01,10,11 # Using a covarion model: False # Restrictions: # alpha1 None # beta1 None # alpha2 None # beta2 None # Tree Information # Trees: 1 # Taxa: 399 # Sites: 1 # States: 4 # 22) Initiate the analyses. Type: Run # 23) Results of the independent run Tree No Lh alpha1 beta1 alpha2 beta2 Root - P(0,0) Root - P(0,1) Root - P(1,0) Root - P(1,1) 1 -237.241440 0.006373 0.017867 0.000421 0.015329 0.0591450.466354 0.053405 0.421096 # 24) Make a folder to run Bayes Traits and copy the program, tree and the formated data # ./BayesTraits_B birds_BayesTraits_phy.nex birds_BayesTraits_data_formated.txt # 26) The following menu will appear: # 1) MultiState # 2) Discrete: Independent # 3) Discrete: Dependant # 4) Continuous: Random Walk (Model A) # 5) Continuous: Directional (Model B) # 6) Continuous: Regression # 7) Independent contrast # 27) select the option 3 (Discrete: dependent) # 3 # 28) Select the type of analyses: ML or MCMC # Please Select the analysis method to use. # 1) Maximum Likelihood. # 2) MCMC # 1 # Options: # Model: Discete Dependant # Tree File Name: birds_BayesTraits_phy.nex # Data File Name: birds_BayesTraits_data_formated.txt # Log File Name: birds_BayesTraits_data_formated.txt.log.txt # Summary: False # Seed 2062873966 # Analsis Type: Maximum Likelihood # ML attempt per tree: 10 # Precision: 64 bits # Cores: 1 # No of Rates: 8 # Base frequency (PI's) None # Character Symbols: 00,01,10,11 # Using a covarion model: False # Restrictions: # alpha1 None # beta1 None # alpha2 None # beta2 None # Tree Information # Trees: 1 # Taxa: 399 # Sites: 1 # States: 4 # 29) Initiate the analyses. Type: Run # 30) Results of the independent run Tree No Lh q12 q13 q21 q24 q31 q34 q42 q43 Root - P(0,0) Root - P(0,1) Root - P(1,0) Root - P(1,1) 1 -235.593118 0.000000 0.006474 0.031929 0.016356 0.0129290.001200 0.085211 0.004554 0.027314 0.451951 0.063135 0.457600 # 31) We can use LRT to compare the likelihood fit # Discrete dependent: -237.241440 and 4 df # Discrete independent: -235.593118 and 8 df log_dif_migration_flight <- 2*(-235.593118 - (-237.241440)) log_dif_migration_flight # [1] 3.296644 p_value_migration_flight <- pchisq(log_dif_migration_flight, 8-4, lower.tail=FALSE) p_value_migration_flight # [1] 0.5094642 # Conclusion: migration and flightless are not correlated