# Ancestral state reconstruction (continuous)
# Once we have our phylogeny and trait data paired, we can reconstruct character evolution by
# inferring ancestral character states.

# 1) Make a working directory for this exercise and set it as your working directory:

setwd("/Users/jcsantos/Desktop/R_class_winter_2015_home/0_Bio559R_course_final_files/week_8_office/data")

# 2) We need to download these files from the course website into our working directory:
#      bird_data_pruned.txt
#      bird_pruned_MB.newick
#      tyrannidae_reduced_data.txt
#      tyrannidae_reduced_tree.newick

# 3) Several packages and functions are available for this purpose.

library(ape)
library(geiger)
library(phytools)

# 4) Load our trees and data from last class and check concordance:

bird_pruned_tree <- read.tree("bird_pruned_MB.newick")
bird_pruned_tree

birds_traits_data <- read.table("bird_data_pruned.txt", header = TRUE, sep = "\t")
birds_traits_data

# Use the 'name.check' function to check the concordance between the data file and the phylogenetic tree
name.check(bird_pruned_tree, birds_traits_data)
# [1] "OK"   # we have concordance (i.e., the same number of tips and one data row per tip)

# This is your exercise:
tyrannidae_reduced_tree <- read.tree("tyrannidae_reduced_tree.newick")
tyrannidae_reduced_data <- read.table("tyrannidae_reduced_data.txt", header = TRUE, sep = "\t")

name.check(tyrannidae_reduced_tree, tyrannidae_reduced_data)
# [1] "OK"   # we have concordance

# 5) We need to make sure that our tree is ultrametric and, preferably, bifurcating.

is.ultrametric(bird_pruned_tree)          # TRUE
is.binary.tree(bird_pruned_tree)          # TRUE: all nodes (including the root) have exactly two descendants

is.ultrametric(tyrannidae_reduced_tree)   # TRUE
is.binary.tree(tyrannidae_reduced_tree)   # TRUE: all nodes (including the root) have exactly two descendants

# 6) Let's explore our continuous data for all birds, grouped by Order

library(ggplot2)

## No log10
ggplot(birds_traits_data, aes(x = Order, y = Mass_g/1000, fill = Order)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(x = "Aves -- Order", y = "Body Mass (kg)", fill = NULL)

## log10
ggplot(birds_traits_data, aes(x = Order, y = log10(Mass_g), fill = Order)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(x = "Aves -- Order", y = "Log10 Body Mass (g)", fill = NULL)

# 7) Let's explore our continuous data for all birds, grouped by Family

## No log10
ggplot(birds_traits_data, aes(x = Family, y = Mass_g/1000, fill = Family)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(x = "Aves -- Family", y = "Body Mass (kg)", fill = NULL)

## log10
ggplot(birds_traits_data, aes(x = Family, y = log10(Mass_g), fill = Family)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(x = "Aves -- Family", y = "Log10 Body Mass (g)", fill = NULL)

# 8) It is apparent that at the Aves level the data are not normal; within a single family
#    such as Tyrannidae, a transformation might not be necessary.
#    Let's test for normality.

shapiro.test(birds_traits_data$Mass_g)   # Aves level
#  Shapiro-Wilk normality test
#  data:  birds_traits_data$Mass_g
#  W = 0.1023, p-value < 2.2e-16

# Let's create a variable that holds the log10-transformed body mass data
birds_traits_data$log10_Mass_g <- log10(birds_traits_data$Mass_g)
head(birds_traits_data)

shapiro.test(birds_traits_data$log10_Mass_g)   # Aves level
#  Shapiro-Wilk normality test
#  data:  birds_traits_data$log10_Mass_g
#  W = 0.9479, p-value = 1.69e-11
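# Optional side check (not part of the original exercise): the remark above suggests that within
# a single family the transformation may matter less. A quick way to see this is to run
# Shapiro-Wilk tests family by family; the length check and tryCatch() simply skip groups that
# are too small or otherwise unsuitable for the test.
by_family_p <- tapply(birds_traits_data$log10_Mass_g, birds_traits_data$Family,
                      function(x) {
                        if (length(x) >= 3) {
                          tryCatch(shapiro.test(x)$p.value, error = function(e) NA)
                        } else NA
                      })
head(sort(by_family_p))   # families with the strongest departures from normality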
# The Shapiro-Wilk test is very conservative, but we can also explore Q-Q (quantile-quantile) plots.

# 9) Let's draw the Q-Q plots

install.packages("nortest")   # additional tests for normality
library(nortest)

par(mfrow = c(1, 2))
qqnorm(birds_traits_data$Mass_g, main = "Normal Q-Q Plot -- Aves -- Body Mass")
qqnorm(birds_traits_data$log10_Mass_g, main = "Normal Q-Q Plot -- Aves -- log10 Body Mass")

# The log transformation has improved the distribution. It is still not normal, but given the
# large sample size this is not a major concern for regression analyses.
# Other transformations are possible, but interpreting such transformed data becomes even
# harder, and it will not change the results if we use robust statistical methods.
# See the presentation for more information about transformations.

# 10) Plot the continuous variable next to the bird tree

# We need to sort the trait values so they match the order of the tip labels in the phylogeny
match_birds_data  <- match(bird_pruned_tree$tip.label, rownames(birds_traits_data))   # use the match function
sorted_birds_data <- as.data.frame(birds_traits_data[match_birds_data, ])
sorted_birds_data

# Print the phylogeny next to the transformed data
matrix_graph <- matrix(c(1, 2), 1)   # matrix to locate plots: plot 1 -- left; plot 2 -- right
layout(matrix_graph, c(0.6, 0.4))

par(mar = c(4.1, 1, 1, 0))   # margins as c(bottom, left, top, right), in lines
plot(bird_pruned_tree, show.tip.label = FALSE)   # plot the tree, leaving room next to the tips for the trait bars

par(mar = c(4.1, 0, 1, 1))   # margins as c(bottom, left, top, right), in lines
barplot(sorted_birds_data$log10_Mass_g, horiz = TRUE, width = 1, space = 0, names.arg = "")
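# Optional sanity check (an extra step, not in the original script): the barplot above is only
# meaningful if the reordered rows line up one-to-one with the tip labels, so it is worth
# confirming that explicitly before interpreting the figure.
stopifnot(identical(rownames(sorted_birds_data), bird_pruned_tree$tip.label))
layout(1)   # restore the default single-panel layout for the plots that follow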
##############################################################################################################
##############################################################################################################

# Using 'ape'

# 11) We will use the ace (Ancestral Character Estimation) function of 'ape' to reconstruct
#     ancestral character states using maximum likelihood:
#
# ace(x, phy, type = "continuous",
#     method = "REML",    # first estimates the ancestral value at the root (a.k.a. the phylogenetic mean),
#                         # then the variance of the Brownian motion process is estimated by optimizing
#                         # the residual log-likelihood; the ancestral values are finally inferred from
#                         # the likelihood function given these two parameters
#     method = "pic",     # the confidence intervals are computed using the expected variances under the
#                         # model, so they depend only on the tree
#     method = "GLS",     # similar to "pic"
#     CI = TRUE,          # 95% confidence intervals
#     model = "BM",       # Brownian motion is the only model option for continuous characters
#     scaled = TRUE,
#     kappa = 1,          # if branch lengths need transformation, a positive exponent for that transformation
#     corStruct = NULL,
#     ip = 0.1,
#     use.expm = FALSE,
#     use.eigen = TRUE,
#     marginal = FALSE)   # if TRUE, returns the marginal reconstruction (likelihood values) at each node
#
# Note: ace() assumes that the trait values are in the same order as the tip labels unless the
# vector is named; here the rows of birds_traits_data are assumed to be in tip-label order.
# A named vector, e.g. setNames(birds_traits_data$log10_Mass_g, rownames(birds_traits_data)),
# is the safer option for your own data.

# 12) Let's explore method = "REML":
# This is the Brownian motion-based maximum likelihood (ML) estimator of Schluter et al. (1997).
# We use REML because it provides unbiased estimates (compared to "ML") of the variance of the
# Brownian motion process.

log10_Mass_bird_ace_REML <- ace(birds_traits_data$log10_Mass_g, bird_pruned_tree,
                                type = "continuous", method = "REML", marginal = TRUE)
log10_Mass_bird_ace_REML

#  Ancestral Character Estimation
#
#  Call: ace(x = birds_traits_data$log10_Mass_g, phy = bird_pruned_tree,
#            type = "continuous", method = "REML", marginal = TRUE)
#
#  Residual log-likelihood: 1757.884   <--- the log-likelihood of the most likely reconstruction
#
#  $ace   <--- a vector of reconstructed node values
#        452       453       454       455       456       457       458       459       460
#  3.0204793 3.2499122 3.0810056 3.3392881 3.7115866 3.5490073 2.9503006 2.9305053 2.6328168
#        461       462       463       464       465       466       467       468
#  2.0877089 2.6112151 2.5544724 2.6542317 2.6849797 1.8522521 2.0165004 2.6543825
#  ...
#
#  $sigma2   <--- a two-element vector: the rate estimate and its standard error
#  [1] 0.0094210721 0.0006407525
#
#  $CI95   <--- the 95% confidence intervals around the reconstructed node values
#           [,1]     [,2]
#  452 2.1130802 3.927878

# 13) Let's explore method = "pic":
# This is also a Brownian motion-based approach, using Felsenstein's (1985) phylogenetic
# independent contrasts (PIC). However, it only takes the descendants of each node into account
# when reconstructing the state at that node; more basal nodes are ignored.

log10_Mass_bird_ace_pic <- ace(birds_traits_data$log10_Mass_g, bird_pruned_tree,
                               type = "continuous", method = "pic", marginal = TRUE)
log10_Mass_bird_ace_pic

#  Ancestral Character Estimation
#
#  Call: ace(x = birds_traits_data$log10_Mass_g, phy = bird_pruned_tree,
#            type = "continuous", method = "pic", marginal = TRUE)
#
#  Residual log-likelihood: 1757.884   <--- the log-likelihood of the most likely reconstruction
#
#  $ace   <--- a vector of reconstructed node values
#        452       453       454       455       456       457       458       459       460
#  3.0204793 3.2499122 3.0810056 3.3392881 3.7115866 3.5490073 2.9503006 2.9305053 2.6328168
#        461       462       463       464       465       466       467       468
#  2.0877089 2.6112151 2.5544724 2.6542317 2.6849797 1.8522521 2.0165004 2.6543825
#  ...
#
#  $sigma2   <--- a two-element vector: the rate estimate and its standard error
#  [1] 0.0094210721 0.0006407525
#
#  $CI95   <--- the 95% confidence intervals around the reconstructed node values
#           [,1]     [,2]
#  452 2.1130802 3.927878

# Combined vector of tip values followed by the reconstructed node values
birds_log10_Mass_g <- c(birds_traits_data$log10_Mass_g, log10_Mass_bird_ace_pic$ace)

# 14) Let's plot our reconstructions on our tree

plot(bird_pruned_tree, type = "fan", show.tip.label = FALSE)
tiplabels(pch = 21, cex = sorted_birds_data$log10_Mass_g/2)   # tip symbols must be given in tip-label order
nodelabels(pch = 21, cex = log10_Mass_bird_ace_pic$ace/2)
axisPhylo()
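# Optional comparison (not part of the original exercise): the two ace() methods should give
# broadly similar node estimates, which is easy to eyeball by plotting one set against the other.
plot(log10_Mass_bird_ace_REML$ace, log10_Mass_bird_ace_pic$ace,
     xlab = "REML node estimates", ylab = "pic node estimates")
abline(0, 1, lty = 2)   # 1:1 line; points far from it flag nodes where the methods disagree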
#######################################  Phytools  #################################################

# 15) For this package, we need to isolate our trait of interest and then give it the
#     corresponding names from the phylogeny

birds_traits_data_log10_Mass_g <- birds_traits_data$log10_Mass_g
names(birds_traits_data_log10_Mass_g) <- birds_traits_data$Genus_Species

# 16) Let's try the reconstructions using phytools.

# fastAnc (from the manual):
# This function performs fast estimation of the ML ancestral states for a continuous trait
# by taking advantage of the fact that the state computed for the root node of the tree
# during Felsenstein's (1985) contrasts algorithm is also the maximum-likelihood estimate (MLE)
# of the root node. Thus, the function re-roots the tree at all internal nodes and computes
# the contrasts state at the root each time.

log10_Mass_bird_fastAnc <- fastAnc(bird_pruned_tree, birds_traits_data_log10_Mass_g, CI = TRUE)
log10_Mass_bird_fastAnc

#  $ace   <--- ancestral state estimates
#        452       453       454       455       456       457       458       459       460
#  2.8144783 3.2564965 3.8268806 3.8844212 3.3854237 3.3461974 2.4683799 2.5775496 2.4386633
#        461       462       463       464       465       466       467       468
#  2.1295003 2.5220832 2.4198142 2.1598356 2.0405282 2.0243548 2.7157835 2.7011164
#  ...
#
#  $CI95   <--- lower and upper 95% confidence intervals
#           [,1]     [,2]
#  452 2.0892972 3.539659
#  453 2.5456000 3.967393
#  454 3.1860535 4.467708
#  ...

# anc.ML (from the manual):
# This function estimates the evolutionary parameters and ancestral states for Brownian
# evolution using likelihood. It is also possible (for model = "BM") to allow missing data
# for some tip taxa.

log10_Mass_bird_ancML_BM <- anc.ML(bird_pruned_tree, birds_traits_data_log10_Mass_g, model = "BM")
log10_Mass_bird_ancML_BM

# The results should be similar to those from fastAnc
#  $sig2
#  0.003024751
#
#  $ace
#        452       453       454       455       456       457       458       459       460
#  2.8144783 3.2564965 3.8268806 3.8844212 3.3854237 3.3461974 2.4683799 2.5775496 2.4386633
#        461       462       463       464       465       466       467       468
#  2.1295003 2.5220832 2.4198142 2.1598356 2.0405282 2.0243548 2.7157835 2.7011164
#  ...
#
#  $logLik
#  [1] 522.9801
#
#  $counts
#  function gradient
#        21       21
#
#  $convergence
#  [1] 52
#
#  $message
#  [1] "ERROR: ABNORMAL_TERMINATION_IN_LNSRCH"
#
#  $model
#  [1] "BM"
#
#  attr(,"class")
#  [1] "anc.ML"
#
# Note the non-zero $convergence code and the optimizer message: the likelihood search did not
# converge cleanly in this run, so these estimates should be checked against the fastAnc results.

# The same function applies for the OU model:
# log10_Mass_bird_ancML_OU <- anc.ML(bird_pruned_tree, birds_traits_data_log10_Mass_g, model = "OU")
# log10_Mass_bird_ancML_OU

# In mathematics, the Ornstein-Uhlenbeck process (named after Leonard Ornstein and George Eugene
# Uhlenbeck) is a stochastic process that, roughly speaking, describes the velocity of a massive
# Brownian particle under the influence of friction. The process is stationary, Gaussian, and
# Markovian, and is the only nontrivial process that satisfies these three conditions, up to
# allowing linear transformations of the space and time variables. Over time, the process tends
# to drift towards its long-term mean: such a process is called mean-reverting. The process can
# be considered a modification of the random walk in continuous time (the Wiener process), in
# which the properties of the process have been changed so that there is a tendency for the walk
# to move back towards a central location, with a greater attraction when the process is further
# away from the centre. The Ornstein-Uhlenbeck process can also be considered the continuous-time
# analogue of the discrete-time AR(1) process.
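# A minimal simulation sketch (not part of the exercise; every parameter value below is an
# arbitrary choice) to illustrate the mean reversion described above: a Brownian-motion path
# wanders freely, while an Ornstein-Uhlenbeck path is pulled back toward its long-term mean.
set.seed(1)
n_steps <- 1000
dt      <- 0.01
theta   <- 2      # OU: strength of the pull toward the mean
mu      <- 0      # OU: long-term mean
sigma   <- 1      # diffusion rate shared by both processes

bm <- cumsum(rnorm(n_steps, mean = 0, sd = sigma * sqrt(dt)))   # Brownian motion

ou <- numeric(n_steps)                                          # Ornstein-Uhlenbeck (Euler scheme)
for (i in 2:n_steps) {
  ou[i] <- ou[i - 1] + theta * (mu - ou[i - 1]) * dt + sigma * sqrt(dt) * rnorm(1)
}

plot(bm, type = "l", ylim = range(bm, ou), xlab = "time step", ylab = "trait value")
lines(ou, col = "red")   # the OU path (red) stays near mu; the BM path drifts away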
# 17) Plot our reconstruction on our tree (from the phytools examples)

log10_Mass_bird_fastAnc_contMap <- contMap(bird_pruned_tree, birds_traits_data_log10_Mass_g,
                                           spread.labels = TRUE)
plot(log10_Mass_bird_fastAnc_contMap, type = "fan", show.tip.label = FALSE)

##########################################################################################
##########################################################################################
######################################  MARCH 3rd  #######################################
##########################################################################################
##########################################################################################

# Continuation of last class on continuous character reconstruction

# working directory
setwd("/Users/jcsantos/Desktop/R_class_winter_2015_home/0_Bio559R_course_final_files/week_9/data")

# libraries
library(ape)
library(geiger)
library(phytools)

# Load our trees and data from last class and check concordance:
bird_pruned_tree <- read.tree("bird_pruned_MB.newick")
bird_pruned_tree

birds_traits_data <- read.table("bird_data_pruned.txt", header = TRUE, sep = "\t")
birds_traits_data

# Data transformation:
birds_traits_data$log10_Mass_g <- log10(birds_traits_data$Mass_g)
head(birds_traits_data)

#########################################  Geiger  #########################################

# Similar to fitDiscrete, fitContinuous is a very flexible function of 'geiger'.
# It provides a set of statistics that allows us to decide which model best describes
# the evolution of the trait that we want to reconstruct.

# 18) First, we need to prepare our data using the function 'treedata'

log10_Mass_g_birds_geiger <- treedata(bird_pruned_tree, birds_traits_data)
log10_Mass_g_birds_geiger$phy    # an object of class "phylo"
log10_Mass_g_birds_geiger$data   # a matrix

# 19) We are going to prepare our data as before, but we are going to use the modified code
#     for the function that is available on the course website:

phy     <- log10_Mass_g_birds_geiger$phy
all_dat <- log10_Mass_g_birds_geiger$data
dat     <- as.numeric(all_dat[, 15])   # column 15 is assumed to hold log10_Mass_g; check the index for your own data
names(dat) <- all_dat[, 1]             # first column assumed to hold the species names (alternatively, rownames(all_dat))

############ Compare_models_fitContinuous: copy the function exactly as indicated below ############

results_fitContinuous_one_trait <- function(phy, dat) {

  ##### test concordance ####
  print(name.check(phy, dat))

  #### COMPARE MODELS ####
  models      <- c("BM", "OU", "EB", "white", "lambda", "kappa", "delta")   # set of models to compare
  summaries   <- c("Brownian Motion -- BM", "Ornstein-Uhlenbeck -- OU", "Early Burst -- EB",
                   "white noise -- non-phylogenetic", "Pagel's lambda", "Pagel's kappa", "Pagel's delta")
  summaries_2 <- c("Brownian Motion with no ME -- BM", "Ornstein-Uhlenbeck with no ME -- OU",
                   "Early Burst with no ME -- EB", "white noise with no ME -- non-phylogenetic",
                   "Pagel's lambda with no ME", "Pagel's kappa with no ME", "Pagel's delta with no ME")

  ## ESTIMATING measurement error (SE) ##
  # numeric vectors (one element per model) to store the fit statistics
  aic.se        <- numeric(length(models))
  aicc.se       <- numeric(length(models))
  lnl.se        <- numeric(length(models))
  parameters.se <- numeric(length(models))
  alpha.se      <- numeric(length(models))
  eb_a.se       <- numeric(length(models))
  lambda_est.se <- numeric(length(models))
  kappa_est.se  <- numeric(length(models))
  delta_est.se  <- numeric(length(models))
  for (m in 1:length(models)) {   # m indexes the models vector (1 to 7)
    cat("\n\n\t*** ", paste(toupper(summaries[m]), ": fitting ", sep = ""), models[m],
        " with SE *** \n", sep = "")   # print this title before fitting
    tmp <- fitContinuous(phy, dat, SE = NA, model = models[m], bounds = list(SE = c(0, 0.5)))
    print(tmp)
    aic.se[m]        <- round(tmp$opt$aic, 4)
    aicc.se[m]       <- round(tmp$opt$aicc, 4)
    lnl.se[m]        <- round(tmp$opt$lnL, 4)
    parameters.se[m] <- tmp$opt$k
    # model-specific parameters: report the estimate if the model has it, otherwise "------"
    alpha.se[m]      <- ifelse(!is.null(tmp$opt$alpha),  round(tmp$opt$alpha, 4),  "------")
    eb_a.se[m]       <- ifelse(!is.null(tmp$opt$a),      round(tmp$opt$a, 4),      "------")
    lambda_est.se[m] <- ifelse(!is.null(tmp$opt$lambda), round(tmp$opt$lambda, 4), "------")
    kappa_est.se[m]  <- ifelse(!is.null(tmp$opt$kappa),  round(tmp$opt$kappa, 4),  "------")
    delta_est.se[m]  <- ifelse(!is.null(tmp$opt$delta),  round(tmp$opt$delta, 4),  "------")
  }

  ### Get list of parameters (with SE) ###
  parameter_matrix.se   <- matrix(, length(models), 5)
  parameter_matrix.se[] <- c(alpha.se, eb_a.se, lambda_est.se, kappa_est.se, delta_est.se)
  parameter.se_df       <- as.data.frame(parameter_matrix.se)
  row.names(parameter.se_df) <- models
  names(parameter.se_df)     <- c("alpha", "EB-a", "lambda", "kappa", "delta")
  parameter.se_df

  ## ASSUMING no measurement error ##
  aic        <- numeric(length(models))
  aicc       <- numeric(length(models))
  lnl        <- numeric(length(models))
  parameters <- numeric(length(models))
  alpha      <- numeric(length(models))
  eb_a       <- numeric(length(models))
  lambda_est <- numeric(length(models))
  kappa_est  <- numeric(length(models))
  delta_est  <- numeric(length(models))

  for (m in 1:length(models)) {
    cat("\n\n\n\n\t*** ", paste(toupper(summaries_2[m]), ": fitting ", sep = ""), models[m], " *** \n", sep = "")
    tmp <- fitContinuous(phy, dat, SE = 0, model = models[m])
    print(tmp)
    aic[m]        <- round(tmp$opt$aic, 4)
    aicc[m]       <- round(tmp$opt$aicc, 4)
    lnl[m]        <- round(tmp$opt$lnL, 4)
    parameters[m] <- tmp$opt$k
    alpha[m]      <- ifelse(!is.null(tmp$opt$alpha),  round(tmp$opt$alpha, 4),  "------")
    eb_a[m]       <- ifelse(!is.null(tmp$opt$a),      round(tmp$opt$a, 4),      "------")
    lambda_est[m] <- ifelse(!is.null(tmp$opt$lambda), round(tmp$opt$lambda, 4), "------")
    kappa_est[m]  <- ifelse(!is.null(tmp$opt$kappa),  round(tmp$opt$kappa, 4),  "------")
    delta_est[m]  <- ifelse(!is.null(tmp$opt$delta),  round(tmp$opt$delta, 4),  "------")
  }

  ### Get list of parameters (no SE) ###
  parameter_matrix   <- matrix(, length(models), 5)
  parameter_matrix[] <- c(alpha, eb_a, lambda_est, kappa_est, delta_est)
  parameter_df       <- as.data.frame(parameter_matrix)
  row.names(parameter_df) <- models
  names(parameter_df)     <- c("alpha", "EB-a", "lambda", "kappa", "delta")
  parameter_df

  ## COMPARE AIC ##
  names(aic.se) <- names(lnl.se) <- names(aic) <- names(lnl) <- models   # label the stored values by model
  delta_aic <- function(x) x - x[which(x == min(x))]   # delta-AIC: subtract the lowest (best) score

  # no measurement error
  daic     <- round(delta_aic(aicc), 3)
  daic_sig <- matrix(, 1, length(daic))
  for (tt in 1:length(daic)) {
    # rough support categories: <= 3 "ns"; 3-6 "*"; 6-9 "**"; > 9 "***"
    ifelse(daic[tt] <= 3, daic_sig[1, tt] <- "ns",
           ifelse(daic[tt] > 3 & daic[tt] <= 6, daic_sig[1, tt] <- "*",
                  ifelse(daic[tt] > 6 & daic[tt] <= 9, daic_sig[1, tt] <- "**",
                         daic_sig[1, tt] <- "***")))
  }
  cat("\n\n\t\t\t\t*** MODEL COMPARISON no measurement error -- log10_Mass_g *** \n", sep = "")
  cat("\tdelta-AIC values for models assuming no measurement error \t\t\t\t zero indicates the best model\n\n")
  bb <- as.data.frame(rbind(daic, c(daic_sig)))
  names(bb)     <- models
  row.names(bb) <- c("delta_aicc", "significance")
  print(bb)
  # with measurement error
  daic.se     <- round(delta_aic(aicc.se), 3)
  daic.se_sig <- matrix(, 1, length(daic.se))
  for (tt in 1:length(daic.se)) {
    ifelse(daic.se[tt] <= 3, daic.se_sig[1, tt] <- "ns",
           ifelse(daic.se[tt] > 3 & daic.se[tt] <= 6, daic.se_sig[1, tt] <- "*",
                  ifelse(daic.se[tt] > 6 & daic.se[tt] <= 9, daic.se_sig[1, tt] <- "**",
                         daic.se_sig[1, tt] <- "***")))
  }
  cat("\n\n\t\t\t\t*** MODEL COMPARISON with measurement error -- log10_Mass_g ***\n", sep = "")
  cat("\t\t delta-AIC values for models estimating SE \t\t\t\t zero indicates the best model\n\n")
  cc <- as.data.frame(rbind(daic.se, c(daic.se_sig)))
  names(cc)     <- models
  row.names(cc) <- c("delta_aicc.se", "significance_SE")
  print(cc)
  cat("\n\n")

  # summary table combining both sets of results
  res_aicc <- rbind(aic, daic, daic_sig, aic.se, daic.se, daic.se_sig)
  rownames(res_aicc) <- c("AICc", "dAICc", "significance", "AICc_SE", "dAICc_SE", "significance_SE")
  res_aicc_df <- as.data.frame(res_aicc)
  print(res_aicc_df)

  cat("\n\n\t\t\t\t*** MODEL COMPARISON PARAMETERS -- without SE ***\n", sep = "")
  print(parameter_df)
  cat("\n\n\t\t\t\t*** MODEL COMPARISON PARAMETERS -- with SE ***\n", sep = "")
  print(parameter.se_df)
}

######### run the function #########
results_fitContinuous_one_trait(phy, dat)

# 20) This is the type of output that you might get:

#  [1] "OK"   <--- the data and tree are concordant
#
#  *** BROWNIAN MOTION -- BM: fitting BM with SE ***
#
#  GEIGER-fitted comparative model of continuous data
#   fitted 'BM' model parameters:
#     sigsq = 0.006036
#     SE = 0.000000
#     z0 = 2.814478
#
#   model summary:
#     log-likelihood = -183.527424
#     AIC = 373.054848
#     AICc = 373.108539
#     free parameters = 3
#
#  Convergence diagnostics:
#     optimization iterations = 100
#     failed iterations = 0
#     frequency of best fit = 0.31
#
#   object summary:
#     'lik' -- likelihood function
#     'bnd' -- bounds for likelihood search
#     'res' -- optimization iteration summary
#     'opt' -- maximum likelihood parameter estimates

# 21) The model comparison based on AIC

#  *** MODEL COMPARISON no measurement error -- log10_Mass_g ***
#  delta-AIC values for models assuming no measurement error; zero indicates the best model
#
#                    BM     OU    EB   white lambda kappa  delta
#  delta_aicc    59.282 59.092 61.31 719.142      0 6.169 60.808
#  significance     ***    ***   ***     ***     ns    **    ***
#
#  *** MODEL COMPARISON with measurement error -- log10_Mass_g ***
#  delta-AIC values for models estimating SE; zero indicates the best model
#
#                      BM    OU    EB   white lambda kappa delta
#  delta_aicc.se    0.623 2.659 0.141 721.792  2.659 0.634     0
#  significance_SE     ns    ns    ns     ***     ns    ns    ns
#
#                         BM       OU       EB     white   lambda    kappa    delta
#  AICc             371.0548 370.8381 373.0556 1030.9143 311.7458 317.9148 372.5542
#  dAICc              59.282   59.092    61.31   719.142        0    6.169   60.808
#  significance          ***      ***      ***       ***       ns       **      ***
#  AICc_SE          311.7458 313.7458 311.2271 1032.9143 313.7458 311.7201 311.0864
#  dAICc_SE            0.623    2.659    0.141   721.792    2.659    0.634        0
#  significance_SE        ns       ns       ns       ***       ns       ns       ns

# 22) The model comparison parameters (summary statistics):

#  *** MODEL COMPARISON PARAMETERS -- without SE ***
#          alpha   EB-a  lambda  kappa  delta
#  BM     ------ ------  ------ ------ ------
#  OU     0.0031 ------  ------ ------ ------
#  EB     ------      0  ------ ------ ------
#  white  ------ ------  ------ ------ ------
#  lambda ------ ------  0.9903 ------ ------
#  kappa  ------ ------  ------ 0.6987 ------
#  delta  ------ ------  ------ ------ 1.2214
#
#  *** MODEL COMPARISON PARAMETERS -- with SE ***
#          alpha    EB-a  lambda  kappa  delta
#  BM     ------  ------  ------ ------ ------
#  OU          0  ------  ------ ------ ------
#  EB     ------ -0.0082  ------ ------ ------
#  white  ------  ------  ------ ------ ------
#  lambda ------  ------  0.9903 ------ ------
#  kappa  ------  ------  ------ 0.8636 ------
#  delta  ------  ------  ------ ------ 0.4981
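# Optional follow-up (not produced by the course function): Akaike weights turn the AICc values
# into relative model support. The numbers below are the no-measurement-error values reported
# in the comparison table above.
aicc_vals <- c(BM = 371.0548, OU = 370.8381, EB = 373.0556, white = 1030.9143,
               lambda = 311.7458, kappa = 317.9148, delta = 372.5542)
delta_aicc <- aicc_vals - min(aicc_vals)
akaike_wts <- exp(-0.5 * delta_aicc) / sum(exp(-0.5 * delta_aicc))
round(akaike_wts, 3)   # lambda should carry most of the weight (~0.96), kappa nearly all of the rest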
# 23) The best-fitting model was lambda, assuming no measurement error

#  *** PAGEL'S LAMBDA WITH NO ME: fitting lambda ***
#
#  GEIGER-fitted comparative model of continuous data
#   fitted 'lambda' model parameters:
#     lambda = 0.990334   <--- notice the high lambda value
#     sigsq = 0.004739
#     z0 = 2.812349
#
#   model summary:
#     log-likelihood = -152.872911
#     AIC = 311.745822
#     AICc = 311.799513
#     free parameters = 3
#
#  Convergence diagnostics:
#     optimization iterations = 100
#     failed iterations = 0
#     frequency of best fit = 0.12
#
#   object summary:
#     'lik' -- likelihood function
#     'bnd' -- bounds for likelihood search
#     'res' -- optimization iteration summary
#     'opt' -- maximum likelihood parameter estimates

####################  Phylogenetic Signal for Continuous Characters  #########################

# 24) Let's test for phylogenetic signal
# We can use Pagel's lambda to assess phylogenetic signal (the tendency of closely related
# species to resemble each other for a specific trait). Lambda is a tree-transformation
# parameter that multiplies the internal branches of the tree by values between 0 and 1.
#   A lambda of 1 indicates strong signal.
#   A lambda of 0 indicates no signal.
# We can determine the magnitude of the signal by estimating lambda on our tree and then
# comparing its log-likelihood with that of a tree transformed into a star phylogeny
# (i.e., internal branches multiplied by lambda = 0).
#
# Pagel, M. 1994. Detecting correlated evolution on phylogenies: a general method for the
# comparative analysis of discrete characters. Proceedings of the Royal Society of London B
# 255:37-45.

# 25) Let's build our lambda = 0 tree

bird_pruned_tree_lambda_0 <- rescale(bird_pruned_tree, model = "lambda", 0)

# Let's plot this tree next to the original bird tree
par(mfrow = c(1, 2))
plot(bird_pruned_tree, edge.width = 1, show.tip.label = FALSE)            # original tree
add.scale.bar(cex = 0.7, font = 2, col = "red")
plot(bird_pruned_tree_lambda_0, edge.width = 1, show.tip.label = FALSE)   # lambda = 0 (star-like) tree
add.scale.bar(cex = 0.7, font = 2, col = "red")

# 26) Let's fit the lambda transformation to determine whether there is signal

# Fit lambda to our tree
our_data_fitContinuous <- fitContinuous(phy, dat, SE = 0, model = "lambda", bounds = list(SE = c(0, 0.5)))
our_data_fitContinuous

#  GEIGER-fitted comparative model of continuous data
#   fitted 'lambda' model parameters:
#     lambda = 0.990334
#     sigsq = 0.004739
#     z0 = 2.812349
#
#   model summary:
#     log-likelihood = -152.872911
#     AIC = 311.745822
#     AICc = 311.799513
#     free parameters = 3
#
#  Convergence diagnostics:
#     optimization iterations = 100
#     failed iterations = 0
#     frequency of best fit = 0.14
#
#   object summary:
#     'lik' -- likelihood function
#     'bnd' -- bounds for likelihood search
#     'res' -- optimization iteration summary
#     'opt' -- maximum likelihood parameter estimates

# 27) Fit the lambda = 0 tree (no signal)

our_data_fitContinuous_lambda_0 <- fitContinuous(bird_pruned_tree_lambda_0, dat, SE = 0,
                                                 model = "lambda", bounds = list(SE = c(0, 0.5)))
our_data_fitContinuous_lambda_0

#  GEIGER-fitted comparative model of continuous data
#   fitted 'lambda' model parameters:
#     lambda = 0.000000
#     sigsq = 0.004776
#     z0 = 1.938157
#
#   model summary:
#     log-likelihood = -513.457139
#     AIC = 1034.914278
#     AICc = 1035.003965
#     free parameters = 4
#
#  Convergence diagnostics:
#     optimization iterations = 100
#     failed iterations = 0
#     frequency of best fit = 0.69
#
#   object summary:
#     'lik' -- likelihood function
#     'bnd' -- bounds for likelihood search
#     'res' -- optimization iteration summary
#     'opt' -- maximum likelihood parameter estimates
# 28) For model comparison, the model with the lowest AICc score is preferred

our_tree_versus_lambda_0_aicc <- abs(our_data_fitContinuous_lambda_0$opt$aicc -
                                     our_data_fitContinuous$opt$aicc)
our_tree_versus_lambda_0_aicc
# [1] 723.2045 -- the lambda = 0 (no signal) tree is very unlikely

##### our tree versus the lambda = 0 tree: likelihood-ratio test
d_our_tree_versus_lambda_0 <- abs(2 * (our_data_fitContinuous_lambda_0$opt$lnL -
                                       our_data_fitContinuous$opt$lnL))
d_our_tree_versus_lambda_0
# [1] 721.1685

p_our_tree_versus_lambda_0 <- pchisq(d_our_tree_versus_lambda_0, df = 1, lower.tail = FALSE)
p_our_tree_versus_lambda_0
# [1] 7.457298e-159 -- rejects the no-signal model

##############################################################################################################
##############################################################################################################
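# Optional aside (not part of the original script): phytools' phylosig() gives an independent
# estimate of Pagel's lambda for the same named trait vector; with test = TRUE it also runs a
# likelihood-ratio test against lambda = 0, which should lead to the same conclusion as the
# comparison above.
phylosig(phy, dat, method = "lambda", test = TRUE)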