# Session and package setup ----

sessionInfo() # Which R version and platform are you running?

# Install plyr only when it is missing, then load it. Repeat the same
# process for "reshape" and check that both packages are loaded using
# sessionInfo().
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library(plyr)

?reshape # Opens the help page for `reshape`

# Working directory ----

# `dir` stands for the path to the folder that holds the data file (on
# macOS you can get it from the Terminal). If no character variable named
# `dir` has been defined, the name resolves to the base function dir() and
# setwd() would fail, so the call is skipped and the current working
# directory is used instead.
if (is.character(dir)) {
  setwd(dir)
}

# Load the data ----

# NOTE(review): the file name suggests a tab-delimited file; if the columns
# do not split correctly, try sep = "\t" -- confirm against the actual file.
data_file <- "Birds_mass_BM_McNab_2009_class_tab.txt"
birds_MR <- read.table(data_file, header = TRUE, sep = " ")
birds_MR
head(birds_MR)

# Vector subsetting ----

# Keep the body-mass column (Mass_g) as a plain vector for the examples.
Mass <- birds_MR$Mass_g
boxplot(log10(Mass))

Mass_first_10 <- Mass[1:10] # first 10 elements
Mass_some_elements <- Mass[c(1, 5, 15, 3, 4, 400)] # some specific elements

# Vector with elements greater than 100 g but less than 1000 g
Mass_conditional <- Mass[Mass < 1000 & Mass > 100]

Mass_duplicated <- Mass[duplicated(Mass)] # duplicated elements in Mass

# Similar to unique(); `!` negates the logical vector from duplicated()
Mass_non_duplicated <- Mass[!duplicated(Mass)]

# Factor vector ----

# factor() guarantees a factor even on R >= 4.0, where read.table() keeps
# strings as character by default.
food <- factor(birds_MR$Food)
food
str(food) # structure of the object, e.g. Factor w/ 3 levels ...
# Inspecting the food factor ----

class(food) # class of the object: [1] "factor"
length(food) # number of entries: [1] 533
food_character <- as.character(food) # converts factor to character vector

# How many times is each diet repeated? Summing the logical vector counts
# the matches; na.rm = TRUE ignores missing entries, as which() did.
carnivore_n <- sum(food_character == "carnivore", na.rm = TRUE)
carnivore_n
omnivore_n <- sum(food_character == "omnivore", na.rm = TRUE)
vegetarian_n <- sum(food_character == "vegetarian", na.rm = TRUE)
vector_food <- c(carnivore_n, omnivore_n, vegetarian_n)

# barplot() returns the x positions of the bars; they are reused below to
# place each count just above its bar.
bplt <- barplot(
  vector_food,
  names.arg = c("carnivore", "omnivore", "vegetarian"),
  col = c("red", "yellow", "green"),
  cex.axis = 2,
  cex.names = 2
)
text(
  x = bplt,
  y = vector_food + 5,
  labels = as.character(vector_food),
  xpd = TRUE, # allow labels to be drawn outside the plot region
  cex = 2
)

# Data frame indexing ----

str(birds_MR) # our input data is actually a data frame
birds_MR[100, 3] # element in row 100 and column 3 ([m, n] matrix notation)
birds_MR[100, ] # elements in row 100
birds_MR[, 3] # elements in column 3 (i.e., BMR_kJ_per_h variable)
birds_MR[1:10, ] # elements in the first 10 rows
dim(birds_MR) # dimensions as rows by columns ([m, n] matrix notation)
names(birds_MR) # names of the variables in the data frame
summary(birds_MR) # basic statistics of the variables in the data frame

# Missing values ----

# Rows with missing values
birds_MR_incomplete_cases <- birds_MR[!complete.cases(birds_MR), ]
# Delete rows with incomplete data
birds_MR_complete_cases <- birds_MR[complete.cases(birds_MR), ]
# Similar process to the one above
birds_MR_complete_cases_2 <- na.omit(birds_MR)

# Subsetting rows ----

# Select rows (species) that are both Nocturnal and tropical.
birds_nocturnal_tropical <- subset(
  birds_MR,
  Time == "Nocturnal" & Climate == "tropical"
)
# The historical name says "temperate" although the filter is "tropical";
# it is kept as an alias so existing references keep working.
birds_nocturnal_temperate <- birds_nocturnal_tropical
# Subsetting rows (continued) ----

# Values used in subset() are case-sensitive: they must be written exactly
# as they appear in the data frame.

# Species heavier than 1000 g
birds_big <- subset(birds_MR, Mass_g > 1000)
birds_big_temperate <- subset(
  birds_MR,
  Mass_g > 1000 & Climate == "temperate"
)
# `!=` excludes the species that are temperate
birds_big_not_temperate <- subset(
  birds_MR,
  Mass_g > 1000 & Climate != "temperate"
)
# %in% keeps rows whose Climate is any of the listed states
birds_temperate_polar <- subset(
  birds_MR,
  Climate %in% c("temperate", "polar")
)

# Extract specific columns of the birds_MR data frame
birds_speciec_mass_BMR <- data.frame(
  birds_MR$Genus_Species,
  birds_MR$Mass_g,
  birds_MR$BMR_kJ_per_h
)

# Derived variables ----

birds_MR$Mass_specific_BMR_kj_per_h_g <- birds_MR$BMR_kJ_per_h / birds_MR$Mass_g
head(birds_MR) # a new column with the calculated variable will appear

# ifelse(test, yes, no) is vectorised over the column (see ?ifelse)
birds_MR$size <- ifelse(birds_MR$Mass_g > 1000, "big_bird", "small_bird")
head(birds_MR) # a new column with the categorical variable will appear

# na.rm = TRUE matches what summary() reports when values are missing
mean(birds_MR$Mass_g, na.rm = TRUE) # also available from summary()
min(birds_MR$Mass_g, na.rm = TRUE) # minimum
max(birds_MR$Mass_g, na.rm = TRUE) # maximum

# These variables need transforming; see their boxplots
birds_MR$log10Mass_g <- log10(birds_MR$Mass_g)
birds_MR$log10BMR <- log10(birds_MR$BMR_kJ_per_h)

# Correlation and regression ----

# use = "complete.obs" drops rows with a missing value instead of
# returning NA for the whole correlation.
pearson_r <- cor(
  birds_MR$log10Mass_g,
  birds_MR$log10BMR,
  method = "pearson",
  use = "complete.obs"
)
pearson_r
# With data = birds_MR the formula refers to the columns by name
cor.test(~ log10Mass_g + log10BMR, data = birds_MR)

plot(
  birds_MR$log10Mass_g,
  birds_MR$log10BMR,
  main = "Log-Log mass versus BMR",
  xlab = "Log-Mass",
  ylab = "Log-BMR",
  pch = 19,
  cex.axis = 1.5,
  cex.lab = 1.5
)
# Regression line (y ~ x)
abline(lm(log10BMR ~ log10Mass_g, data = birds_MR), col = "red", lwd = 4)
# The label shows the correlation computed above rather than a fixed number
text(
  x = 2,
  y = 2,
  labels = sprintf("Pearson's correlation r=%.2f", pearson_r),
  cex = 1.5
)

# Selecting columns ----

# Select specific columns and create a new data frame (two equivalent ways;
# the first one yields column names prefixed with "birds_MR.")
birds_MR_based_food <- data.frame(
  birds_MR$Food,
  birds_MR$log10Mass_g,
  birds_MR$log10BMR
)
birds_MR_based_food_2 <- birds_MR[c("Food", "log10Mass_g", "log10BMR")]
head(birds_MR_based_food)
head(birds_MR_based_food_2)
tail(birds_MR_based_food) # check the last rows of this data frame
# Change the names of the variables
names(birds_MR_based_food) <- c("food", "log10Mass", "log10BMR")

# Writing data ----

# row.names = FALSE keeps the header aligned with the data columns; with
# the default row names the header line would be one field short.
write.table(
  birds_MR,
  file = "Birds_mass_BM_McNab_2009_class_csv_updated.txt",
  row.names = FALSE,
  col.names = TRUE,
  sep = ","
)

# Writing functions ----

# Add two values.
#   x, y: objects that support `+` (recycled by the usual R rules).
# Returns x + y.
add_two_variables <- function(x, y) {
  z <- x + y
  z
}

add_two_variables(1, 2)
a <- sample(1:40, 6) # sample 6 numbers randomly from 1 to 40
a
b <- sample(1:40, 6)
b
add_two_variables(a, b)
# NOTE: a variable named `c` shadows the name of base::c(); calls such as
# c(1, 2) still work because R looks up functions separately.
c <- add_two_variables(a, b)
c
results <- data.frame(a, b, c)
results

# List of results of applying a function (here sd) to each column; sd() is
# only meaningful for numeric columns, so the others are filtered out.
lapply(Filter(is.numeric, birds_MR), sd, na.rm = TRUE)

# Packages for grouped summaries and plotting ----

# Install each package only when it is missing, then load them.
for (pkg in c("ggplot2", "reshape", "plyr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(ggplot2)
library(reshape)
library(plyr)

birds_MR_based_food <- data.frame(
  birds_MR$Food,
  birds_MR$log10Mass_g,
  birds_MR$log10BMR
)
head(birds_MR_based_food)
# Correlations by food type ----

# Change the names of the variables in the data frame
names(birds_MR_based_food) <- c("food", "log10Mass", "log10BMR")

# Our own function to compute the correlation for one group.
#   x: data frame with numeric columns log10Mass and log10BMR.
# Returns a one-row data frame with column COR (rounded to 4 decimals).
# use = "complete.obs" ignores rows with a missing value.
cor_func <- function(x) {
  data.frame(
    COR = round(cor(x$log10Mass, x$log10BMR, use = "complete.obs"), 4)
  )
}

# ddply() splits the data frame by the grouping variable (food), applies
# the function to each piece and binds the results into one data frame.
cor_by_food <- ddply(birds_MR_based_food, .(food), cor_func)
cor_by_food

# One annotation row per food group: labels and values both come from
# cor_by_food, so each name is always paired with its own correlation.
n_groups <- nrow(cor_by_food)
label_y <- 2.5 - 0.5 * (seq_len(n_groups) - 1)
label_colours <- rep_len(c("red", "darkgreen", "blue"), n_groups)

cor_plot <- ggplot(
  data = birds_MR_based_food,
  # x and y variables, grouping variable and colouring variable
  aes(x = log10Mass, y = log10BMR, group = food, colour = food)
) +
  # Least-squares regression line (lm) per food type, without the band
  geom_smooth(method = "lm", se = FALSE, aes(fill = food), formula = y ~ x) +
  # size is a fixed setting, so it goes outside aes(); inside aes() it
  # would be treated as a mapped variable and add a legend entry.
  geom_point(aes(shape = food), size = 2) +
  annotate(
    "text",
    x = 1,
    y = label_y,
    label = paste(cor_by_food$food, "R="),
    color = label_colours
  ) +
  annotate(
    "text",
    x = 1.5,
    y = label_y,
    label = as.character(cor_by_food$COR),
    color = label_colours
  )
print(cor_plot)

# Save the graph as a pdf; the plot is passed explicitly instead of
# relying on whichever plot was created last.
ggsave(
  "correlations_food.pdf",
  plot = cor_plot,
  width = 16,
  height = 9,
  dpi = 120
)

# Reshaping ----

# Transform the data frame to long form: the grouping variable (food), a
# column with the variable names and a column with the values.
birds_MR_based_food_melt <- melt(birds_MR_based_food, id.vars = "food")
head(birds_MR_based_food_melt)

# Overall log-log plot ----

pearson_r <- cor(
  birds_MR$log10Mass_g,
  birds_MR$log10BMR,
  use = "complete.obs"
)
plot(
  birds_MR$log10Mass_g,
  birds_MR$log10BMR,
  main = "Log-Log mass versus BMR",
  xlab = "Log-Mass",
  ylab = "Log-BMR",
  pch = 19,
  cex.axis = 1.5,
  cex.lab = 1.5
)
# Regression line (y ~ x)
abline(lm(log10BMR ~ log10Mass_g, data = birds_MR), col = "red", lwd = 4)
# The label shows the computed correlation rather than a fixed number
text(
  x = 2,
  y = 2,
  labels = sprintf("Pearson's correlation r=%.2f", pearson_r),
  cex = 1.5
)