##### First block of code for calculating and writing species averages for Mertens data ####

# Compute per-species sample sizes and mean measurements from the individual
# specimen records in 'Mertens_clean.txt', store them in the species table
# read from 'Varanus_species_averages.txt', and write the result to
# 'Varanus.txt'.
# NOTE(review): this working directory differs from the one set for the
# PCA/LDA section further down, which reads a file named 'Varanus.txt' --
# confirm that both paths refer to the same folder.
setwd('~/Dropbox/AKTUELLE PROJEKTE/Messel Necrosaurus/_Ecomorphology')
rawdata <- read.table('Varanus_species_averages.txt', header = TRUE, sep = '\t')
mertens <- read.table('Mertens_clean.txt', header = TRUE, sep = '\t')

# Measurement columns that are averaged per species.
measurement.cols <- c("SVL..mm.", "Tail.L..mm.", "Head.L..mm.", "Head.W..mm.",
                      "Head.H..mm.", "Forelimb..mm.", "Hind.limb..mm.")

# seq_len() (rather than 1:n) makes the loop a no-op for an empty species table.
for (i in seq_len(nrow(rawdata))) {
  # Specimen rows whose species matches the name in column 1 of the species table.
  sp <- which(mertens$Species == as.character(rawdata[i, 1]))
  rawdata$N[i] <- length(sp)
  for (col in measurement.cols) {
    # Missing measurements are ignored; a species without any usable value
    # ends up with NaN.
    rawdata[[col]][i] <- mean(mertens[[col]][sp], na.rm = TRUE)
  }
}
write.table(rawdata, file="Varanus.txt", sep='\t')

##### Second block of code for conducting PCAs and LDAs ####
setwd('~/Dropbox/2020_Messel_Necrosaurus/_Ecomorphology')
library(ggplot2)
library(ggpubr)
library(factoextra)
library(MASS)

# PCA with data from Mertens (1942)
# Reads 'Varanus.txt' -- presumably the species-average table written by the
# first section; confirm that it is present in this working directory.
rawdata <- read.table('Varanus.txt', header = TRUE, sep = '\t')

# Rows left out of the PCA. Defined once so that the measurement table and the
# grouping vector handed to fviz_pca_ind() always drop the same rows.
excluded.rows <- c(5, 9, 13, 14, 20)

# Measurement columns used as active PCA variables, labelled by species name.
rawdata.active <- rawdata[, c(5:8, 10, 11)]
rownames(rawdata.active) <- rawdata$Species..RD.
rawdata.active <- rawdata.active[-excluded.rows, ]

# 'scale.' is spelled out in full instead of relying on partial matching of
# 'scale'; variables are standardised to unit variance before the PCA.
rawdata.pca <- prcomp(rawdata.active, scale. = TRUE)
fviz_eig(rawdata.pca, yscale = "sqrt")
# Individuals and variable loadings on principal components 2 and 3.
fviz_pca_ind(rawdata.pca,
             axes = c(2, 3),
             repel = TRUE,
             habillage = rawdata$Ecology[-excluded.rows],
             palette = c("green", "blue", "brown", "orange"))
fviz_pca_var(rawdata.pca, axes = c(2, 3), repel = TRUE)

# PCA with data from Thompson & Withers (1997)
# The table is loaded and size-corrected first, because the exploratory plots
# below need 'thomwith' and 'thomwith.active' to exist.
thomwith <- read.table('Thompson&Withers_1997.txt', header = TRUE, sep = '\t', row.names = 1)
thomwith.active <- thomwith[, c(2, 3, 5, 8, 9, 11, 12, 14)]
# Size correction: dividing a data frame by a vector of length nrow() applies
# the vector down every column, so row i is divided by thomwith$TA[i]
# (TA is labelled as thorax length in the scatter plot below).
thomwith.active <- thomwith.active / thomwith$TA

# exploratory data visualization, with row 19 left out
# NOTE(review): these plots use the size-corrected values, like the second
# exploratory section further down -- confirm that this is what was intended.
test <- cbind(thomwith$Ecology, thomwith.active)
colnames(test)[1] <- "Ecology"
test <- test[-19,]
# One colour per ecology group remaining after row 19 is dropped.
palette.extant <- c("limegreen", "tan4", "steelblue2", "tan")
gghistogram(test, x = "SVL", bins = 10, color = "Ecology", fill = "Ecology", palette = palette.extant)
gghistogram(test, x = "HL", bins = 10, color = "Ecology", fill = "Ecology", palette = palette.extant)
gghistogram(test, x = "HW", bins = 10, color = "Ecology", fill = "Ecology", palette = palette.extant)
gghistogram(test, x = "UFL", bins = 10, color = "Ecology", fill = "Ecology", palette = palette.extant)
gghistogram(test, x = "LFL", bins = 10, color = "Ecology", fill = "Ecology", palette = palette.extant)

# Total forelimb and hindlimb lengths (upper + lower segment), all rows.
# Note that 'HL' here is the hindlimb sum, not the 'HL' column plotted above.
FL <- thomwith.active$UFL + thomwith.active$LFL
HL <- thomwith.active$UHL + thomwith.active$LHL
plot(thomwith$TA, HL/FL, xlab="Body size (thorax length, mm)", ylab="Hindlimb/forelimb ratio")

# gghistogram() needs a data frame containing both the plotted variable and
# the grouping column, so the forelimb total is attached to 'test' (row 19
# dropped to match) instead of passing the bare numeric vector.
test$FL <- FL[-19]
gghistogram(test, x = "FL", bins = 10, color = "Ecology", fill = "Ecology", palette = palette.extant)
gghistogram(test, x = "UHL", bins = 10, color = "Ecology", fill = "Ecology", palette = palette.extant)
gghistogram(test, x = "LHL", bins = 10, color = "Ecology", fill = "Ecology", palette = palette.extant)
gghistogram(test, x = "TAIL", bins = 10, color = "Ecology", fill = "Ecology", palette = palette.extant)

# exploratory data visualization
# Histograms of every size-corrected measurement for all rows, coloured by
# ecology group. The grouping column is put in front of the measurements.
test <- cbind(Ecology = thomwith$Ecology, thomwith.active)
# Five colours, shared by all histograms of this section.
palette.all <- c("limegreen", "black", "tan4", "steelblue2", "tan")
gghistogram(test, x = "SVL", bins = 10, color = "Ecology", fill = "Ecology", palette = palette.all)
gghistogram(test, x = "HL", bins = 10, color = "Ecology", fill = "Ecology", palette = palette.all)
gghistogram(test, x = "HW", bins = 10, color = "Ecology", fill = "Ecology", palette = palette.all)
gghistogram(test, x = "UFL", bins = 10, color = "Ecology", fill = "Ecology", palette = palette.all)
gghistogram(test, x = "LFL", bins = 10, color = "Ecology", fill = "Ecology", palette = palette.all)
gghistogram(test, x = "UHL", bins = 10, color = "Ecology", fill = "Ecology", palette = palette.all)
gghistogram(test, x = "LHL", bins = 10, color = "Ecology", fill = "Ecology", palette = palette.all)
gghistogram(test, x = "TAIL", bins = 10, color = "Ecology", fill = "Ecology", palette = palette.all)

# without feisti
# Rows used for this PCA; defined once so the measurement table and the
# grouping vector are always subset identically.
extant.rows <- 1:18
# 'scale.' is spelled out in full instead of relying on partial matching.
thomwith.pca.extant <- prcomp(thomwith.active[extant.rows, ], scale. = TRUE)
fviz_eig(thomwith.pca.extant, yscale = "sqrt")
# Individuals and variable loadings on principal components 1 and 2.
fviz_pca_ind(thomwith.pca.extant,
             axes = c(1, 2),
             repel = TRUE,
             habillage = thomwith$Ecology[extant.rows],
             palette = c("limegreen", "tan4", "steelblue2", "tan"),
             addEllipses = TRUE)
fviz_pca_var(thomwith.pca.extant, axes = c(1, 2), repel = TRUE)
get_eigenvalue(thomwith.pca.extant)

# with feisti
# Same analysis on all rows; here components 2 and 3 are plotted and a fifth
# palette colour is supplied.
thomwith.pca <- prcomp(thomwith.active, scale. = TRUE)
fviz_eig(thomwith.pca, yscale = "sqrt")
fviz_pca_ind(thomwith.pca,
             axes = c(2, 3),
             repel = TRUE,
             habillage = thomwith$Ecology,
             palette = c("limegreen", "black", "tan4", "steelblue2", "tan"),
             addEllipses = TRUE)
fviz_pca_var(thomwith.pca, axes = c(2, 3), repel = TRUE)
get_eigenvalue(thomwith.pca)

# LDA with data from Thompson & Withers (1997)
# Rows 1-18 form the training set; row 19 is the single case to be classified.
# Column 1 of the selection is the response, columns 2-9 are the predictors.
lda.cols <- c(1:3, 5, 8, 9, 11, 12, 14)
thomwith.train <- thomwith[1:18, lda.cols]
# read.table() returns character columns by default since R 4.0, and
# droplevels() has no method for character vectors; converting to a factor
# first works for both character and factor input and still removes levels
# that do not occur in the training rows.
thomwith.train$Ecology <- droplevels(as.factor(thomwith.train$Ecology))
thomwith.test <- thomwith[19, lda.cols]

# Size correction: each predictor is divided by the TA value of its own row
# (a data frame divided by a vector of length nrow() is divided column-wise).
thomwith.train[, 2:9] <- thomwith.train[, 2:9] / thomwith$TA[1:18]
thomwith.test[, 2:9] <- thomwith.test[, 2:9] / thomwith$TA[19]

# Model formula: first column as response, remaining eight as predictors.
f <- paste(names(thomwith.train)[1], "~",
           paste(names(thomwith.train)[2:9], collapse = " + "))
thomwith.lda <- lda(as.formula(f), data = thomwith.train)
# Classify the held-out row from its size-corrected measurements.
feisti.lda.predict <- predict(thomwith.lda, newdata = thomwith.test[, 2:9])

