########################################
##### Captive animal gut microbiota is populated with microorganisms that are relevant to the digestion of host dominant diet 
##### Yu Hsin Chu, Ziwen Ran
##### Supplementary R Script
########################################



########################################
##### Load packages needed
library(tidyverse)
library(vegan)
library(ape)
library(phyloseq)
library(DESeq2)
library(ggplot2)
library(qiime2R)
########################################

##### Expanding the metadata table with extra information

########################################
##### Generate a metadata table with columns for general diet type,
##### dominant diet categories, and the percentage of the dominant diet category

## We received the metadata table in a *.xlsx format
## and use the "save as" function in Excel to save it as a *.csv file

# Read the metadata table in R and save it as a data frame
metadata <- read.csv("zoo_metadata.csv", header = TRUE)

## The information of general diet type is not included in the metadata table
## but is included in Table 1 in the publication by McKenzie et al.
## We transferred this information to a csv file
# Read the species-diet_type table in R and save it as a data frame
diet_type <- read.csv("zoo_diet_type.csv", header = TRUE)

## To add a column for general diet type in the metadata table,
# Join metadata and diet_type data frames by the column "Taxonomy_Species"
metadata <- full_join(metadata, diet_type, by = "Taxonomy_Species", 
                      copy = FALSE, keep = FALSE)

## We define the diet category that makes up >=50% of an animal's diet 
## as its dominant diet category
## To add a column for the dominant diet category

## In case of any error that comes up, to make the troubleshooting simple,
## We decided to make a smaller data frame with only the relavant metadata columns
# To make a new data frame with the sample ID and percentage of each diet category
diet_cat <- select(metadata, SampleID_extra, Diet.Fruit, Diet.Inv, Diet.PlantO, 
                   Diet.Scav, Diet.Seed, Diet.Vect, Diet.Vend, Diet.Vfish,
                   Diet.Vunk) %>% 
  rename(Fruit = Diet.Fruit, Inv = Diet.Inv, PlantO = Diet.PlantO, Scav = Diet.Scav,
         Seed = Diet.Seed, Vect = Diet.Vect, Vend = Diet.Vend, Vfish = Diet.Vfish,
         Vunk = Diet.Vunk)

# Spread the diet_cat dataframe into a long table
diet_cat_long <- diet_cat %>%
  rownames_to_column() %>% 
  pivot_longer(cols = c(Fruit, Inv, PlantO, Scav, Seed, Vect, Vend, Vfish, Vunk))

# In the long table data frame diet_cat_long, keep only the rows with diet categories
# that makes up 60% of the animal's total diet and above
diet_cat_dom <- diet_cat_long %>% 
  group_by(SampleID_extra) %>% 
  filter(value >= 50) %>%
  select(SampleID_extra, name, value) %>% 
  rename(dom_diet_cat = name, dom_cat_percentage = value)

## To add the columns for the dominant diet categories and how much they make up the 
## animal's diet back to the large metadata table
# To join metadata and diet_cat_dom data frames by sample ID 
metadata <- full_join(metadata, diet_cat_dom, by = "SampleID_extra", 
                      copy = FALSE, keep = FALSE)

# Rename the X.SampleID column
metadata <- rename(metadata, SampleID = X.SampleID)

# To write the changed metadata data frame as a *.tsv file
write.table(metadata, file = "metadata1.tsv", row.names = FALSE, sep = "\t")
########################################

##### Differential Abundance Analysis

########################################
# Set a random number
set.seed(711)

##### Import QIIME 2 outputs to R
## Manually delete the first line and the "#" before "OTU ID" in "feature-table.tsv"
# Read the *.tsv output of the feature table
feature <- read.table("feature-table.tsv", header = TRUE, sep = "\t", row.names = 1, check.names = FALSE)

## Manually change the header "Taxon" to "taxonomy", and "Confidence" to "confidence" 
## and delete the header for the first column "Feature ID" in "taxonomy.tsv"
# Read the *.tsv output of the taxonomy information
taxonomy <- read.table("taxonomy.tsv", header = TRUE, sep = "\t", row.names = 1) 

# Separate taxonomy information into multiple columns by rank
taxonomy <- separate(taxonomy, col = taxonomy, into = c("Kingdom", "Phylum", 
                                                        "Class","Order", "Family",
                                                        "Genus", "Species"), sep = ";")

# Read the phylogenetic tree *.nwk output to R
tree <- read_tree_greengenes("tree.nwk")

## Manually delete the header for the first column (SampleID) of the metadata1.tsv file
# Read the metadata table into R again
metadata1 <- read.table("metadata1.tsv", header = TRUE, row.names = 1)
########################################


########################################
##### Prepare the phyloseq object

# Merge the feature table and taxonomy data to a data frame
biom_file <- merge(feature, taxonomy, by.x = c("OTU.ID"), by.y = c("OTU.ID"))

# Convert the "feature" data frame into a numerical matrix
feat <- as.matrix(feature)

# Format the "feature" matrix as the OTU table for the phyloseq object
feat <- otu_table(feat,taxa_are_rows = TRUE)

# Convert the "feature" data frame into a matrix
tax <- as.matrix(taxonomy)

# Format the "taxonomy" matrix as the taxonomy table for the phyloseq object
tax <- tax_table(tax)

# Convert the phylogenetic tree from multichotomous to dichotmous
tree <- multi2di(tree)

# Format the "metadata" data frame as the sample data for the phyloseq object
sampdat <- sample_data(metadata1)

# Combine all information into a single phyloseq object
physeq <- merge_phyloseq(feat, tax, sampdat, tree)
########################################


########################################
##### Rarefaction

## The rarefaction depth was determined to be 70,000 during data processing in QIIME 2
# Perform rarefaction with a rarefaction depth of 70,000
physeq_rar <- rarefy_even_depth(physeq, sample.size = 70000)
## 2 samples removedbecause they contained fewer reads than `sample.size`.
## 397 OTUs were removed because they are no longer present in any sample after random subsampling
########################################


########################################
##### Calculate relative abundance

# Define the function for calculating relative abundance
calculate_relative_abundance <- function(x) x / sum(x)

# Calculate relative abundance
physeq_RA <- transform_sample_counts(physeq, function(x) x / sum(x))
########################################


########################################
##### Remove low-abundance features

# Make an R object for feature counts
total_counts <- taxa_sums(physeq)

# Make an R object for relative abundance
relative_abundance <- calculate_relative_abundance(total_counts)

## We define low-abundance features as those that makes up less than 0.5% of total feature counts
# Filter out the low-abundance features in the relative abundance object
abundant <- relative_abundance > 0.0005 

# Filter out the low-abundance features in the phyloseq object
abundant_taxa <- prune_taxa(abundant, physeq)
########################################


########################################
##### Set taxonomic level for analysis to phylum
phylum <- tax_glom(physeq, taxrank = "Phylum", NArm = FALSE)
########################################


########################################
##### Differential abundance Analysis for captive carnivores
##### At phylum level

# Subset captive animals from the phyloseq object
captive <- subset_samples(physeq, captive_wild == "captive")

# Subset carnivores from the captive subset of the phyloseq object
capcarni <- subset_samples(captive, diet_type == "carnivore")

# Calculate relative abundance for the captive carnivore subset
capcarni_RA <- transform_sample_counts(capcarni, calculate_relative_abundance)

# Count features to enable filtering of low-abundance features
capcarni_counts <- taxa_sums(capcarni)
relative_abundance_capcarni <- calculate_relative_abundance(capcarni_counts)

# Filter out low-abundance features from the captive carnivore subset
abundant_capcarni <- relative_abundance_capcarni > 0.0005
abundant_capcarni_taxa <- prune_taxa(abundant_capcarni, capcarni_RA)

# Set taxonomic level for analysis of captive carnivore to phylum
abundant_capcarni_phylum <- tax_glom(abundant_capcarni_taxa, taxrank = "Phylum")

# Create a DESeq object for the captive carnvores
## The catagorical variable that we want to compare is dominant diet category
deseq_capcarni <- phyloseq_to_deseq2(abundant_capcarni_phylum, ~ dom_diet_cat)

# Define the function for calculating geometric means
calculate_gm_mean <- function(x, na.rm = TRUE) {
  exp(sum(log(x[x > 0]), na.rm = na.rm) / length(x))
}

# Calculate geometric means for captive carnivores
geo_means_capcarni <- apply(counts(deseq_capcarni), 1, calculate_gm_mean)

# Update the captive carnivore DESeq2 object with the geometric means
deseq_capcarni <- estimateSizeFactors(deseq_capcarni, geoMeans = geo_means_capcarni)

# Conduct DESeq analysis for captive carnivores
deseq_capcarni <- DESeq(deseq_capcarni, fitType = "local")

# Output the DESeq analysis results to an R object
capcarni_diff_abund <- results(deseq_capcarni)

# Transform the output to a data frame
capcarni_da <- as.data.frame(capcarni_diff_abund)

# Define alpha level (significance)  as 0.05
alpha <- 0.05

# Filter the significant adjusted p values in captive carnivore
## This selects the features that are significantly different between 
## the carnivores consuming different dominant diet categories
significant_capcarni <- filter(capcarni_da, padj < alpha)
## After this filtering, the number of observations in the significant_capcarni data frame became 0

# Create a data frame with taxonomic information
capcarni_phylum_df <- as.data.frame(tax_table(abundant_capcarni_phylum))

# Merge the differential abundance analysis result table with taxonomy information
capcarni_da_phylum <- merge(capcarni_da, capcarni_phylum_df, by = "row.names")

# Arrange the merge table by log fold change
capcarni_da_phylum <- arrange(capcarni_da_phylum, log2FoldChange)
########################################


########################################
##### Differential abundance Analysis for captive carnivores
##### At class level

# Set taxonomic level for analysis of captive carnivore to class
abundant_capcarni_class <- tax_glom(abundant_capcarni_taxa, taxrank = "Class")

# Create a DESeq object for the captive carnvores
## The catagorical variable that we want to compare is dominant diet category
deseq_capcarni_class <- phyloseq_to_deseq2(abundant_capcarni_class, ~ dom_diet_cat)

# Calculate geometric means 
geo_means_capcarni_class <- apply(counts(deseq_capcarni_class), 1, calculate_gm_mean)

# Update the DESeq2 object with the geometric means
deseq_capcarni_class <- estimateSizeFactors(deseq_capcarni_class, geoMeans = geo_means_capcarni_class)

# Conduct DESeq analysis
deseq_capcarni_class <- DESeq(deseq_capcarni_class, fitType = "local")

# Output the DESeq analysis results to an R object
capcarni_diff_abund_class <- results(deseq_capcarni_class)

# Transform the output to a data frame
capcarni_da_class <- as.data.frame(capcarni_diff_abund_class)

# Filter the significant adjusted p values in captive carnivore
## This selects the features that are significantly different between 
## the carnivores consuming different dominant diet categories
significant_capcarni_class <- filter(capcarni_da_class, padj < alpha)
## After this filtering, the number of observations in the significant_capcarni_class data frame became 3

# Create a data frame with taxonomic information
capcarni_class_df <- as.data.frame(tax_table(abundant_capcarni_class))

# Merge the differential abundance analysis result table with taxonomy information
significant_capcarni_class <- merge(significant_capcarni_class, capcarni_class_df, by = "row.names")

# Arrange the merge table by log fold change
significant_capcarni_class <- arrange(significant_capcarni_class, log2FoldChange)
########################################


########################################
##### Differential abundance Analysis for captive herbivores
##### At phylum level

# Subset herbivores from the captive subset of the phyloseq object
capherb <- subset_samples(captive, diet_type == "herbivore")

# Calculate relative abundance for the captive herbivore subset
capherb_RA <- transform_sample_counts(capherb, calculate_relative_abundance)

# Count features to enable filtering of low-abundance features
capherb_counts <- taxa_sums(capherb)
relative_abundance_capherb <- calculate_relative_abundance(capherb_counts)

# Filter out low-abundance features
abundant_capherb <- relative_abundance_capherb > 0.0005
abundant_capherb_taxa <- prune_taxa(abundant_capherb, capherb_RA)

# Set taxonomic level for analysis to phylum
abundant_capherb_phylum <- tax_glom(abundant_capherb_taxa, taxrank = "Phylum")

# Create a DESeq object for the captive herbivores
## The catagorical variable that we want to compare is dominant diet category
deseq_capherb_phylum <- phyloseq_to_deseq2(abundant_capherb_phylum, ~ dom_diet_cat)

# Calculate geometric means 
geo_means_capherb_phylum <- apply(counts(deseq_capherb_phylum), 1, calculate_gm_mean)

# Update the DESeq2 object with the geometric means
deseq_capherb_phylum <- estimateSizeFactors(deseq_capherb_phylum, geoMeans = geo_means_capherb_phylum)

# Conduct DESeq analysis
deseq_capherb_phylum <- DESeq(deseq_capherb_phylum, fitType = "local")

# Output the DESeq analysis results to an R object
capherb_diff_abund_phylum <- results(deseq_capherb_phylum)

# Transform the output to a data frame
capherb_da_phylum <- as.data.frame(capherb_diff_abund_phylum)

# Filter the significant adjusted p values
## This selects the features that are significantly different between 
## the herbivores consuming different dominant diet categories
significant_capherb_phylum <- filter(capherb_da_phylum, padj < alpha)
## After this filtering, the number of observations in the significant_capherb_phylum data frame became 4

# Create a data frame with taxonomic information
capherb_phylum_df <- as.data.frame(tax_table(abundant_capherb_phylum))

# Merge the differential abundance analysis result table with taxonomy information
significant_capherb_phylum <- merge(significant_capherb_phylum, capherb_phylum_df, by = "row.names")

# Arrange the merge table by log fold change
significant_capherb_phylum <- arrange(significant_capherb_phylum, log2FoldChange)
########################################


########################################
##### Differential abundance Analysis for captive herbivores
##### At class level

# Set taxonomic level for analysis of captive herbivore to class
abundant_capherb_class <- tax_glom(abundant_capherb_taxa, taxrank = "Class")

# Create a DESeq object for the captive harnvores
## The catagorical variable that we want to compare is dominant diet category
deseq_capherb_class <- phyloseq_to_deseq2(abundant_capherb_class, ~ dom_diet_cat)

# Calculate geometric means 
geo_means_capherb_class <- apply(counts(deseq_capherb_class), 1, calculate_gm_mean)

# Update the DESeq2 object with the geometric means
deseq_capherb_class <- estimateSizeFactors(deseq_capherb_class, geoMeans = geo_means_capherb_class)

# Conduct DESeq analysis
deseq_capherb_class <- DESeq(deseq_capherb_class, fitType = "local")

# Output the DESeq analysis results to an R object
capherb_diff_abund_class <- results(deseq_capherb_class)

# Transform the output to a data frame
capherb_da_class <- as.data.frame(capherb_diff_abund_class)

# Filter the significant adjusted p values in captive herbivore
## This selects the features that are significantly different between 
## the herbivores consuming different dominant diet categories
significant_capherb_class <- filter(capherb_da_class, padj < alpha)
## After this filtering, the number of observations in the significant_capherb_class data frame became 8

# Create a data frame with taxonomic information
capherb_class_df <- as.data.frame(tax_table(abundant_capherb_class))

# Merge the differential abundance analysis result table with taxonomy information
significant_capherb_class <- merge(significant_capherb_class, capherb_class_df, by = "row.names")

# Arrange the merge table by log fold change
significant_capherb_class <- arrange(significant_capherb_class, log2FoldChange)
########################################


########################################
##### Differential abundance Analysis for captive omnivores
##### At phylum level

# Subset omnivores from the captive subset of the phyloseq object
capomni <- subset_samples(captive, diet_type == "omnivore")

# Calculate relative abundance for the captive omnivore subset
capomni_RA <- transform_sample_counts(capomni, calculate_relative_abundance)

# Count features to enable filtering of low-abundance features
capomni_counts <- taxa_sums(capomni)
relative_abundance_capomni <- calculate_relative_abundance(capomni_counts)

# Filter out low-abundance features
abundant_capomni <- relative_abundance_capomni > 0.0005
abundant_capomni_taxa <- prune_taxa(abundant_capomni, capomni_RA)

# Set taxonomic level for analysis to phylum
abundant_capomni_phylum <- tax_glom(abundant_capomni_taxa, taxrank = "Phylum")

# Create a DESeq object for the captive omniores
## The catagorical variable that we want to compare is dominant diet category
deseq_capomni_phylum <- phyloseq_to_deseq2(abundant_capomni_phylum, ~ dom_diet_cat)

# Calculate geometric means 
geo_means_capomni_phylum <- apply(counts(deseq_capomni_phylum), 1, calculate_gm_mean)

# Update the DESeq2 object with the geometric means
deseq_capomni_phylum <- estimateSizeFactors(deseq_capomni_phylum, geoMeans = geo_means_capomni_phylum)

# Conduct DESeq analysis
deseq_capomni_phylum <- DESeq(deseq_capomni_phylum, fitType = "local")

# Output the DESeq analysis results to an R object
capomni_diff_abund_phylum <- results(deseq_capomni_phylum)

# Transform the output to a data frame
capomni_da_phylum <- as.data.frame(capomni_diff_abund_phylum)

# Filter the significant adjusted p values
## This selects the features that are significantly different between 
## the omnivores consuming different dominant diet categories
significant_capomni_phylum <- filter(capomni_da_phylum, padj < alpha)
## After this filtering, the number of observations in the significant_capomni_phylum data frame became 1

# Create a data frame with taxonomic information
capomni_phylum_df <- as.data.frame(tax_table(abundant_capomni_phylum))

# Merge the differential abundance analysis result table with taxonomy information
significant_capomni_phylum <- merge(significant_capomni_phylum, capomni_phylum_df, by = "row.names")

########################################


########################################
##### Differential abundance Analysis for captive omnivores
##### At class level

# Set taxonomic level for analysis of captive omnivore to class
abundant_capomni_class <- tax_glom(abundant_capomni_taxa, taxrank = "Class")

# Create a DESeq object for the captive omnivores
## The catagorical variable that we want to compare is dominant diet category
deseq_capomni_class <- phyloseq_to_deseq2(abundant_capomni_class, ~ dom_diet_cat)

# Calculate geometric means 
geo_means_capomni_class <- apply(counts(deseq_capomni_class), 1, calculate_gm_mean)

# Update the DESeq2 object with the geometric means
deseq_capomni_class <- estimateSizeFactors(deseq_capomni_class, geoMeans = geo_means_capomni_class)

# Conduct DESeq analysis
deseq_capomni_class <- DESeq(deseq_capomni_class, fitType = "local")

# Output the DESeq analysis results to an R object
capomni_diff_abund_class <- results(deseq_capomni_class)

# Transform the output to a data frame
capomni_da_class <- as.data.frame(capomni_diff_abund_class)

# Filter the significant adjusted p values in captive omnivore
## This selects the features that are significantly different between 
## the omnivores consuming different dominant diet categories
significant_capomni_class <- filter(capomni_da_class, padj < alpha)
## After this filtering, the number of observations in the significant_capomni_class data frame became 2

# Create a data frame with taxonomic information
capomni_class_df <- as.data.frame(tax_table(abundant_capomni_class))

# Merge the differential abundance analysis result table with taxonomy information
significant_capomni_class <- merge(significant_capomni_class, capomni_class_df, by = "row.names")

# Arrange the merge table by log fold change
significant_capomni_class <- arrange(significant_capomni_class, log2FoldChange)
########################################

##### Generating customized alpha diversity plots

########################################
#### Subset the metadata table for relevant variables
aim1 <- metadata %>% 
  select(SampleID, animal, Taxonomy_Order, Taxonomy_Species, diet_type, dom_diet_cat)


########################################
#### Read the evenness output files
#### For each *.tsv file, the header of the first column is manually written as "SampleID"

# Read Pielou evenness data into R
evenness <- read.table("evenness.tsv", header = TRUE)

# Read Faith's pd data into R
faith <- read.table("faith.tsv", header = TRUE)

# Read Shannon's Index data into R
shannon <- read.table("shannon.tsv", header = TRUE)

########################################
##### Put the metadata and the alpha diversity metrics into one data frame
# Join the metadata subset with the alpha diversity data
aim1 <- full_join(aim1, evenness, by = "SampleID", copy = FALSE, keep = FALSE)
aim1 <- full_join(aim1, faith, by = "SampleID", copy = FALSE, keep = FALSE)
aim1 <- full_join(aim1, shannon, by = "SampleID", copy = FALSE, keep = FALSE)

# Remove the samples that were removed during denoising and rarefaction
# (These samples would be missing alpha diversity data)
aim1 <- filter(aim1, !is.na(pielou_evenness))

########################################
##### For carnivores
# Filter the carnivores from the aim1 data frame
carnivore <- filter(aim1, diet_type == "carnivore")

# Generate a boxplot comparing Pielou's evenness index of 
# different dominant diet categories within carnivores
carnivore %>% 
  ggplot(aes(x = dom_diet_cat, y = pielou_evenness)) +
  stat_boxplot(geom = "errorbar", width = 0.5) + 
  geom_boxplot(outlier.size = 3) +
  xlab("Dominant Diet Category") +
  ylab("Pielou's Evenness") +
  theme(text = element_text(size = 20))

# Save the boxplot for carnivore Pielou evenness
ggsave("carni_evenness.png", width = 6, height = 6)

# Generate a boxplot comparing Faith's pd index of 
# different dominant diet categories within carnivores
carnivore %>% 
  ggplot(aes(x = dom_diet_cat, y = faith_pd)) +
  stat_boxplot(geom = "errorbar", width = 0.5) + 
  geom_boxplot(outlier.size = 3) +
  xlab("Dominant Diet Category") +
  ylab("Faith's Phylogenetic Diversity") +
  theme(text = element_text(size = 20))

# Save the boxplot for carnivore Pielou's evenness
ggsave("carni_faith.png", width = 6, height = 6)


########################################
##### For carnivores
# Filter the carnivores from the aim1 data frame
herb_omni <- filter(aim1, diet_type == "omnivore" | diet_type == "herbivore")

# Filter out samples missing domniant diet type
herb_omni <- filter(herb_omni, !is.na(dom_diet_cat))

# Make an R object for the facet label
hofacet <- c("Herbivore", "Omnivore")
names(hofacet) <- c("herbivore", "omnivore")

# Generate a boxplot comparing Pielou's evenness index of 
# different dominant diet categories within herbivores and omnivores
herb_omni %>% 
  ggplot(aes(x = dom_diet_cat, y = pielou_evenness)) +
  stat_boxplot(geom = "errorbar", width = 0.5) + 
  geom_boxplot(outlier.size = 3) +
  xlab("Dominant Diet Category") +
  ylab("Pielou's Evenness") +
  facet_wrap(vars(diet_type), scales = "free_x", labeller = labeller(diet_type = hofacet)) +
  theme(text = element_text(size = 20))

# Save the boxplot for herbivore & omnivore Pielou evenness
ggsave("herb_omni_evenness.png", width = 8, height = 6)

# Generate a boxplot comparing Faith's pd index of 
# different dominant diet categories within herbivores and omnivores
herb_omni %>% 
  ggplot(aes(x = dom_diet_cat, y = faith_pd)) +
  stat_boxplot(geom = "errorbar", width = 0.5) + 
  geom_boxplot(outlier.size = 3) +
  xlab("Dominant Diet Category") +
  ylab("Faith's Phylogenetic Diversity") +
  facet_wrap(vars(diet_type), scales = "free_x", labeller = labeller(diet_type = hofacet)) +
  theme(text = element_text(size = 20))

# Save the boxplot for herbivore & omnivore Faith's pd
ggsave("herb_omni_faith.png", width = 8, height = 6)
########################################

########################################
##### Generate customized beta diversity plots

##### Figures included in the manuscript
# Read the *qza outputs from QIIME2 to R
herb_wuni <- read_qza("herb_weighted_unifrac_pcoa_results.qza")
omni_wuni <- read_qza("omni_weighted_unifrac_pcoa_results.qza")
carni_wuni <- read_qza("carni_weighted_unifrac_pcoa_results.qza")
carni_uwuni <- read_qza("carni_unweighted_unifrac_pcoa_results.qza")

# Generate weighted UniFrac PCoA plot for herbivores
herb_wuni$data$Vectors %>%
  select(SampleID, PC1, PC2) %>%
  left_join(metadata)%>%
  ggplot(aes(x=PC1, y=PC2, color = Taxonomy_Order, shape = dom_diet_cat)) +
  geom_point(alpha = 0.6, size = 5) + 
  theme_q2r() +
  scale_shape_manual(values=c(16,17), name="Dominant Diet Category") +
  scale_color_discrete(name="Animal Order") +
  theme(text = element_text(size = 15))  +
  xlab(paste("PC1:", round(100*herb_wuni$data$ProportionExplained[1],2),"%")) +
  ylab(paste("PC2:", round(100*herb_wuni$data$ProportionExplained[2],2),"%"))

# Save the PCoA plot generated
ggsave("herb_weighted_uni.png", width = 8, height = 6)

# Generate weighted UniFrac PCoA plot for omnivores
omni_wuni$data$Vectors %>%
  select(SampleID, PC1, PC2) %>%
  left_join(metadata)%>%
  ggplot(aes(x=PC1, y=PC2, color = Taxonomy_Order, shape = dom_diet_cat)) +
  geom_point(alpha = 0.6, size = 5) + 
  theme_q2r() +
  scale_shape_manual(values=c(16,17), name="Dominant Diet Category") +
  scale_color_discrete(name="Animal Order") +
  theme(text = element_text(size = 15)) +
  xlab(paste("PC1:", round(100*omni_wuni$data$ProportionExplained[1],2),"%")) +
  ylab(paste("PC2:", round(100*omni_wuni$data$ProportionExplained[2],2),"%"))

# Save the PCoA plot generated
ggsave("omni_weighted_uni.png", width = 8, height = 6)

# Generate weighted UniFrac PCoA plot for carnivores
carni_wuni$data$Vectors %>%
  select(SampleID, PC1, PC2) %>%
  left_join(metadata)%>%
  ggplot(aes(x=PC1, y=PC2, color = Taxonomy_Order, shape = dom_diet_cat)) +
  geom_point(alpha = 0.6, size = 5) + 
  theme_q2r() +
  scale_shape_manual(values=c(15,18), name="Dominant Diet Category") +
  scale_color_discrete(name="Animal Order") +
  theme(text = element_text(size = 15)) +
  xlab(paste("PC1:", round(100*carni_wuni$data$ProportionExplained[1],2),"%")) +
  ylab(paste("PC2:", round(100*carni_wuni$data$ProportionExplained[2],2),"%"))

# Save the PCoA plot generated
ggsave("carni_weighted_uni.png", width = 8, height = 6)

# Generate unweighted UniFrac PCoA plot for carnivores
carni_uwuni$data$Vectors %>%
  select(SampleID, PC1, PC2) %>%
  left_join(metadata)%>%
  ggplot(aes(x=PC1, y=PC2, color = Taxonomy_Order, shape = dom_diet_cat)) +
  geom_point(alpha = 0.6, size = 5) + 
  theme_q2r() +
  scale_shape_manual(values=c(15,18), name="Dominant Diet Category") +
  scale_color_discrete(name="Animal Order") +
  theme(text = element_text(size = 15)) +
  xlab(paste("PC1:", round(100*carni_uwuni$data$ProportionExplained[1],2),"%")) +
  ylab(paste("PC2:", round(100*carni_uwuni$data$ProportionExplained[2],2),"%"))

# Save the PCoA plot generated
ggsave("carni_unweighted_uni.png", width = 8, height = 6)

########################################
##### Figures for supplementary figures

##### For captive herbivores
# Read the *qza outputs from QIIME2 to R
herb_uwuni <- read_qza("herb_unweighted_unifrac_pcoa_results.qza")
herb_ja <- read_qza("herb_jaccard_pcoa_results.qza")
herb_bc <- read_qza("herb_bray_curtis_pcoa_results.qza")

# Generate unweighted UniFrac PCoA plot for herbivores
herb_uwuni$data$Vectors %>%
  select(SampleID, PC1, PC2) %>%
  left_join(metadata)%>%
  ggplot(aes(x=PC1, y=PC2, color = Taxonomy_Order, shape = dom_diet_cat)) +
  geom_point(alpha = 0.6, size = 5) + 
  theme_q2r() +
  scale_shape_manual(values=c(16,17), name="Dominant Diet Category") +
  scale_color_discrete(name="Animal Order") +
  theme(text = element_text(size = 15)) +
  xlab(paste("PC1:", round(100*herb_uwuni$data$ProportionExplained[1],2),"%")) +
  ylab(paste("PC2:", round(100*herb_uwuni$data$ProportionExplained[2],2),"%"))

# Save the PCoA plot generated
ggsave("herb_unweighted_uni.png", width = 8, height = 6)

# Generate Jaccard PCoA plot for herbivores
herb_ja$data$Vectors %>%
  select(SampleID, PC1, PC2) %>%
  left_join(metadata)%>%
  ggplot(aes(x=PC1, y=PC2, color = Taxonomy_Order, shape = dom_diet_cat)) +
  geom_point(alpha = 0.6, size = 5) + 
  theme_q2r() +
  scale_shape_manual(values=c(16,17), name="Dominant Diet Category") +
  scale_color_discrete(name="Animal Order") +
  theme(text = element_text(size = 15)) +
  xlab(paste("PC1:", round(100*herb_ja$data$ProportionExplained[1],2),"%")) +
  ylab(paste("PC2:", round(100*herb_ja$data$ProportionExplained[2],2),"%"))

# Save the PCoA plot generated
ggsave("herb_jaccard.png", width = 8, height = 6)

# Generate Bray-Curtis PCoA plot for herbivores
herb_bc$data$Vectors %>%
  select(SampleID, PC1, PC2) %>%
  left_join(metadata)%>%
  ggplot(aes(x=PC1, y=PC2, color = Taxonomy_Order, shape = dom_diet_cat)) +
  geom_point(alpha = 0.6, size = 5) + 
  theme_q2r() +
  scale_shape_manual(values=c(16,17), name="Dominant Diet Category") +
  scale_color_discrete(name="Animal Order") +
  theme(text = element_text(size = 15)) +
  xlab(paste("PC1:", round(100*herb_bc$data$ProportionExplained[1],2),"%")) +
  ylab(paste("PC2:", round(100*herb_bc$data$ProportionExplained[2],2),"%"))

# Save the PCoA plot generated
ggsave("herb_bray_curtis.png", width = 8, height = 6)

##### For captive omnivores
# Read the *qza outputs from QIIME2 to R
omni_uwuni <- read_qza("omni_unweighted_unifrac_pcoa_results.qza")
omni_ja <- read_qza("omni_jaccard_pcoa_results.qza")
omni_bc <- read_qza("omni_bray_curtis_pcoa_results.qza")

# Generate unweighted UniFrac PCoA plot for omnivores
omni_uwuni$data$Vectors %>%
  select(SampleID, PC1, PC2) %>%
  left_join(metadata)%>%
  ggplot(aes(x=PC1, y=PC2, color = Taxonomy_Order, shape = dom_diet_cat)) +
  geom_point(alpha = 0.6, size = 5) + 
  theme_q2r() +
  scale_shape_manual(values=c(16,17), name="Dominant Diet Category") +
  scale_color_discrete(name="Animal Order") +
  theme(text = element_text(size = 15)) +
  xlab(paste("PC1:", round(100*omni_uwuni$data$ProportionExplained[1],2),"%")) +
  ylab(paste("PC2:", round(100*omni_uwuni$data$ProportionExplained[2],2),"%"))

# Save the PCoA plot generated
ggsave("omni_unweighted_uni.png", width = 8, height = 6)

# Generate Jaccard PCoA plot for omnivores
omni_ja$data$Vectors %>%
  select(SampleID, PC1, PC2) %>%
  left_join(metadata)%>%
  ggplot(aes(x=PC1, y=PC2, color = Taxonomy_Order, shape = dom_diet_cat)) +
  geom_point(alpha = 0.6, size = 5) + 
  theme_q2r() +
  scale_shape_manual(values=c(16,17), name="Dominant Diet Category") +
  scale_color_discrete(name="Animal Order") +
  theme(text = element_text(size = 15)) +
  xlab(paste("PC1:", round(100*omni_ja$data$ProportionExplained[1],2),"%")) +
  ylab(paste("PC2:", round(100*omni_ja$data$ProportionExplained[2],2),"%"))

# Save the PCoA plot generated
ggsave("omni_jaccard.png", width = 8, height = 6)

# Generate Bray-Curtis PCoA plot for omnivores
omni_bc$data$Vectors %>%
  select(SampleID, PC1, PC2) %>%
  left_join(metadata)%>%
  ggplot(aes(x=PC1, y=PC2, color = Taxonomy_Order, shape = dom_diet_cat)) +
  geom_point(alpha = 0.6, size = 5) + 
  theme_q2r() +
  scale_shape_manual(values=c(16,17), name="Dominant Diet Category") +
  scale_color_discrete(name="Animal Order") +
  theme(text = element_text(size = 15)) +
  xlab(paste("PC1:", round(100*omni_bc$data$ProportionExplained[1],2),"%")) +
  ylab(paste("PC2:", round(100*omni_bc$data$ProportionExplained[2],2),"%"))

# Save the PCoA plot generated
ggsave("omni_bray_curtis.png", width = 8, height = 6)

##### For captive carnivores
# Read the *qza outputs from QIIME2 to R
carni_ja <- read_qza("carni_jaccard_pcoa_results.qza")
carni_bc <- read_qza("carni_bray_curtis_pcoa_results.qza")

# Generate Jaccard PCoA plot for carnivores
carni_ja$data$Vectors %>%
  select(SampleID, PC1, PC2) %>%
  left_join(metadata)%>%
  ggplot(aes(x=PC1, y=PC2, color = Taxonomy_Order, shape = dom_diet_cat)) +
  geom_point(alpha = 0.6, size = 5) + 
  theme_q2r() +
  scale_shape_manual(values=c(15,18), name="Dominant Diet Category") +
  scale_color_discrete(name="Animal Order") +
  theme(text = element_text(size = 15)) +
  xlab(paste("PC1:", round(100*carni_ja$data$ProportionExplained[1],2),"%")) +
  ylab(paste("PC2:", round(100*carni_ja$data$ProportionExplained[2],2),"%"))

# Save the PCoA plot generated
ggsave("carni_jaccard.png", width = 8, height = 6)

# Generate Bray-Curtis PCoA plot for carnivores
carni_bc$data$Vectors %>%
  select(SampleID, PC1, PC2) %>%
  left_join(metadata)%>%
  ggplot(aes(x=PC1, y=PC2, color = Taxonomy_Order, shape = dom_diet_cat)) +
  geom_point(alpha = 0.6, size = 5) + 
  theme_q2r() +
  scale_shape_manual(values=c(16,17), name="Dominant Diet Category") +
  scale_color_discrete(name="Animal Order") +
  theme(text = element_text(size = 15)) +
  xlab(paste("PC1:", round(100*carni_bc$data$ProportionExplained[1],2),"%")) +
  ylab(paste("PC2:", round(100*carni_bc$data$ProportionExplained[2],2),"%"))

# Save the PCoA plot generated
ggsave("carni_bray_curtis.png", width = 8, height = 6)
########################################

##### Plotting Data from Differential Abundance Analysis

########################################
##### Differential Abundance Plots
########################################
# Generate differential abundance plot for captive carnivores
## At class level
significant_capcarni_class <- significant_capcarni_class %>% 
  mutate(significant_capcarni, Class = factor(Class, levels = Class))

ggplot(significant_capcarni_class, aes(x = log2FoldChange, y = Class)) +
  geom_bar(stat = "identity", width = 0.5) +
  labs(x = expression(log[2]~Fold~Change),
       y = "Class") +
  scale_y_discrete(labels = c("Alphaproteobacteria", "Bacilli", "Flavobacteria")) +
  scale_x_continuous(limits = c(-30, 30)) +
  theme_bw() +
  theme(text = element_text(size = 20))

# Save the differential abundance plot for captive carnivores
ggsave("carni_da.png", width = 8, height = 2)


########################################
# Generate differential abundance plot for captive herbivores
## At class level
significant_capherb_class <- significant_capherb_class %>% 
  mutate(significant_capherb_class, Class = factor(Class, levels = Class))

ggplot(significant_capherb_class, aes(x = log2FoldChange, y = Class)) +
  geom_bar(stat = "identity", width = 0.5) +
  labs(x = expression(log[2]~Fold~Change),
       y = "Class") +
  scale_y_discrete(labels = c("4C0d-2", "Sphingobacteriia", "Fusobacteriia",
                              "Methanomicrobia", "Deltaproteobacteria", "Verruco-5",
                              "Mollicutes", "Fibrobacteria")) +
  scale_x_continuous(limits = c(-30, 30)) +
  theme_bw() +
  theme(text = element_text(size = 20))

# Save the differential abundance plot for captive herbivores
ggsave("herbi_da.png", width = 8, height = 6)


########################################
# Generate differential abundance plot for captive omnivores
## At class level
significant_capomni_class <- significant_capomni_class %>% 
  mutate(significant_capomni_class, Class = factor(Class, levels = Class))

ggplot(significant_capomni_class, aes(x = log2FoldChange, y = Class)) +
  geom_bar(stat = "identity", width = 0.5) +
  labs(x = expression(log[2]~Fold~Change),
       y = "Class") +
  scale_y_discrete(labels = c("Thermoplasmata", "Verrucomicrobiae")) +
  scale_x_continuous(limits = c(-30, 30)) +
  theme_bw() +
  theme(text = element_text(size = 20))

# Save the differential abundance plot for captive omnivores
ggsave("omni_da.png", width = 8, height = 2)
########################################


########################################
##### Relative Abundance Plots
########################################
##### Relative abundance plot for Fusobacteria in captive carnivore

# Filter the phylum of interest, which is Fusobacteria, in captive carnivores
Fuso <- subset_taxa(abundant_capcarni_phylum, Phylum == " p__Fusobacteria")

# Combine and convert all table information in the "Fuso" subset to a single data
# frame to allow for easier plotting
Fuso_long <- psmelt(Fuso)

# Plot relative abundance for Fusobacteria in carnivores
ggplot(Fuso_long, aes(x = dom_diet_cat, y = Abundance)) +
  stat_boxplot(geom = "errorbar", width = 0.5) + 
  geom_boxplot(outlier.size = 3) +
  labs(x     = "Dominant Diet Category",
       y     = "Relative Abundance") +
  theme(text = element_text(size = 20))

# Save the relative abundance boxplot for Fusobacteria in captive carnivores
ggsave("carni_Fuso.png", width = 6, height = 6)



########################################
# Generate differential abundance plot for captive herbivores
## At phylum level
significant_capherb_phylum <- significant_capherb_phylum %>% 
  mutate(significant_capherb_phylum, Phylum = factor(Phylum, levels = Phylum))

ggplot(significant_capherb_phylum, aes(x = log2FoldChange, y = Phylum)) +
  geom_bar(stat = "identity", width = 0.5) +
  labs(x = expression(log[2]~Fold~Change),
       y = "Phylum") +  
  scale_y_discrete(labels = c("Cyanobacteria", "Fusobacteria", "Tenericutes", 
                              "Fibrobacteres")) +
  scale_x_continuous(limits = c(-30, 30)) +
  theme_bw() +
  theme(text = element_text(size = 20))

# Save the differential abundance plot for captive herbivores
ggsave("herbi_da_phylum.png", width = 8, height = 3)


########################################
# Generate differential abundance plot for captive omnivores
## At phylum level
significant_capomni_phylum <- significant_capomni_phylum %>% 
  mutate(significant_capomni_phylum, Phylum = factor(Phylum, levels = Phylum))

ggplot(significant_capomni_phylum, aes(x = log2FoldChange, y = Phylum)) +
  geom_bar(stat = "identity", width = 0.5) +
  labs(x = expression(log[2]~Fold~Change),
       y = "Phylum") +
  scale_y_discrete(labels = c("Euryarchaeota")) +
  scale_x_continuous(limits = c(-30, 30)) +
  theme_bw() +
  theme(text = element_text(size = 20))

# Save the differential abundance plot for captive omnivores
ggsave("omni_da_phylum.png", width = 8, height = 2)
########################################

########################################
#####END
########################################