# de_Leon_et_al_2022_Supplemental_R_script.R
#
# Captivity plays a role in shaping the gut microbiome of social mammals
#
#==========================================
# Notes about file directory set up for this script:
#
# File directories for data imported in this script are set up according to 
# the following guidelines:
#
# 1) metadata files are in the "\data" directory in the project working directory
#
#
# 2) all qzv files imported are generated from the QIIME 2 pipeline and are 
#    organized according to the experiment in which the data was generated.
#    please refer to Team8 lab notebook for details.
#
# For example, for any file from experiment DLTY4721-007, the directory to look 
# into is "\data\DLTY4721-007\"
#
# Note: this script uses data from three experiments in total:
# "\data\DLTY4721-007\"
# "\data\DLTY4721-013\"
# "\data\DLTY4721-035\"
#
#
# 3) Set up for each experiment directory:
#
# "\data\DLTY4721-007\": 
# all qza files generated in the experiment are included directly
# 
# "\data\DLTY4721-013\": 
# this experiment includes two folders:
# "\data\DLTY4721-013\core-metrics-results-non-social\"
# "\data\DLTY4721-013\core-metrics-results-social\"
#
# "\data\DLTY4721-035\"
# this experiment includes one folder:
# "\data\DLTY4721-035\core-metrics-results-social-vs-non-social\"
#
# within each "core-metrics-results" folder are the files for the corresponding 
# alpha and beta diversity metrics
#
#==========================================
# Load necessary packages
library(tidyverse)
library(phyloseq)
library(qiime2R)
library(ape)
library(DESeq2)
library(S4Vectors)
library(dplyr)
library(ggplot2)
library(ggrepel)
library(vegan)
library(here)
library(metagMisc)
library(ggpubr)
#
#
#==========================================
# Setting up functions
#
# For differential abundance analysis
# Relative abundance calculation
calculate_relative_abundance <- function(x) x / sum(x)
#
# Calculate geometric mean
calculate_gm_mean <- function(x, na.rm = TRUE) {
  exp(sum(log(x[x > 0]), na.rm = na.rm) / length(x))}
#
#
#==========================================
# DLTY4721-007 modifying metadata for initial processing
#
# Import metadata txt file and make it a tibble
metadata_ori <- read_tsv(here::here("data","zoo_metadata.txt"))
metadata_ori <- as_tibble(metadata_ori)
#
# Change metadata column name
# Find column index of 10-2_SocialGrpSize
colnames(metadata_ori) # the index is 58
# Change the column name
colnames(metadata_ori)[58] <- "SocialGrpSize"
#
# Add column for social group size category
metadata_mod <- mutate(metadata_ori,
                       SocialGrpSizeCat = case_when(
                         is.na(SocialGrpSize) ~ NA_character_,
                         SocialGrpSize == 1 ~ NA_character_,
                         SocialGrpSize <= 10 ~ "small",
                         SocialGrpSize > 20 ~ "large",
                         TRUE ~ "medium"))
#
# Replace / with _per_ in headers to avoid errors in later analysis
colnames(metadata_mod) <- colnames(metadata_mod) %>% 
  str_replace_all("/", "_per_")
#
#
# Create a new tsv file for the modified metadata
write_tsv(metadata_mod,
          here::here("data","zoo_metadata_mod.tsv"))
#
#
#==========================================
# DLTY4721-029 Differential abundance analysis with DESeq2
#
# 1) Phyloseq Object Creation
#
# Import metadata file
metadata <- read_tsv(here::here("data","zoo_metadata_mod.tsv"))
#
# Change metadata column name
colnames(metadata)[1] <- "SampleID"
#
# Save modified metadata file
write_tsv(metadata, here::here("data","zoo_metadata_mod_2.tsv"))
#
# Create phyloseq object for social animals
(physeq_social <- qza_to_phyloseq(
  features=(here::here("data","DLTY4721-007","table-social.qza")), 
  tree=(here::here("data","DLTY4721-007","rooted-tree.qza")),
  taxonomy=(here::here("data","DLTY4721-007","taxonomy.qza")),
  metadata=(here::here("data","zoo_metadata_mod_2.tsv"))))
physeq_social
#
# Convert phyloseq tree into a dichotomous format
phy_tree(physeq_social) <- multi2di(phy_tree(physeq_social))
#
#
# Create phyloseq object for non-social animals
(physeq_non_social <- qza_to_phyloseq(
  features=(here::here("data","DLTY4721-007","table-non-social.qza")),
  tree=(here::here("data","DLTY4721-007","rooted-tree.qza")),
  taxonomy=(here::here("data","DLTY4721-007","taxonomy.qza")),
  metadata=(here::here("data","zoo_metadata_mod_2.tsv"))))
physeq_non_social
#
# Convert phyloseq tree into a dichotomous format
phy_tree(physeq_non_social) <- multi2di(phy_tree(physeq_non_social))
#
#------------------------------------
#
# 2) Filter samples by sequencing depth
#
# Filter social animals
physeq_social <- prune_samples(sample_sums(physeq_social) >= 103804, 
                               physeq_social)
# Check if samples were filtered according to the selected depth
all(sample_sums(physeq_social) >= 103804)
#
# Filter non-social animals
physeq_non_social <- prune_samples(sample_sums(physeq_non_social) >= 93885, 
                                   physeq_non_social)
# Check if samples were filtered according to the selected depth
all(sample_sums(physeq_non_social) >= 93885)
#
#------------------------------------
#
# 3) Remove low-abundant features
#
# Total abundance of all features across all samples 
social_counts <- taxa_sums(physeq_social)
non_social_counts <- taxa_sums(physeq_non_social)
#
# Calculate relative abundance for social animals 
relative_abundance_social <- calculate_relative_abundance(social_counts)
#
# Calculate relative abundance for non-social animals 
relative_abundance_non_social <- calculate_relative_abundance(non_social_counts)
#
# Set a threshold of 0.05% for social animals
abundant_social<- relative_abundance_social > 0.0005 
(abundant_taxa_social<- prune_taxa(abundant_social, physeq_social))
#
# Set a threshold of 0.05% for non-social animals
abundant_non_social<- relative_abundance_non_social > 0.0005 
(abundant_taxa_non_social<- prune_taxa(abundant_non_social, physeq_non_social))
#
# Set taxanomic analysis to Phyla
# Social animals
(abundant_phyla_social <- tax_glom(abundant_taxa_social, 
                                   taxrank = "Phylum"))
#
# Non-social animals
(abundant_phyla_non_social <- tax_glom(abundant_taxa_non_social, 
                                       taxrank = "Phylum"))
#
#------------------------------------
#
# 4) DESeq object creation
#
# Social
deseq_social <- phyloseq_to_deseq2(abundant_phyla_social, ~ captive_wild)
#
# Non-social
deseq_non_social <- phyloseq_to_deseq2(abundant_phyla_non_social, ~captive_wild)
#
# Use "wild" as reference to express abundances in "captive"
deseq_social$captive_wild <- relevel(deseq_social$captive_wild, 
                                     ref = "wild")
deseq_non_social$captive_wild <- relevel(deseq_non_social$captive_wild, 
                                         ref = "wild")
#
# Calculate a normalization factor for social animals
geo_means_social <- apply(counts(deseq_social), 1, calculate_gm_mean)
deseq_social <- estimateSizeFactors(deseq_social, 
                                    geoMeans = geo_means_social)
deseq_social <- DESeq(deseq_social, fitType = "local")
#
# Calculate a normalization factor for non-social animals
geo_means_non_social <- apply(counts(deseq_non_social), 1, calculate_gm_mean)
deseq_non_social <- estimateSizeFactors(deseq_non_social, 
                                        geoMeans = geo_means_non_social)
deseq_non_social <- DESeq(deseq_non_social, fitType = "local")
#
#------------------------------------
#
# 5) Extracting differentially abundant microbes for social animals
#
resultsNames(deseq_social)
social_diff_abund <- results(deseq_social, 
                             name = "captive_wild_captive_vs_wild")
#
# Define the alpha level to determine significant changes 
alpha <- 0.05
(significant_social <- social_diff_abund %>% 
    as.data.frame() %>% 
    filter(padj < alpha))
#
# Merge the table with significant results with the table of taxonomic information
phyla_df_social <- as.data.frame(tax_table(abundant_phyla_social))
(significant_social <- significant_social %>% 
    merge(phyla_df_social, by = "row.names") %>% 
    arrange(log2FoldChange))
#
# Generate plot to visualize the differential abundance data
sig_social_plot <- ggplot(significant_social, 
                          aes(x = log2FoldChange, y = Phylum)) +
  geom_bar(stat = "identity") +
  labs(x = expression(log[2]~fold~change),
       y = "Phylum") +
  theme_classic()
#
sig_social_plot
#
# save plot
ggsave("social_diff_abun.png", 
       plot = sig_social_plot)
#
#------------------------------------
#
# 6) Extracting differentially abundant microbes for non-social animals
#
resultsNames(deseq_non_social)
non_social_diff_abund <- results(deseq_non_social, 
                                 name = "captive_wild_captive_vs_wild")
#
# Define the alpha level to determine significant changes 
alpha <- 0.05
(significant_non_social <- non_social_diff_abund %>% 
    as.data.frame() %>% 
    filter(padj < alpha))
#
# Merge the table with significant results with the table of taxonomic information
phyla_df_non_social <- as.data.frame(tax_table(abundant_phyla_non_social))
(significant_non_social <- significant_non_social %>% 
    merge(phyla_df_non_social, by = "row.names") %>% 
    arrange(log2FoldChange))
#
# Generate plot to visualize the differential abundance data
sig_non_social_plot <- ggplot(significant_non_social, 
                              aes(x = log2FoldChange, y = Phylum)) +
  geom_bar(stat = "identity") +
  labs(x = expression(log[2]~fold~change),
       y = "Phylum") +
  theme_classic()
#
sig_non_social_plot
#
# save plot
ggsave("non_social_diff_abun.png", 
       plot = sig_non_social_plot)
#
#==========================================
# DLTY4721-053 Re-creating alpha diversity bar plots
#
# import metadata
meta <- read_tsv(here::here("data","zoo_metadata_mod.tsv"))
#
# replace captive and wild with captive mammals and wild mammals, in captive_wild
meta$captive_wild <- meta$captive_wild %>% 
  str_replace_all(c("captive"= "Captive Mammals", "wild"="Wild Mammals"))
#
# replace Yes and No with social mammals and non-social mammals, in SocialityHMC
meta$SocialityHMC <- meta$SocialityHMC %>% 
  str_replace_all(c("Yes"= "Social Mammals", "No"="Non-social Mammals"))
#
#
#
# 1) Social captive vs. wild Faith's PD
#
# import qza file
social_faith_pd <- read_qza(here::here("data",
                                       "DLTY4721-013",
                                       "core-metrics-results-social",
                                       "faith_pd_vector.qza"))
#
# move sample names to new column to match the metadata and convert social_faith_pd from list to data frame
social_faith_pd <- social_faith_pd$data %>% rownames_to_column("#SampleID")
#
# change column name
colnames(social_faith_pd)[2] <- "social_faith_pd"
#
# merge social_faith_pd and metadata
meta <- meta %>% left_join(social_faith_pd)
#
# visualize in a boxplot
social_faith_pd_plot <- meta %>%
  filter(!is.na(social_faith_pd)) %>% 
  ggplot(aes(x = captive_wild,
             y = social_faith_pd)) +
  geom_boxplot() +
  theme_classic() +
  xlab("") +
  ylab("Faith's PD Distance") +
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=12))
#
social_faith_pd_plot
#
# save plot
ggsave("social_faith_pd_plot.png", 
       plot = social_faith_pd_plot)
#
#------------------------------------
#
# 2) Social captive vs. wild Pielou's evenness
#
# import qza file
social_evenness <- read_qza(here::here("data",
                                       "DLTY4721-013",
                                       "core-metrics-results-social",
                                       "evenness_vector.qza"))
#
# move sample names to new column to match the metadata and convert social_evenness from list to data frame
social_evenness <- social_evenness$data %>% rownames_to_column("#SampleID")
#
# change column name
colnames(social_evenness)[2] <- "social_pielou_evenness"
#
# merge social_evenness and metadata
meta <- meta %>% left_join(social_evenness)
#
# visualize in a boxplot
social_evenness_plot <- meta %>%
  filter(!is.na(social_pielou_evenness)) %>% 
  ggplot(aes(x = captive_wild,
             y = social_pielou_evenness)) +
  geom_boxplot() +
  theme_classic() +
  xlab("") +
  ylab("Pielou's Evenness Index")+
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=12)) +
  ylim(0,1) +
  geom_bracket(xmin = "Captive Mammals", 
               xmax = "Wild Mammals",
               y.position = 0.97,
               label = "*",
               label.size = 5)
#
social_evenness_plot
#
# save plot
ggsave("social_evenness_plot.png", 
       plot = social_evenness_plot)
#
#------------------------------------
#
# 3) Non-social captive vs. wild Faith's PD
#
# import qza file
non_social_faith_pd <- read_qza(here::here("data",
                                           "DLTY4721-013",
                                           "core-metrics-results-non-social",
                                           "faith_pd_vector.qza"))
#
# move sample names to new column to match the metadata and convert non_social_faith_pd from list to data frame
non_social_faith_pd <- non_social_faith_pd$data %>% rownames_to_column("#SampleID")
#
# change column name
colnames(non_social_faith_pd)[2] <- "non_social_faith_pd"
#
# merge non_social_faith_pd and metadata
meta <- meta %>% left_join(non_social_faith_pd)
#
#
# visualize in a boxplot
non_social_faith_pd_plot <- meta %>%
  filter(!is.na(non_social_faith_pd)) %>% 
  ggplot(aes(x = captive_wild,
             y = non_social_faith_pd)) +
  geom_boxplot() +
  theme_classic() +
  xlab("") +
  ylab("Faith's PD Distance") +
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=12))
#
non_social_faith_pd_plot
#
# save plot
ggsave("non_social_faith_pd_plot.png", 
       plot = non_social_faith_pd_plot)
#
#------------------------------------
#
# 4) Non-social captive vs. wild Pielou's evenness
#
# import qza file
non_social_evenness <- read_qza(here::here("data",
                                           "DLTY4721-013",
                                           "core-metrics-results-non-social",
                                           "evenness_vector.qza"))
#
# move sample names to new column to match the metadata and convert non_social_evenness from list to data frame
non_social_evenness <- non_social_evenness$data %>% rownames_to_column("#SampleID")
#
# change column name
colnames(non_social_evenness)[2] <- "non_social_pielou_evenness"
#
# merge non_social_evenness and metadata
meta <- meta %>% left_join(non_social_evenness)
#
# visualize in a boxplot
non_social_evenness_plot <- meta %>%
  filter(!is.na(non_social_pielou_evenness)) %>% 
  ggplot(aes(x = captive_wild,
             y = non_social_pielou_evenness)) +
  geom_boxplot() +
  theme_classic() +
  xlab("") +
  ylab("Pielou's Evenness Index") +
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=12)) +
  ylim(0,1)
#
non_social_evenness_plot
#
# save plot
ggsave("non_social_evenness_plot.png", 
       plot = non_social_evenness_plot)
#
#------------------------------------
#
# 5) Social vs. Non-social Faith's PD
#
# import qza file
social_non_social_faith_pd <- read_qza(here::here("data",
                                                  "DLTY4721-035",
                                                  "core-metrics-results-social-vs-non-social",
                                                  "faith_pd_vector.qza"))
#
# move sample names to new column to match the metadata and convert social_non_social_faith_pd from list to data frame
social_non_social_faith_pd <- social_non_social_faith_pd$data %>% rownames_to_column("#SampleID")
#
# change column name
colnames(social_non_social_faith_pd)[2] <- "social_non_social_faith_pd"
#
# merge social_non_social_faith_pd and metadata
meta <- meta %>% left_join(social_non_social_faith_pd)
#
# visualize in a boxplot
social_non_social_faith_pd_plot <- meta %>%
  filter(!is.na(social_non_social_faith_pd)) %>% 
  ggplot(aes(x = SocialityHMC,
             y = social_non_social_faith_pd)) +
  geom_boxplot() +
  theme_classic() +
  xlab("") +
  ylab("Faith's PD Distance") +
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=12))
#
social_non_social_faith_pd_plot
#
# save plot
ggsave("social_vs_non_social_faith_pd_plot.png", 
       plot = social_non_social_faith_pd_plot)
#
#------------------------------------
#
# 6) Social vs. Non-social Pielou's evenness
#
# import qza file
social_non_social_evenness <- read_qza(here::here("data",
                                                  "DLTY4721-035",
                                                  "core-metrics-results-social-vs-non-social",
                                                  "evenness_vector.qza"))
#
# move sample names to new column to match the metadata and convert social_non_social_evenness from list to data frame
social_non_social_evenness <- social_non_social_evenness$data %>% rownames_to_column("#SampleID")
#
# change column name
colnames(social_non_social_evenness)[2] <- "social_non_social_pielou_evenness"
#
# merge social_non_social_evenness and metadata
meta <- meta %>% left_join(social_non_social_evenness)
#
# visualize in a boxplot
social_non_social_evenness_plot <- meta %>%
  filter(!is.na(social_non_social_pielou_evenness)) %>% 
  ggplot(aes(x = SocialityHMC,
             y = social_non_social_pielou_evenness)) +
  geom_boxplot() +
  theme_classic() +
  xlab("") +
  ylab("Pielou's Evenness Index") +
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=12)) +
  ylim(0,1) +
  geom_bracket(xmin = "Non-social Mammals", 
               xmax = "Social Mammals",
               y.position = 0.97,
               label = "*",
               label.size = 5)
#
social_non_social_evenness_plot
#
# save plot
ggsave("social_vs_non_social_evenness_plot.png", 
       plot = social_non_social_evenness_plot)