# de_Leon_et_al_2022_Supplemental_R_script.R # # Captivity plays a role in shaping the gut microbiome of social mammals # #========================================== # Notes about file directory set up for this script: # # File directories for data imported in this script are set up according to # the following guidelines: # # 1) metadata files are in the "\data" directory in the project working directory # # # 2) all qzv files imported are generated from the QIIME 2 pipeline and are # organized according to the experiment in which the data was generated. # please refer to Team8 lab notebook for details. # # For example, for any file from experiment DLTY4721-007, the directory to look # into is "\data\DLTY4721-007\" # # Note: this script uses data from three experiments in total: # "\data\DLTY4721-007\" # "\data\DLTY4721-013\" # "\data\DLTY4721-035\" # # # 3) Set up for each experiment directory: # # "\data\DLTY4721-007\": # all qza files generated in the experiment are included directly # # "\data\DLTY4721-013\": # this experiment includes two folders: # "\data\DLTY4721-013\core-metrics-results-non-social\" # "\data\DLTY4721-013\core-metrics-results-social\" # # "\data\DLTY4721-035\" # this experiment includes one folder: # "\data\DLTY4721-035\core-metrics-results-social-vs-non-social\" # # within each "core-metrics-results" folder are the files for the corresponding # alpha and beta diversity metrics # #========================================== # Load necessary packages library(tidyverse) library(phyloseq) library(qiime2R) library(ape) library(DESeq2) library(S4Vectors) library(dplyr) library(ggplot2) library(ggrepel) library(vegan) library(here) library(metagMisc) library(ggpubr) # # #========================================== # Setting up functions # # For differential abundance analysis # Relative abundance calculation calculate_relative_abundance <- function(x) x / sum(x) # # Calculate geometric mean calculate_gm_mean <- function(x, na.rm = TRUE) { exp(sum(log(x[x > 0]), na.rm = na.rm) / length(x))} # # #========================================== # DLTY4721-007 modifying metadata for initial processing # # Import metadata txt file and make it a tibble metadata_ori <- read_tsv(here::here("data","zoo_metadata.txt")) metadata_ori <- as_tibble(metadata_ori) # # Change metadata column name # Find column index of 10-2_SocialGrpSize colnames(metadata_ori) # the index is 58 # Change the column name colnames(metadata_ori)[58] <- "SocialGrpSize" # # Add column for social group size category metadata_mod <- mutate(metadata_ori, SocialGrpSizeCat = case_when( is.na(SocialGrpSize) ~ NA_character_, SocialGrpSize == 1 ~ NA_character_, SocialGrpSize <= 10 ~ "small", SocialGrpSize > 20 ~ "large", TRUE ~ "medium")) # # Replace / with _per_ in headers to avoid errors in later analysis colnames(metadata_mod) <- colnames(metadata_mod) %>% str_replace_all("/", "_per_") # # # Create a new tsv file for the modified metadata write_tsv(metadata_mod, here::here("data","zoo_metadata_mod.tsv")) # # #========================================== # DLTY4721-029 Differential abundance analysis with DESeq2 # # 1) Phyloseq Object Creation # # Import metadata file metadata <- read_tsv(here::here("data","zoo_metadata_mod.tsv")) # # Change metadata column name colnames(metadata)[1] <- "SampleID" # # Save modified metadata file write_tsv(metadata, here::here("data","zoo_metadata_mod_2.tsv")) # # Create phyloseq object for social animals (physeq_social <- qza_to_phyloseq( features=(here::here("data","DLTY4721-007","table-social.qza")), tree=(here::here("data","DLTY4721-007","rooted-tree.qza")), taxonomy=(here::here("data","DLTY4721-007","taxonomy.qza")), metadata=(here::here("data","zoo_metadata_mod_2.tsv")))) physeq_social # # Convert phyloseq tree into a dichotomous format phy_tree(physeq_social) <- multi2di(phy_tree(physeq_social)) # # # Create phyloseq object for non-social animals (physeq_non_social <- qza_to_phyloseq( features=(here::here("data","DLTY4721-007","table-non-social.qza")), tree=(here::here("data","DLTY4721-007","rooted-tree.qza")), taxonomy=(here::here("data","DLTY4721-007","taxonomy.qza")), metadata=(here::here("data","zoo_metadata_mod_2.tsv")))) physeq_non_social # # Convert phyloseq tree into a dichotomous format phy_tree(physeq_non_social) <- multi2di(phy_tree(physeq_non_social)) # #------------------------------------ # # 2) Filter samples by sequencing depth # # Filter social animals physeq_social <- prune_samples(sample_sums(physeq_social) >= 103804, physeq_social) # Check if samples were filtered according to the selected depth all(sample_sums(physeq_social) >= 103804) # # Filter non-social animals physeq_non_social <- prune_samples(sample_sums(physeq_non_social) >= 93885, physeq_non_social) # Check if samples were filtered according to the selected depth all(sample_sums(physeq_non_social) >= 93885) # #------------------------------------ # # 3) Remove low-abundant features # # Total abundance of all features across all samples social_counts <- taxa_sums(physeq_social) non_social_counts <- taxa_sums(physeq_non_social) # # Calculate relative abundance for social animals relative_abundance_social <- calculate_relative_abundance(social_counts) # # Calculate relative abundance for non-social animals relative_abundance_non_social <- calculate_relative_abundance(non_social_counts) # # Set a threshold of 0.05% for social animals abundant_social<- relative_abundance_social > 0.0005 (abundant_taxa_social<- prune_taxa(abundant_social, physeq_social)) # # Set a threshold of 0.05% for non-social animals abundant_non_social<- relative_abundance_non_social > 0.0005 (abundant_taxa_non_social<- prune_taxa(abundant_non_social, physeq_non_social)) # # Set taxanomic analysis to Phyla # Social animals (abundant_phyla_social <- tax_glom(abundant_taxa_social, taxrank = "Phylum")) # # Non-social animals (abundant_phyla_non_social <- tax_glom(abundant_taxa_non_social, taxrank = "Phylum")) # #------------------------------------ # # 4) DESeq object creation # # Social deseq_social <- phyloseq_to_deseq2(abundant_phyla_social, ~ captive_wild) # # Non-social deseq_non_social <- phyloseq_to_deseq2(abundant_phyla_non_social, ~captive_wild) # # Use "wild" as reference to express abundances in "captive" deseq_social$captive_wild <- relevel(deseq_social$captive_wild, ref = "wild") deseq_non_social$captive_wild <- relevel(deseq_non_social$captive_wild, ref = "wild") # # Calculate a normalization factor for social animals geo_means_social <- apply(counts(deseq_social), 1, calculate_gm_mean) deseq_social <- estimateSizeFactors(deseq_social, geoMeans = geo_means_social) deseq_social <- DESeq(deseq_social, fitType = "local") # # Calculate a normalization factor for non-social animals geo_means_non_social <- apply(counts(deseq_non_social), 1, calculate_gm_mean) deseq_non_social <- estimateSizeFactors(deseq_non_social, geoMeans = geo_means_non_social) deseq_non_social <- DESeq(deseq_non_social, fitType = "local") # #------------------------------------ # # 5) Extracting differentially abundant microbes for social animals # resultsNames(deseq_social) social_diff_abund <- results(deseq_social, name = "captive_wild_captive_vs_wild") # # Define the alpha level to determine significant changes alpha <- 0.05 (significant_social <- social_diff_abund %>% as.data.frame() %>% filter(padj < alpha)) # # Merge the table with significant results with the table of taxonomic information phyla_df_social <- as.data.frame(tax_table(abundant_phyla_social)) (significant_social <- significant_social %>% merge(phyla_df_social, by = "row.names") %>% arrange(log2FoldChange)) # # Generate plot to visualize the differential abundance data sig_social_plot <- ggplot(significant_social, aes(x = log2FoldChange, y = Phylum)) + geom_bar(stat = "identity") + labs(x = expression(log[2]~fold~change), y = "Phylum") + theme_classic() # sig_social_plot # # save plot ggsave("social_diff_abun.png", plot = sig_social_plot) # #------------------------------------ # # 6) Extracting differentially abundant microbes for non-social animals # resultsNames(deseq_non_social) non_social_diff_abund <- results(deseq_non_social, name = "captive_wild_captive_vs_wild") # # Define the alpha level to determine significant changes alpha <- 0.05 (significant_non_social <- non_social_diff_abund %>% as.data.frame() %>% filter(padj < alpha)) # # Merge the table with significant results with the table of taxonomic information phyla_df_non_social <- as.data.frame(tax_table(abundant_phyla_non_social)) (significant_non_social <- significant_non_social %>% merge(phyla_df_non_social, by = "row.names") %>% arrange(log2FoldChange)) # # Generate plot to visualize the differential abundance data sig_non_social_plot <- ggplot(significant_non_social, aes(x = log2FoldChange, y = Phylum)) + geom_bar(stat = "identity") + labs(x = expression(log[2]~fold~change), y = "Phylum") + theme_classic() # sig_non_social_plot # # save plot ggsave("non_social_diff_abun.png", plot = sig_non_social_plot) # #========================================== # DLTY4721-053 Re-creating alpha diversity bar plots # # import metadata meta <- read_tsv(here::here("data","zoo_metadata_mod.tsv")) # # replace captive and wild with captive mammals and wild mammals, in captive_wild meta$captive_wild <- meta$captive_wild %>% str_replace_all(c("captive"= "Captive Mammals", "wild"="Wild Mammals")) # # replace Yes and No with social mammals and non-social mammals, in SocialityHMC meta$SocialityHMC <- meta$SocialityHMC %>% str_replace_all(c("Yes"= "Social Mammals", "No"="Non-social Mammals")) # # # # 1) Social captive vs. wild Faith's PD # # import qza file social_faith_pd <- read_qza(here::here("data", "DLTY4721-013", "core-metrics-results-social", "faith_pd_vector.qza")) # # move sample names to new column to match the metadata and convert social_faith_pd from list to data frame social_faith_pd <- social_faith_pd$data %>% rownames_to_column("#SampleID") # # change column name colnames(social_faith_pd)[2] <- "social_faith_pd" # # merge social_faith_pd and metadata meta <- meta %>% left_join(social_faith_pd) # # visualize in a boxplot social_faith_pd_plot <- meta %>% filter(!is.na(social_faith_pd)) %>% ggplot(aes(x = captive_wild, y = social_faith_pd)) + geom_boxplot() + theme_classic() + xlab("") + ylab("Faith's PD Distance") + theme(axis.text=element_text(size=12), axis.title=element_text(size=12)) # social_faith_pd_plot # # save plot ggsave("social_faith_pd_plot.png", plot = social_faith_pd_plot) # #------------------------------------ # # 2) Social captive vs. wild Pielou's evenness # # import qza file social_evenness <- read_qza(here::here("data", "DLTY4721-013", "core-metrics-results-social", "evenness_vector.qza")) # # move sample names to new column to match the metadata and convert social_evenness from list to data frame social_evenness <- social_evenness$data %>% rownames_to_column("#SampleID") # # change column name colnames(social_evenness)[2] <- "social_pielou_evenness" # # merge social_evenness and metadata meta <- meta %>% left_join(social_evenness) # # visualize in a boxplot social_evenness_plot <- meta %>% filter(!is.na(social_pielou_evenness)) %>% ggplot(aes(x = captive_wild, y = social_pielou_evenness)) + geom_boxplot() + theme_classic() + xlab("") + ylab("Pielou's Evenness Index")+ theme(axis.text=element_text(size=12), axis.title=element_text(size=12)) + ylim(0,1) + geom_bracket(xmin = "Captive Mammals", xmax = "Wild Mammals", y.position = 0.97, label = "*", label.size = 5) # social_evenness_plot # # save plot ggsave("social_evenness_plot.png", plot = social_evenness_plot) # #------------------------------------ # # 3) Non-social captive vs. wild Faith's PD # # import qza file non_social_faith_pd <- read_qza(here::here("data", "DLTY4721-013", "core-metrics-results-non-social", "faith_pd_vector.qza")) # # move sample names to new column to match the metadata and convert non_social_faith_pd from list to data frame non_social_faith_pd <- non_social_faith_pd$data %>% rownames_to_column("#SampleID") # # change column name colnames(non_social_faith_pd)[2] <- "non_social_faith_pd" # # merge non_social_faith_pd and metadata meta <- meta %>% left_join(non_social_faith_pd) # # # visualize in a boxplot non_social_faith_pd_plot <- meta %>% filter(!is.na(non_social_faith_pd)) %>% ggplot(aes(x = captive_wild, y = non_social_faith_pd)) + geom_boxplot() + theme_classic() + xlab("") + ylab("Faith's PD Distance") + theme(axis.text=element_text(size=12), axis.title=element_text(size=12)) # non_social_faith_pd_plot # # save plot ggsave("non_social_faith_pd_plot.png", plot = non_social_faith_pd_plot) # #------------------------------------ # # 4) Non-social captive vs. wild Pielou's evenness # # import qza file non_social_evenness <- read_qza(here::here("data", "DLTY4721-013", "core-metrics-results-non-social", "evenness_vector.qza")) # # move sample names to new column to match the metadata and convert non_social_evenness from list to data frame non_social_evenness <- non_social_evenness$data %>% rownames_to_column("#SampleID") # # change column name colnames(non_social_evenness)[2] <- "non_social_pielou_evenness" # # merge non_social_evenness and metadata meta <- meta %>% left_join(non_social_evenness) # # visualize in a boxplot non_social_evenness_plot <- meta %>% filter(!is.na(non_social_pielou_evenness)) %>% ggplot(aes(x = captive_wild, y = non_social_pielou_evenness)) + geom_boxplot() + theme_classic() + xlab("") + ylab("Pielou's Evenness Index") + theme(axis.text=element_text(size=12), axis.title=element_text(size=12)) + ylim(0,1) # non_social_evenness_plot # # save plot ggsave("non_social_evenness_plot.png", plot = non_social_evenness_plot) # #------------------------------------ # # 5) Social vs. Non-social Faith's PD # # import qza file social_non_social_faith_pd <- read_qza(here::here("data", "DLTY4721-035", "core-metrics-results-social-vs-non-social", "faith_pd_vector.qza")) # # move sample names to new column to match the metadata and convert social_non_social_faith_pd from list to data frame social_non_social_faith_pd <- social_non_social_faith_pd$data %>% rownames_to_column("#SampleID") # # change column name colnames(social_non_social_faith_pd)[2] <- "social_non_social_faith_pd" # # merge social_non_social_faith_pd and metadata meta <- meta %>% left_join(social_non_social_faith_pd) # # visualize in a boxplot social_non_social_faith_pd_plot <- meta %>% filter(!is.na(social_non_social_faith_pd)) %>% ggplot(aes(x = SocialityHMC, y = social_non_social_faith_pd)) + geom_boxplot() + theme_classic() + xlab("") + ylab("Faith's PD Distance") + theme(axis.text=element_text(size=12), axis.title=element_text(size=12)) # social_non_social_faith_pd_plot # # save plot ggsave("social_vs_non_social_faith_pd_plot.png", plot = social_non_social_faith_pd_plot) # #------------------------------------ # # 6) Social vs. Non-social Pielou's evenness # # import qza file social_non_social_evenness <- read_qza(here::here("data", "DLTY4721-035", "core-metrics-results-social-vs-non-social", "evenness_vector.qza")) # # move sample names to new column to match the metadata and convert social_non_social_evenness from list to data frame social_non_social_evenness <- social_non_social_evenness$data %>% rownames_to_column("#SampleID") # # change column name colnames(social_non_social_evenness)[2] <- "social_non_social_pielou_evenness" # # merge social_non_social_evenness and metadata meta <- meta %>% left_join(social_non_social_evenness) # # visualize in a boxplot social_non_social_evenness_plot <- meta %>% filter(!is.na(social_non_social_pielou_evenness)) %>% ggplot(aes(x = SocialityHMC, y = social_non_social_pielou_evenness)) + geom_boxplot() + theme_classic() + xlab("") + ylab("Pielou's Evenness Index") + theme(axis.text=element_text(size=12), axis.title=element_text(size=12)) + ylim(0,1) + geom_bracket(xmin = "Non-social Mammals", xmax = "Social Mammals", y.position = 0.97, label = "*", label.size = 5) # social_non_social_evenness_plot # # save plot ggsave("social_vs_non_social_evenness_plot.png", plot = social_non_social_evenness_plot)