# Increased body mass is associated with decreased gut microbiome diversity in Parkinson's Disease patients # Novia Chan, Clarisse Echavez, Angela Mathews, Josie Setiawan # Department of Microbiology and Immunology, University of British Columbia, Vancouver, British Columbia, Canada ## Supplemental R Script # Open a new R session # Set working directory as to where the .qza files and metadata .tsv files are contained ## Alpha diversity metrics # Load CRAN packages library(tidyverse) #Loading special packages library(qiime2R) # Import metadata sorted_parkinsons_metadata <- read_tsv('parkinsons_metadata_sorted_grouped.tsv') ## Faith's phylogenetic diversity # Import faith .qza file faith_pd <- read_qza("faith_pd_vector.qza") faith <- faith_pd$data %>% rownames_to_column("SampleID") faith_pd_metadata <- faith %>% left_join(sorted_parkinsons_metadata, by = c("SampleID" = "#SampleID")) # Plot Faith's phylogenetic diversity ggplot(faith_pd_metadata,aes(x = BMI_grouping, y = faith_pd)) + stat_boxplot(geom = "errorbar", width = 0.2) + geom_boxplot() + scale_y_continuous(breaks = seq(10, 26, by = 2)) + scale_x_discrete(limits=c("healthy","overweight","obese")) + theme_classic() + theme(strip.background =element_rect(fill="white", linetype = "blank"), axis.text.x = element_text(size = 12), axis.title.x = element_text(face="bold", size = 16), axis.text.y = element_text(size = 12), axis.title.y = element_text(face="bold", size = 16)) + labs(x = "BMI grouping", y = "Faith's phylogenetic diversity",) ## Pielou's evenness # Import evenness .qza file evenness_pd <- read_qza("evenness_vector.qza") evenness <- evenness_pd$data %>% rownames_to_column("SampleID") evenness_pd_metadata <- evenness %>% left_join(sorted_parkinsons_metadata, by = c("SampleID" = "#SampleID")) # Plot Pielou's evenness ggplot(evenness_pd_metadata,aes(x = BMI_grouping, y = pielou_evenness)) + stat_boxplot(geom = "errorbar", width = 0.2) + geom_boxplot() + scale_x_discrete(limits=c("healthy", "overweight", "obese")) + theme_classic() + theme(strip.background =element_rect(fill="white", linetype = "blank"), axis.text.x = element_text(size = 12), axis.title.x = element_text(face="bold", size = 16), axis.text.y = element_text(size = 12), axis.title.y = element_text(face="bold", size = 16)) + labs(x = "BMI grouping", y = "Pielou's Evenness",) # Open a new R session # Set working directory as to where the .qza files and metadata .tsv files are contained ## Differential Abundance # Load CRAN packages library(vegan) library(ape) library(tidyverse) # Load Bioconductor packages library(phyloseq) library(DESeq2) #Loading special packages library(qiime2R) # Import the sorted Parkinson's metadata into R p_metadata <- read_tsv("parkinsons_metadata_sorted_grouped.tsv") # Subset metadata to only include healthy and obese BMI groupings PD_metadata <- p_metadata[(p_metadata$BMI_grouping=="healthy") | (p_metadata$BMI_grouping=="obese"),] %>% subset(Disease=="PD") # Remove the "#SampleID" header for the first column of the metadata to prepare for creation of a phyloseq object PD_metadata <- PD_metadata %>% column_to_rownames(var = "#SampleID") # Preparing.qza files and metadata and creating a phyloseq object sam_data = sample_data(PD_metadata) features_phyloseq <- qza_to_phyloseq(features = "parkinsons-table.qza") tree_phyloseq <- qza_to_phyloseq(tree = "rooted-tree.qza") taxonomy_phyloseq <- qza_to_phyloseq(taxonomy = "parkinsons-taxonomy.qza") phyloseq_object_all = phyloseq(sample_data(sam_data), features_phyloseq, tree_phyloseq, taxonomy_phyloseq) # Access the otu_table(), sample_data(), and tax_table() phyloseq object components taxa_table <- phyloseq_object_all@otu_table taxonomy <- phyloseq_object_all@tax_table metadata <- phyloseq_object_all@sam_data # Convert the phylogenetic tree from multichotomous to dichotomous format phy_tree <- multi2di(phy_tree(tree_phyloseq)) # Review the samples for the number of reads sample_sums(phyloseq_object_all) # Rarefy dataset to 10232 reads per sample based on QIIME2 analysis physeq_rar <- rarefy_even_depth(phyloseq_object_all, sample.size = 10232) # Load the relative abundance function calculate_relative_abundance <- function(x) x / sum(x) # Calculate relative abundance physeq_RA <- transform_sample_counts(phyloseq_object_all, calculate_relative_abundance) # Remove low-abundant features total_counts <- taxa_sums(phyloseq_object_all) relative_abundance <- calculate_relative_abundance(total_counts) # Filter out sequences that have a higher relative abundance than a threshold ratio of 0.0005 abundant <- relative_abundance > 0.0005 (abundant_taxa <- prune_taxa(abundant, phyloseq_object_all)) # Set taxonomic levels for analysis (family <- tax_glom(phyloseq_object_all, taxrank = "Family", NArm = FALSE)) # DESeq Object Creation with the categorical variable of BMI grouping deseq_BMI_grouping <- phyloseq_to_deseq2(family, ~ BMI_grouping) # Set the healthy BMI grouping as the reference deseq_BMI_grouping$BMI_grouping <- relevel(deseq_BMI_grouping$BMI_grouping, ref = "healthy") # Load the geometric mean function calculate_gm_mean <- function(x, na.rm = TRUE) { exp(sum(log(x[x > 0]), na.rm = na.rm) / length(x)) } # Calculate geometric means geo_means <- apply(counts(deseq_BMI_grouping), 1, calculate_gm_mean) deseq_BMI_grouping <- estimateSizeFactors(deseq_BMI_grouping, geoMeans = geo_means) deseq_BMI_grouping <- DESeq(deseq_BMI_grouping, fitType = "local") # View the performed comparisons from DESeq2 resultsNames(deseq_BMI_grouping) # Extracting differentially abundant microbes with the obese vs healthy BMI grouping comparison BMI_grouping_diff_abund <- results(deseq_BMI_grouping, name = "BMI_grouping_obese_vs_healthy") # Define the alpha level to determine significant changes alpha <- 0.05 # Convert results from DESeq to a data frame for further processing and filter any taxa in which the FDR-corrected p-value is below the alpha level (significant_BMI_grouping <- BMI_grouping_diff_abund %>% as.data.frame() %>% filter(padj < alpha)) # Merge table with significant results with the table of taxonomic information according to "row.names" and sort data frame by log2FoldChange from lowest to highest family_df <- as.data.frame(tax_table(family)) (significant_BMI_grouping <- significant_BMI_grouping %>% merge(family_df, by = "row.names") %>% arrange(log2FoldChange)) # Visualize the differential abundance family data ggplot(significant_BMI_grouping, aes(x = log2FoldChange, y = Family)) + geom_bar(stat = "identity") + labs(title = "Differential abundant family", x = expression(log[2]~fold~change), y = "Family") + theme_bw(base_size = 30) # Open a new R session # Set working directory as to where the .qza files and metadata .tsv files are contained ## Indicator taxa analysis # Load necessary packages library(tidyverse) library(phyloseq) library(indicspecies) library(qiime2R) # Function to group asv table by higher order taxonomy group_by_taxonomy = function(asv_table, taxonomy, rank){ asv_table = as.data.frame(asv_table) taxonomy = as.data.frame(taxonomy) taxonomy$ASV = rownames(taxonomy) asv_table$ASV = rownames(asv_table) asv_table = inner_join(taxonomy,asv_table,by="ASV") asv_table$taxa = apply(asv_table[,1:rank],1,paste,collapse=" ") asv_table = asv_table[,-(1:8)] asv_table = group_by(data.frame(asv_table),taxa) taxa_table = as.data.frame(summarise_all(asv_table,sum)) rownames(taxa_table) = taxa_table$taxa return(taxa_table[,-1]) } # Import the sorted Parkinson's metadata into R p_metadata <- read_tsv("parkinsons_metadata_sorted_grouped.tsv") # Subset metadata to only include healthy, overweight, and obese BMI groupings PD_metadata <- p_metadata[(p_metadata$BMI_grouping=="healthy") | (p_metadata$BMI_grouping=="overweight") | (p_metadata$BMI_grouping=="obese"),] %>% subset(Disease=="PD") # Remove the "#SampleID" header for the first column of the metadata to prepare for creation of a phyloseq object PD_metadata <- PD_metadata %>% column_to_rownames(var = "#SampleID") # Preparing.qza files and metadata and creating a phyloseq object sam_data = sample_data(PD_metadata) features_phyloseq <- qza_to_phyloseq(features = "parkinsons-table.qza") tree_phyloseq <- qza_to_phyloseq(tree = "rooted-tree.qza") taxonomy_phyloseq <- qza_to_phyloseq(taxonomy = "parkinsons-taxonomy.qza") phyloseq_object_all = phyloseq(sample_data(sam_data), features_phyloseq, tree_phyloseq, taxonomy_phyloseq) # Access the otu_table(), sample_data(), and tax_table() phyloseq object components taxa_table <- phyloseq_object_all@otu_table taxonomy <- phyloseq_object_all@tax_table metadata <- phyloseq_object_all@sam_data # Create the taxa table for the rank of taxonomy required # Kingdom=1, Phylum=2, Class=3, Order=4, Family=5, Genus=6, Species=7 taxa_table = group_by_taxonomy(taxa_table, taxonomy, 5) # Calculate indicator values indicator_multipatt = multipatt(t(taxa_table),metadata$BMI_grouping,duleg=T) # Look at output summary(indicator_multipatt) # Write output to file (only writes significant indicators) indicator_output = capture.output(summary(indicator_multipatt,indvalcomp=TRUE)) write.table(indicator_output,file="indiciator_values_family_healthyoverweightobese_PD.txt",row.names=F,quote=F)