#!/bin/bash mkdir Parkinsons_Data cd Parkinsons_Data #I did not realize there were already metadata on the server so I took the excel file on the course page, pasted into google sheets and saved as a .tsv. Then I copied it to the server in which it was called "Parkinsons_Metadata" cp -R /mnt/datasets/project_2/parkinsons/seqs /root/Parkinsons_Data mv parkinsons_manifest.txt parkinsons_manifest.tsv cp mnt/datasets/project_2/parkinsons/ #Putting metadata into a QIIME2 visual format qiime metadata tabulate \ --m-input-file parkinsons_metadata.tsv \ --o-visualization parkinsons_metadata.qzv #Creating a demultiplexed sequence summary artifact qiime tools import \ --type "SampleData[SequencesWithQuality]" \ --input-format SingleEndFastqManifestPhred33V2 \ --input-path ./parkinsons_manifest.tsv \ --output-path ./demux_seqs.qza #Creating a demultiplexed sequence summary table qiime demux summarize \ --i-data ./demux_seqs.qza \ --o-visualization ./demux_seqs.qzv #Denoising our sequences and truncation to 249 because 249 was the smallest read length qiime dada2 denoise-single \ --i-demultiplexed-seqs ./demux_seqs.qza \ --p-trunc-len 249 \ --o-table ./dada2_table.qza \ --o-representative-sequences ./dada2_rep_set.qza \ --o-denoising-stats ./dada2_stats.qza #Create a visualization for the data after denoising qiime metadata tabulate \ --m-input-file ./dada2_stats.qza \ --o-visualization ./dada2_stats.qzv #Creating the Feature Table qiime feature-table summarize \ --i-table ./dada2_table.qza \ --m-sample-metadata-file ./parkinsons_metadata.tsv \ --o-visualization ./dada2_table.qzv #Downloaded the greengenes 13_8 reference sequence for phylogenetic relationships also looks at specific region of 16S rRNA gene (V4 region) - 515F/806R wget \ -O "sepp-refs-gg-13-8.qza" \ "https://data.qiime2.org/2020.11/common/sepp-refs-gg-13-8.qza" #Introducing the representative sequence for creation of phylogenetic relationships qiime fragment-insertion sepp \ --i-representative-sequences ./dada2_rep_set.qza \ --i-reference-database sepp-refs-gg-13-8.qza \ --o-tree ./tree.qza \ --o-placements ./tree_placements.qza \ --p-threads 1 #Creating alpha rarefaction table from sampling depth 100 to 19000 qiime diversity alpha-rarefaction \ --i-table ./dada2_table.qza \ --m-metadata-file ./parkinsons_metadata.tsv \ --o-visualization ./alpha_rarefaction_curves.qzv \ --p-min-depth 100 \ --p-max-depth 19000 #Creating many PCoA plots of various beta diversity metrics while also creating artifacts for further analysis qiime diversity core-metrics-phylogenetic \ --i-table ./dada2_table.qza \ --i-phylogeny ./tree.qza \ --m-metadata-file ./parkinsons_metadata.tsv \ --p-sampling-depth 6000 \ --output-dir ./core-metrics-results_6000 #Creating Faith Phylogenetic diversity boxplots qiime diversity alpha-group-significance \ --i-alpha-diversity ./core-metrics-results_6000/faith_pd_vector.qza \ --m-metadata-file ./parkinsons_metadata.tsv \ --o-visualization ./core-metrics-results_6000/faiths-diversity.qzv #Creating Evenness Boxplots qiime diversity alpha-group-significance \ --i-alpha-diversity ./core-metrics-results_6000/evenness_vector.qza \ --m-metadata-file ./parkinsons_metadata.tsv \ --o-visualization ./core-metrics-results_6000/evenness_statistics.qzv #Creating unweighted unifrac boxplot qiime diversity beta-group-significance \ --i-distance-matrix core-metrics-results_6000/unweighted_unifrac_distance_matrix.qza \ --m-metadata-file parkinsons_metadata.tsv \ --m-metadata-column Disease \ --o-visualization core-metrics-results_6000/unweighted-unifrac-Disease-significance.qzv #Creating weighted unifrac boxplot qiime diversity beta-group-significance \ --i-distance-matrix core-metrics-results_6000/weighted_unifrac_distance_matrix.qza \ --m-metadata-file parkinsons_metadata.tsv \ --m-metadata-column Disease \ --o-visualization core-metrics-results_6000/weighted-unifrac-Disease-significance.qzv #Bray Curtis Box Plot qiime diversity beta-group-significance \ --i-distance-matrix core-metrics-results_6000/bray_curtis_distance_matrix.qza \ --m-metadata-file parkinsons_metadata.tsv \ --m-metadata-column Disease \ --o-visualization core-metrics-results_6000/bray-curtis-Disease-significance.qzv #Jaccard Box Plot qiime diversity beta-group-significance \ --i-distance-matrix core-metrics-results_6000/jaccard_distance_matrix.qza \ --m-metadata-file parkinsons_metadata.tsv \ --m-metadata-column Disease \ --o-visualization core-metrics-results_6000/jaccard-Disease-significance.qzv wget \ -O "gg-13-8-99-515-806-nb-classifier.qza" \ "https://data.qiime2.org/2021.2/common/gg-13-8-99-515-806-nb-classifier.qza" #Using pre-trained classifier from green genes to setup taxonomy artifact qiime feature-classifier classify-sklearn \ --i-reads ./dada2_rep_set.qza \ --i-classifier ./gg-13-8-99-515-806-nb-classifier.qza \ --o-classification ./taxonomy.qza #Creating Taxonomy classification of ASVs qiime metadata tabulate \ --m-input-file ./taxonomy.qza \ --o-visualization ./taxonomy.qzv #Creating a table of sequences within the taxonomy qiime feature-table tabulate-seqs \ --i-data ./dada2_rep_set.qza \ --o-visualization ./dada2_rep_set.qzv #Creating feature table of only 6000 sampling depth qiime feature-table filter-samples \ --i-table ./dada2_table.qza \ --p-min-frequency 6000 \ --o-filtered-table ./table_6k.qza #Creating the taxonomy Barplot qiime taxa barplot \ --i-table ./table_6k.qza \ --i-taxonomy ./taxonomy.qza \ --m-metadata-file ./parkinsons_metadata.tsv \ --o-visualization ./taxa_barplot_6000.qzv #Filtering Steps #Filtering metadata so table only shows Healthy Samples qiime feature-table filter-samples \ --i-table dada2_table.qza \ --m-metadata-file parkinsons_metadata.tsv \ --p-where "[Disease]='Control' AND NOT [Coffe_drinker]='null'" \ --o-filtered-table Healthy_Coffee_only_table.qza #Creating healthy only - feature table visualization qiime feature-table summarize \ --i-table ./Healthy_Coffee_only_table.qza \ --m-sample-metadata-file ./parkinsons_metadata.tsv \ --o-visualization ./Healthy_Coffee_only_table.qzv #Filtering metadata so table only shows PD Samples qiime feature-table filter-samples \ --i-table dada2_table.qza \ --m-metadata-file parkinsons_metadata.tsv \ --p-where "[Disease]='PD' AND NOT [Coffe_drinker]='null'" \ --o-filtered-table PD_Coffee_only_table.qza #Creating PD only - feature table visualization qiime feature-table summarize \ --i-table ./PD_Coffee_only_table.qza \ --m-sample-metadata-file ./parkinsons_metadata.tsv \ --o-visualization ./PD_Coffee_only_table.qzv #Filtering metadata so table only shows Healthy Samples qiime feature-table filter-samples \ --i-table dada2_table.qza \ --m-metadata-file Parkinsons_Metadata_Abx.tsv \ --p-where "[Disease]='Control' AND NOT [Abx_doses_last_five_years]='null'" \ --o-filtered-table Healthy_Abx_only_table.qza #Creating healthy only - feature table visualization qiime feature-table summarize \ --i-table ./Healthy_Abx_only_table.qza \ --m-sample-metadata-file ./Parkinsons_Metadata_Abx.tsv \ --o-visualization ./Healthy_Abx_only_table.qzv #Filtering metadata so table only shows PD Samples qiime feature-table filter-samples \ --i-table dada2_table.qza \ --m-metadata-file Parkinsons_Metadata_Abx.tsv \ --p-where "[Disease]='PD' AND NOT [Abx_doses_last_five_years]='null'" \ --o-filtered-table PD_Abx_only_table.qza #Creating PD only - feature table visualization qiime feature-table summarize \ --i-table ./PD_Abx_only_table.qza \ --m-sample-metadata-file ./Parkinsons_Metadata_Abx.tsv \ --o-visualization ./PD_Abx_only_table.qzv #Analyzing Coffee-Drinking in Healthy controls qiime diversity core-metrics-phylogenetic \ --i-table ./Healthy_Coffee_only_table.qza \ --i-phylogeny ./tree.qza \ --m-metadata-file ./parkinsons_metadata.tsv \ --p-sampling-depth 6000 \ --output-dir ./core-metrics-results_6000_HealthyCoffee #Creating Faith Phylogenetic diversity boxplots qiime diversity alpha-group-significance \ --i-alpha-diversity ./core-metrics-results_6000_HealthyCoffee/faith_pd_vector.qza \ --m-metadata-file ./parkinsons_metadata.tsv \ --o-visualization ./core-metrics-results_6000_HealthyCoffee/faiths-diversity.qzv #Creating Evenness Boxplots qiime diversity alpha-group-significance \ --i-alpha-diversity ./core-metrics-results_6000_HealthyCoffee/evenness_vector.qza \ --m-metadata-file ./parkinsons_metadata.tsv \ --o-visualization ./core-metrics-results_6000_HealthyCoffee/evenness_statistics.qzv #Creating unweighted unifrac boxplot qiime diversity beta-group-significance \ --i-distance-matrix core-metrics-results_6000_HealthyCoffee/unweighted_unifrac_distance_matrix.qza \ --m-metadata-file parkinsons_metadata.tsv \ --m-metadata-column Coffe_drinker \ --o-visualization core-metrics-results_6000_HealthyCoffee/unweighted-unifrac-Coffee-significance.qzv #Creating weighted unifrac boxplot qiime diversity beta-group-significance \ --i-distance-matrix core-metrics-results_6000_HealthyCoffee/weighted_unifrac_distance_matrix.qza \ --m-metadata-file parkinsons_metadata.tsv \ --m-metadata-column Coffe_drinker \ --o-visualization core-metrics-results_6000_HealthyCoffee/weighted-unifrac-Coffee-significance.qzv #Bray Curtis Box Plot qiime diversity beta-group-significance \ --i-distance-matrix core-metrics-results_6000_HealthyCoffee/bray_curtis_distance_matrix.qza \ --m-metadata-file parkinsons_metadata.tsv \ --m-metadata-column Coffe_drinker \ --o-visualization core-metrics-results_6000_HealthyCoffee/bray-curtis-Coffee-significance.qzv #Jaccard Box Plot qiime diversity beta-group-significance \ --i-distance-matrix core-metrics-results_6000_HealthyCoffee/jaccard_distance_matrix.qza \ --m-metadata-file parkinsons_metadata.tsv \ --m-metadata-column Coffe_drinker \ --o-visualization core-metrics-results_6000_HealthyCoffee/jaccard-Coffee-significance.qzv #Creating feature table of only 6000 sampling depth qiime feature-table filter-samples \ --i-table ./Healthy_Coffee_only_table.qza \ --p-min-frequency 6000 \ --o-filtered-table ./Healthy_Coffee_only_table_6k.qza #Creating the taxonomy Barplot qiime taxa barplot \ --i-table ./Healthy_Coffee_only_table_6k.qza \ --i-taxonomy ./taxonomy.qza \ --m-metadata-file ./parkinsons_metadata.tsv \ --o-visualization ./taxa_barplot_Healthy_Coffee_only_6000.qzv #Analyzing Coffee-Drinking in PD Patients qiime diversity core-metrics-phylogenetic \ --i-table ./PD_Coffee_only_table.qza \ --i-phylogeny ./tree.qza \ --m-metadata-file ./parkinsons_metadata.tsv \ --p-sampling-depth 6000 \ --output-dir ./core-metrics-results_6000_PD_Coffee #Creating Faith Phylogenetic diversity boxplots qiime diversity alpha-group-significance \ --i-alpha-diversity ./core-metrics-results_6000_PD_Coffee/faith_pd_vector.qza \ --m-metadata-file ./parkinsons_metadata.tsv \ --o-visualization ./core-metrics-results_6000_PD_Coffee/faiths-diversity.qzv #Creating Evenness Boxplots qiime diversity alpha-group-significance \ --i-alpha-diversity ./core-metrics-results_6000_PD_Coffee/evenness_vector.qza \ --m-metadata-file ./parkinsons_metadata.tsv \ --o-visualization ./core-metrics-results_6000_PD_Coffee/evenness_statistics.qzv #Creating unweighted unifrac boxplot qiime diversity beta-group-significance \ --i-distance-matrix core-metrics-results_6000_PD_Coffee/unweighted_unifrac_distance_matrix.qza \ --m-metadata-file parkinsons_metadata.tsv \ --m-metadata-column Coffe_drinker \ --o-visualization core-metrics-results_6000_PD_Coffee/unweighted-unifrac-Coffee-significance.qzv #Creating weighted unifrac boxplot qiime diversity beta-group-significance \ --i-distance-matrix core-metrics-results_6000_PD_Coffee/weighted_unifrac_distance_matrix.qza \ --m-metadata-file parkinsons_metadata.tsv \ --m-metadata-column Coffe_drinker \ --o-visualization core-metrics-results_6000_PD_Coffee/weighted-unifrac-Coffee-significance.qzv #Bray Curtis Box Plot qiime diversity beta-group-significance \ --i-distance-matrix core-metrics-results_6000_PD_Coffee/bray_curtis_distance_matrix.qza \ --m-metadata-file parkinsons_metadata.tsv \ --m-metadata-column Coffe_drinker \ --o-visualization core-metrics-results_6000_PD_Coffee/bray-curtis-Coffee-significance.qzv #Jaccard Box Plot qiime diversity beta-group-significance \ --i-distance-matrix core-metrics-results_6000_PD_Coffee/jaccard_distance_matrix.qza \ --m-metadata-file parkinsons_metadata.tsv \ --m-metadata-column Coffe_drinker \ --o-visualization core-metrics-results_6000_PD_Coffee/jaccard-Coffee-significance.qzv #Creating feature table of only 6000 sampling depth qiime feature-table filter-samples \ --i-table ./PD_Coffee_only_table.qza \ --p-min-frequency 6000 \ --o-filtered-table ./PD_Coffee_only_table_6k.qza #Creating the taxonomy Barplot qiime taxa barplot \ --i-table ./PD_Coffee_only_table_6k.qza \ --i-taxonomy ./taxonomy.qza \ --m-metadata-file ./parkinsons_metadata.tsv \ --o-visualization ./taxa_barplot_PD_Coffee_only_6000.qzv #Analyzing Abx in Healthy controls qiime diversity core-metrics-phylogenetic \ --i-table ./Healthy_Abx_only_table.qza \ --i-phylogeny ./tree.qza \ --m-metadata-file ./Parkinsons_Metadata_Abx.tsv \ --p-sampling-depth 6000 \ --output-dir ./core-metrics-results_6000_HealthyAbx #Creating Faith Phylogenetic diversity boxplots qiime diversity alpha-group-significance \ --i-alpha-diversity ./core-metrics-results_6000_HealthyAbx/faith_pd_vector.qza \ --m-metadata-file ./Parkinsons_Metadata_Abx.tsv \ --o-visualization ./core-metrics-results_6000_HealthyAbx/faiths-diversity.qzv #Creating Evenness Boxplots qiime diversity alpha-group-significance \ --i-alpha-diversity ./core-metrics-results_6000_HealthyAbx/evenness_vector.qza \ --m-metadata-file ./Parkinsons_Metadata_Abx.tsv \ --o-visualization ./core-metrics-results_6000_HealthyAbx/evenness_statistics.qzv #Creating unweighted unifrac boxplot qiime diversity beta-group-significance \ --i-distance-matrix core-metrics-results_6000_HealthyAbx/unweighted_unifrac_distance_matrix.qza \ --m-metadata-file Parkinsons_Metadata_Abx.tsv \ --m-metadata-column Abx_doses_last_five_years \ --o-visualization core-metrics-results_6000_HealthyAbx/unweighted-unifrac-Abx-significance.qzv #Creating weighted unifrac boxplot qiime diversity beta-group-significance \ --i-distance-matrix core-metrics-results_6000_HealthyAbx/weighted_unifrac_distance_matrix.qza \ --m-metadata-file Parkinsons_Metadata_Abx.tsv \ --m-metadata-column Abx_doses_last_five_years \ --o-visualization core-metrics-results_6000_HealthyAbx/weighted-unifrac-Abx-significance.qzv #Bray Curtis Box Plot qiime diversity beta-group-significance \ --i-distance-matrix core-metrics-results_6000_HealthyAbx/bray_curtis_distance_matrix.qza \ --m-metadata-file Parkinsons_Metadata_Abx.tsv \ --m-metadata-column Abx_doses_last_five_years \ --o-visualization core-metrics-results_6000_HealthyAbx/bray-curtis-Abx-significance.qzv #Jaccard Box Plot qiime diversity beta-group-significance \ --i-distance-matrix core-metrics-results_6000_HealthyAbx/jaccard_distance_matrix.qza \ --m-metadata-file Parkinsons_Metadata_Abx.tsv \ --m-metadata-column Abx_doses_last_five_years \ --o-visualization core-metrics-results_6000_HealthyAbx/jaccard-Abx-significance.qzv #Creating feature table of only 6000 sampling depth qiime feature-table filter-samples \ --i-table ./Healthy_Abx_only_table.qza \ --p-min-frequency 6000 \ --o-filtered-table ./Healthy_Abx_only_table_6k.qza #Creating the taxonomy Barplot qiime taxa barplot \ --i-table ./Healthy_Abx_only_table_6k.qza \ --i-taxonomy ./taxonomy.qza \ --m-metadata-file ./Parkinsons_Metadata_Abx.tsv \ --o-visualization ./taxa_barplot_Healthy_Abx_only_6000.qzv #Analyzing Abx in PD Patients qiime diversity core-metrics-phylogenetic \ --i-table ./PD_Abx_only_table.qza \ --i-phylogeny ./tree.qza \ --m-metadata-file ./Parkinsons_Metadata_Abx.tsv \ --p-sampling-depth 6000 \ --output-dir ./core-metrics-results_6000_PD_Abx #Creating Faith Phylogenetic diversity boxplots qiime diversity alpha-group-significance \ --i-alpha-diversity ./core-metrics-results_6000_PD_Abx/faith_pd_vector.qza \ --m-metadata-file ./Parkinsons_Metadata_Abx.tsv \ --o-visualization ./core-metrics-results_6000_PD_Abx/faiths-diversity.qzv #Creating Evenness Boxplots qiime diversity alpha-group-significance \ --i-alpha-diversity ./core-metrics-results_6000_PD_Abx/evenness_vector.qza \ --m-metadata-file ./Parkinsons_Metadata_Abx.tsv \ --o-visualization ./core-metrics-results_6000_PD_Abx/evenness_statistics.qzv #Creating unweighted unifrac boxplot qiime diversity beta-group-significance \ --i-distance-matrix core-metrics-results_6000_PD_Abx/unweighted_unifrac_distance_matrix.qza \ --m-metadata-file Parkinsons_Metadata_Abx.tsv \ --m-metadata-column Abx_doses_last_five_years \ --o-visualization core-metrics-results_6000_PD_Abx/unweighted-unifrac-Abx-significance.qzv #Creating weighted unifrac boxplot qiime diversity beta-group-significance \ --i-distance-matrix core-metrics-results_6000_PD_Abx/weighted_unifrac_distance_matrix.qza \ --m-metadata-file Parkinsons_Metadata_Abx.tsv \ --m-metadata-column Abx_doses_last_five_years \ --o-visualization core-metrics-results_6000_PD_Abx/weighted-unifrac-Abx-significance.qzv #Bray Curtis Box Plot qiime diversity beta-group-significance \ --i-distance-matrix core-metrics-results_6000_PD_Abx/bray_curtis_distance_matrix.qza \ --m-metadata-file Parkinsons_Metadata_Abx.tsv \ --m-metadata-column Abx_doses_last_five_years \ --o-visualization core-metrics-results_6000_PD_Abx/bray-curtis-Abx-significance.qzv #Jaccard Box Plot qiime diversity beta-group-significance \ --i-distance-matrix core-metrics-results_6000_PD_Abx/jaccard_distance_matrix.qza \ --m-metadata-file Parkinsons_Metadata_Abx.tsv \ --m-metadata-column Abx_doses_last_five_years \ --o-visualization core-metrics-results_6000_PD_Abx/jaccard-Abx-significance.qzv #Creating feature table of only 6000 sampling depth qiime feature-table filter-samples \ --i-table ./PD_Abx_only_table.qza \ --p-min-frequency 6000 \ --o-filtered-table ./PD_Abx_only_table_6k.qza #Creating the taxonomy Barplot qiime taxa barplot \ --i-table ./PD_Abx_only_table_6k.qza \ --i-taxonomy ./taxonomy.qza \ --m-metadata-file ./Parkinsons_Metadata_Abx.tsv \ --o-visualization ./taxa_barplot_PD_Abx_only_6000.qzv #Exporting files to compare PD vs Healthy Question while also looking at coffee consumption in R qiime tools export \ --input-path dada2_table.qza --output-path exported qiime tools export \ --input-path taxonomy.qza --output-path exported qiime tools export \ --input-path tree.qza --output-path exported cd exported #Editing column names in newly made taxonomy TSV file nano taxonomy.tsv cd .. #Creating biom file with taxonomy information to be used in R biom add-metadata \ --i exported/feature-table.biom \ --o exported/table-with-taxonomy.biom \ --observation-metadata-fp exported/taxonomy.tsv \ --sc-separated taxonomy #Exporting files to specifically do analysis on antibiotic consumption of PD patients qiime tools export \ --input-path PD_Abx_only_table.qza --output-path exported_Abx qiime tools export \ --input-path taxonomy.qza --output-path exported_Abx qiime tools export \ --input-path tree.qza --output-path exported_Abx cd exported_Abx #Change file name to not get it confused with BIOM file above mv feature-table.biom PD_Abx.biom #Editing column names in newly made taxonomy TSV file nano taxonomy.tsv cd .. #Creating biom file with taxonomy information to be used in R biom add-metadata \ --i exported/PD_Abx.biom \ --o exported/PD_Abx_only.biom \ --observation-metadata-fp exported/taxonomy.tsv \ --sc-separated taxonomy #Exporting files to specifically do analysis on antibiotic consumption of Healthy participants qiime tools export \ --input-path Healthy_Abx_only_table.qza --output-path exported_Healthy_Abx qiime tools export \ --input-path taxonomy.qza --output-path exported_Healthy_Abx qiime tools export \ --input-path tree.qza --output-path exported_Healthy_Abx cd exported_Healthy_Abx #Change file name to not get it confused with BIOM file above mv feature-table.biom Healthy_Abx.biom #Editing column names in newly made taxonomy TSV file nano taxonomy.tsv cd .. #Creating biom file with taxonomy information to be used in R biom add-metadata \ --i exported/Healthy_Abx.biom \ --o exported/Healthy_Abx_only.biom \ --observation-metadata-fp exported/taxonomy.tsv \ --sc-separated taxonomy #Then copied the BIOM, NWK, and corresponding metadata file onto local computer to do the R analysis # Load CRAN packages library(tidyverse) library(vegan) library(ape) # Load Bioconductor packages library(phyloseq) library(DESeq2) # Calculate relative abundance calculate_relative_abundance <- function(x) x / sum(x) # Calculate geometric mean calculate_gm_mean <- function(x, na.rm = TRUE) { exp(sum(log(x[x > 0]), na.rm = na.rm) / length(x)) } # Define the set of random numbers set.seed(711) # Import qiime files biom_file <- import_biom("table-with-taxonomy.biom") metadata <- import_qiime_sample_data("Parkinsons_Metadata.tsv") tree <- read_tree_greengenes("tree.nwk") # IMPORTANT!!! Convert tree from multichotomous to dichotomous tree tree <- multi2di(tree) # Combine information into single phyloseq object physeq <- merge_phyloseq(biom_file, metadata, tree) # Change taxonomic levels from numbers to actual names colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class","Order", "Family", "Genus", "Species") head(tax_table(physeq)) #Setting sampling depth sample_sums(physeq) >= 6000 #Filter samples based on sampling depth at_least_6000 <- prune_samples(sample_sums(physeq) >= 6000, physeq) sample_sums(at_least_6000) #Getting most abundant taxa at the genus level counts <- taxa_sums(at_least_6000) relative_abundance <- calculate_relative_abundance(counts) abundant <- relative_abundance > 0.001 abundant_taxa <- prune_taxa(abundant, at_least_6000) abundant_genera <- tax_glom(abundant_taxa, taxrank = "Genus") abundant_genera abundant_genera_family <- tax_glom(abundant_taxa, taxrank = "Family") abundant_genera_family abundant_genera_species <- tax_glom(abundant_taxa, taxrank = "Species") abundant_genera_species #DESeq2 Analysis Genus deseq <- phyloseq_to_deseq2(abundant_genera, ~ Disease) geo_means <- apply(counts(deseq), 1, calculate_gm_mean) deseq <- estimateSizeFactors(deseq, geoMeans = geo_means) deseq <- DESeq(deseq, fitType = "local") diff_abund <- results(deseq) #DESeq2 Analysis Family deseq_family <- phyloseq_to_deseq2(abundant_genera_family, ~ Disease) geo_means_family <- apply(counts(deseq_family), 1, calculate_gm_mean) deseq_family <- estimateSizeFactors(deseq_family, geoMeans = geo_means_family) deseq_family <- DESeq(deseq_family, fitType = "local") diff_abund_family <- results(deseq_family) #DESeq2 Analysis species deseq_species <- phyloseq_to_deseq2(abundant_genera_species, ~ Disease) geo_means_species <- apply(counts(deseq_species), 1, calculate_gm_mean) deseq_species <- estimateSizeFactors(deseq_species, geoMeans = geo_means_species) deseq_species <- DESeq(deseq_species, fitType = "local") diff_abund_species <- results(deseq_species) #Filter data based on alpha < 0.05 for genus alpha <- 0.05 significant <- as.data.frame(diff_abund) significant <- filter(significant, padj < alpha) #Filter data based on alpha < 0.05 for family alpha <- 0.05 significant_family <- as.data.frame(diff_abund_family) significant_family <- filter(significant_family, padj < alpha) #Filter data based on alpha < 0.05 for species alpha <- 0.05 significant_species <- as.data.frame(diff_abund_species) significant_species <- filter(significant_species, padj < alpha) #Merging Data for genus genera_df <- as.data.frame(tax_table(abundant_genera)) significant <- merge(significant, genera_df, by = "row.names") significant <- arrange(significant, log2FoldChange) dim(significant) #Merging Data for family family_df <- as.data.frame(tax_table(abundant_genera_family)) significant_family <- merge(significant_family, family_df, by = "row.names") significant_family <- arrange(significant_family, log2FoldChange) dim(significant_family) #Merging Data for species species_df <- as.data.frame(tax_table(abundant_genera_species)) significant_species <- merge(significant_species, species_df, by = "row.names") significant_species <- arrange(significant_species, log2FoldChange) dim(significant_species) #Creating differential abundance genera plot significant <- filter(significant, Genus != "g__") significant <- mutate(significant, Genus = factor(Genus, levels = Genus)) ggplot(significant, aes(x = log2FoldChange, y = Genus)) + geom_bar(stat = "identity") + labs(title = "Differential abundant genera - PD vs Healthy", x = "log2 Fold-Change", y = "Genus") + theme_bw() #Creating differential abundance family plot significant_family <- mutate(significant_family, Family = factor(Family, levels = unique(Family))) ggplot(significant_family, aes(x = log2FoldChange, y = Family)) + geom_bar(stat = "identity") + labs(title = "Differential abundant Family", x = "log2 Fold-Change", y = "Family") + theme_bw() #Relative abundance at_least_6000_RA <- transform_sample_counts(at_least_6000, calculate_relative_abundance) #Getting most abundant taxa and getting relative abundance graphs counts <- taxa_sums(at_least_6000) relative_abundance <- calculate_relative_abundance(counts) abundant <- relative_abundance > 0.001 abundant_RA_taxa <- prune_taxa(abundant, at_least_6000_RA) abundant_RA_taxa <- tax_glom(abundant_RA_taxa, taxrank = "Genus") bifidobacterium <- subset_taxa(abundant_RA_taxa, Genus == "g__Bifidobacterium") otu_table(bifidobacterium) bifidobacterium_long <- psmelt(bifidobacterium) bifidobacterium_long roseburia <- subset_taxa(abundant_RA_taxa, Genus == "g__Roseburia") otu_table(roseburia) roseburia_long <- psmelt(roseburia) roseburia_long faecalibacterium <- subset_taxa(abundant_RA_taxa, Genus == "g__Faecalibacterium") otu_table(faecalibacterium) faecalibacterium_long <- psmelt(faecalibacterium) faecalibacterium_long collinsella <- subset_taxa(abundant_RA_taxa, Genus == "g__Collinsella") otu_table(collinsella) collinsella_long <- psmelt(collinsella) collinsella_long bilophila <- subset_taxa(abundant_RA_taxa, Genus == "g__Bilophila") otu_table(bilophila) bilophila_long <- psmelt(bilophila) bilophila_long akkermansia <- subset_taxa(abundant_RA_taxa, Genus == "g__Akkermansia") otu_table(akkermansia) akkermansia_long <- psmelt(akkermansia) akkermansia_long #Relative abundance plot ggplot(bifidobacterium_long, aes (x = Disease, y = Abundance)) + geom_boxplot() + labs(title = "Relative abundance Bifidobacterium", x = "Disease", y = "Relative Abundance") ggplot(roseburia_long, aes (x = Disease, y = Abundance)) + geom_boxplot() + labs(title = "Relative abundance Roseburia", x = "Disease", y = "Relative Abundance") ggplot(faecalibacterium_long, aes (x = Disease, y = Abundance)) + geom_boxplot() + labs(title = "Relative abundance Faecalibacterium", x = "Disease", y = "Relative Abundance") ggplot(collinsella_long, aes (x = Disease, y = Abundance)) + geom_boxplot() + labs(title = "Relative abundance Collinsella", x = "Disease", y = "Relative Abundance") ggplot(bilophila_long, aes (x = Disease, y = Abundance)) + geom_boxplot() + labs(title = "Relative abundance Bilophila", x = "Disease", y = "Relative Abundance") ggplot(akkermansia_long, aes (x = Disease, y = Abundance)) + geom_boxplot() + labs(title = "Relative abundance Akkermansia", x = "Disease", y = "Relative Abundance") #Creating differential abundance plot significant <- mutate(significant, Genus = factor(Genus, levels = unique(Genus))) ggplot(significant, aes(x = log2FoldChange, y = Genus)) + geom_bar(stat = "identity") + labs(title = "Differential abundant genera", x = "log2 Fold-Change", y = "Genus") + theme_bw() # Load CRAN packages library(tidyverse) library(vegan) library(ape) # Load Bioconductor packages library(phyloseq) library(DESeq2) # Calculate relative abundance calculate_relative_abundance <- function(x) x / sum(x) # Calculate geometric mean calculate_gm_mean <- function(x, na.rm = TRUE) { exp(sum(log(x[x > 0]), na.rm = na.rm) / length(x)) } # Define the set of random numbers set.seed(711) # Import qiime files biom_file_PD <- import_biom("PD_Abx_only.biom") biom_file_Healthy <- import_biom("Healthy_Abx_only.biom") metadata <- import_qiime_sample_data("Parkinsons_Metadata_Abx.tsv") tree <- read_tree_greengenes("tree.nwk") # IMPORTANT!!! Convert tree from multichotomous to dichotomous tree tree <- multi2di(tree) # Combine information into single phyloseq object healthy_physeq <- merge_phyloseq(biom_file_Healthy, metadata, tree) PD_physeq <- merge_phyloseq(biom_file_PD, metadata, tree) # Change taxonomic levels from numbers to actual names colnames(tax_table(healthy_physeq)) <- c("Kingdom", "Phylum", "Class","Order", "Family", "Genus", "Species") colnames(tax_table(PD_physeq)) <- c("Kingdom", "Phylum", "Class","Order", "Family", "Genus", "Species") #Setting sampling depth sample_sums(physeq) >= 6000 #Filtering Metadata so all samples involved have at least 6000 features healthy_6000 <- prune_samples(sample_sums(healthy_physeq) >= 6000, healthy_physeq) sample_sums(healthy_6000) PD_6000 <- prune_samples(sample_sums(PD_physeq) >= 6000, PD_physeq) sample_sums(PD_6000) #Getting most abundant taxa for Healthy patients at the genus level healthy_counts <- taxa_sums(healthy_6000) healthy_relative_abundance <- calculate_relative_abundance(healthy_counts) healthy_abundant <- healthy_relative_abundance > 0.001 healthy_abundant_taxa <- prune_taxa(healthy_abundant, healthy_6000) healthy_abundant_genera <- tax_glom(healthy_abundant_taxa, taxrank = "Genus") healthy_abundant_genera #Getting most abundant taxa for PD patients at the genus level PD_counts <- taxa_sums(PD_6000) PD_relative_abundance <- calculate_relative_abundance(PD_counts) PD_abundant <- PD_relative_abundance > 0.001 PD_abundant_taxa <- prune_taxa(PD_abundant, PD_6000) PD_abundant_genera <- tax_glom(PD_abundant_taxa, taxrank = "Genus") PD_abundant_genera #DESeq2 Analysis Healthy Patients healthy_deseq <- phyloseq_to_deseq2(healthy_abundant_genera, ~ Abx_doses_last_five_years) geo_means <- apply(counts(healthy_deseq), 1, calculate_gm_mean) healthy_deseq <- estimateSizeFactors(healthy_deseq, geoMeans = geo_means) healthy_deseq <- DESeq(healthy_deseq, fitType = "local") healthy_diff_abund <- results(healthy_deseq) #DESeq2 Analysis PD Patients PD_deseq <- phyloseq_to_deseq2(PD_abundant_genera, ~ Abx_doses_last_five_years) geo_means <- apply(counts(PD_deseq), 1, calculate_gm_mean) PD_deseq <- estimateSizeFactors(PD_deseq, geoMeans = geo_means) PD_deseq <- DESeq(PD_deseq, fitType = "local") PD_diff_abund <- results(PD_deseq) #Filter data based on alpha < 0.05 for healthy patients alpha <- 0.05 healthy_significant <- as.data.frame(healthy_diff_abund) healthy_significant <- filter(healthy_significant, padj < alpha) #Filter data based on alpha < 0.05 for PD patients alpha <- 0.05 PD_significant <- as.data.frame(PD_diff_abund) PD_significant <- filter(PD_significant, padj < alpha) #Merging Data PD Patients PD_genera_df <- as.data.frame(tax_table(PD_abundant_genera)) PD_significant <- merge(PD_significant, PD_genera_df, by = "row.names") PD_significant <- arrange(PD_significant, log2FoldChange) dim(PD_significant) #Creating differential abundance plot PD_significant <- mutate(PD_significant, Row.names = factor(Row.names, levels = Row.names)) ggplot(PD_significant, aes(x = log2FoldChange, y = Genus)) + geom_bar(stat = "identity") + labs(title = "Differential abundant genera", x = "log2 Fold-Change", y = "Genus") + theme_bw() #Relative abundance PD_6000_RA <- transform_sample_counts(PD_6000, calculate_relative_abundance) #Getting most abundant taxa for PD patients at the genus level PD_counts <- taxa_sums(PD_6000) PD_relative_abundance <- calculate_relative_abundance(PD_counts) PD_abundant <- PD_relative_abundance > 0.001 PD_abundant_RA_taxa <- prune_taxa(PD_abundant, PD_6000_RA) PD_abundant_RA_taxa <- tax_glom(PD_abundant_RA_taxa, taxrank = "Genus") bifidobacterium <- subset_taxa(PD_abundant_RA_taxa, Genus == "g__Bifidobacterium") otu_table(bifidobacterium) bifidobacterium_long <- psmelt(bifidobacterium) bifidobacterium_long #Relative abundance plot ggplot(bifidobacterium_long, aes (x = Abx_doses_last_five_years, y = Abundance)) + geom_boxplot() + labs(title = "Relative abundance Bifidobacterium", x = "Abx", y = "Abundance") # Load CRAN packages library(tidyverse) library(vegan) library(ape) # Load Bioconductor packages library(phyloseq) library(DESeq2) # Calculate relative abundance calculate_relative_abundance <- function(x) x / sum(x) # Calculate geometric mean calculate_gm_mean <- function(x, na.rm = TRUE) { exp(sum(log(x[x > 0]), na.rm = na.rm) / length(x)) } # Define the set of random numbers set.seed(711) # Import qiime files biom_file <- import_biom("table-with-taxonomy.biom") metadata <- import_qiime_sample_data("Parkinsons_Metadata.tsv") tree <- read_tree_greengenes("tree.nwk") # IMPORTANT!!! Convert tree from multichotomous to dichotomous tree tree <- multi2di(tree) # Combine information into single phyloseq object physeq <- merge_phyloseq(biom_file, metadata, tree) # Change taxonomic levels from numbers to actual names colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class","Order", "Family", "Genus", "Species") head(tax_table(physeq)) #Setting sampling depth sample_sums(physeq) >= 6000 #Filter samples based on sampling depth at_least_6000 <- prune_samples(sample_sums(physeq) >= 6000, physeq) sample_sums(at_least_6000) #Filter for Healthy Patients healthy <- subset_samples(at_least_6000, Disease == "Control") healthy <- subset_samples(healthy, Coffe_drinker != "") #Filter for PD Patients PD <- subset_samples(at_least_6000, Disease == "PD") PD <- subset_samples(PD, Coffe_drinker != "") #Getting most abundant taxa for Healthy patients at the genus level healthy_counts <- taxa_sums(healthy) healthy_relative_abundance <- calculate_relative_abundance(healthy_counts) healthy_abundant <- healthy_relative_abundance > 0.001 healthy_abundant_taxa <- prune_taxa(healthy_abundant, healthy) healthy_abundant_genera <- tax_glom(healthy_abundant_taxa, taxrank = "Genus") healthy_abundant_genera #Getting most abundant taxa for Healthy patients at the genus level PD_counts <- taxa_sums(PD) PD_relative_abundance <- calculate_relative_abundance(PD_counts) PD_abundant <- PD_relative_abundance > 0.001 PD_abundant_taxa <- prune_taxa(PD_abundant, PD) PD_abundant_genera <- tax_glom(PD_abundant_taxa, taxrank = "Genus") PD_abundant_genera #DESeq2 Analysis Healthy Patients deseq_healthy <- phyloseq_to_deseq2(healthy_abundant_genera, ~ Coffe_drinker) geo_means <- apply(counts(deseq_healthy), 1, calculate_gm_mean) deseq_healthy <- estimateSizeFactors(deseq_healthy, geoMeans = geo_means) deseq_healthy <- DESeq(deseq_healthy, fitType = "local") healthy_diff_abund <- results(deseq_healthy) #DESeq2 Analysis PD Patients deseq_PD <- phyloseq_to_deseq2(PD_abundant_genera, ~ Coffe_drinker) geo_means <- apply(counts(deseq_PD), 1, calculate_gm_mean) deseq_PD <- estimateSizeFactors(deseq_PD, geoMeans = geo_means) deseq_PD <- DESeq(deseq_PD, fitType = "local") PD_diff_abund <- results(deseq_PD) #Filter data based on alpha < 0.05 for Healthy alpha <- 0.05 healthy_significant <- as.data.frame(healthy_diff_abund) healthy_significant <- filter(healthy_significant, padj < alpha) #Filter data based on alpha < 0.05 for PD alpha <- 0.05 PD_significant <- as.data.frame(PD_diff_abund) PD_significant <- filter(PD_significant, padj < alpha)