#load in packages# library(tidyverse) library(phyloseq) library(ggVennDiagram) library(ape) library(dplyr) library(vegan) #make phyloseq object #load in data metafp3 <- "metadata3.txt" meta3 <- read_delim(metafp3, delim="\t") otufp3 <- "table.txt" otu3 <- read_delim(file = otufp3, delim="\t", skip=1) taxfp3 <- "taxonomy.tsv" tax3 <- read_delim(taxfp3, delim="\t") phylotreefp3 <- "tree.nwk" phylotree3 <- read_tree(phylotreefp3) class(phylotree3) #convert files to be phyloseq-compatible #### Format OTU table #### # save everything except first column (OTU ID) into a matrix otu_mat3 <- as.matrix(otu[,-1]) # Make first column (#OTU ID) the rownames of the new matrix rownames(otu_mat3) <- otu$`#OTU ID` # Use the "otu_table" function to make an OTU table OTU3 <- otu_table(otu_mat3, taxa_are_rows = TRUE) class(OTU) #### Format sample metadata #### # Save everything except sampleid as new data frame samp_df3 <- as.data.frame(meta3[,-1]) # Make sampleids the rownames rownames(samp_df3)<- meta3$"#SampleID" # Make phyloseq sample data with sample_data() function SAMP3 <- sample_data(samp_df3) class(SAMP) #### Formatting taxonomy #### # Convert taxon strings to a table with separate taxa rank columns tax_mat3 <- tax %>% select(-Confidence)%>% separate(col=Taxon, sep="; " , into = c("Domain","Phylum","Class","Order","Family","Genus","Species")) %>% as.matrix() # Saving as a matrix # Save everything except feature IDs tax_mat3 <- tax_mat[,-1] # Make sampleids the rownames rownames(tax_mat3) <- tax$`Feature ID` # Make taxa table TAX3 <- tax_table(tax_mat3) class(TAX) #### Create phyloseq object #### # Merge all into a phyloseq object parkinson_phyloseqobject3 <- phyloseq(OTU3, SAMP3, TAX3, phylotree3) # View components of phyloseq object with the following commands otu_table(parkinson_phyloseqobject3) sample_data(parkinson_phyloseqobject3) tax_table(parkinson_phyloseqobject3) phy_tree(parkinson_phyloseqobject3) #filter for only PD patients parkinson_filt3 <- subset_samples(parkinson_phyloseqobject3, Disease=="PD" ) parkinson_final3 <- subset_samples(parkinson_filt3, !Combined=="NA") # Load metadata sampdatfp <- "metadata3.txt" sampdat <- read.delim(sampdatfp) str(sampdat) #Linear Correlation plot <- ggplot(sampdat, aes(x=BDI_depression_score, y=FSS_fatigue_score)) + geom_point() + geom_smooth(method="lm", col="blue") + xlab("BDI Depression Score") + ylab("FSS Fatigue Score") ggsave(filename = "correlation.png" , plot , height=4, width=7) #plot alpha diversity richness distressed vs non-distressed plot_richness(parkinson_final3, x="Combined1", measures="Shannon")+ geom_boxplot() + xlab("Non-distressed vs Distressed") ######### exact numbers for alpha shannon diversity analysis ############### # estimate alpha-diversity: table/data.frame alpha_div <- estimate_richness(physeq = parkinson_final3, measures = c("Shannon")) ## add metadata columns to alpha-diversity data frame metadataee <- sample_data(object = parkinson_final3) %>% data.frame(.) # convert to a data.frame # check if all the rownames between metadata and alpha-diversity match to join them blindly #prints nothing if they match; otherwise prints error stopifnot( all( rownames(parkinson_final3) == rownames(alpha_div))) # add metadata to alpha-div alpha_div_metadata <- cbind(alpha_div, metadataee) # determine the median by groups: change the group 'SampleType' to the sample_data column #name group that you want to use alpha_div_metadata_median <- alpha_div_metadata %>% group_by(combineda) %>% summarise_at(vars(Shannon), median) %>% as.data.frame(.) # print results print(alpha_div_metadata_median) ############## #plotting beta diversity (bray) bc_dm <- distance(parkinson_final3, method="bray") pcoa_bc <- ordinate(parkinson_final3, method="PCoA", distance=bc_dm) plot_ordination(parkinson_final3, pcoa_bc, color = "Combined1", shape = "Combined1") + labs(col = "Distressed") #taxa summaries plot # first convert the ASV table to relative abundance: parkinson_ra3 <- transform_sample_counts(parkinson_final3, function(x) x/sum(x)) parkinson_phylum3 <- tax_glom(parkinson_ra3, taxrank = "Phylum", NArm=FALSE) plot_bar(parkinson_phylum3, fill="Phylum") + facet_wrap(.~Combined1, scales = "free_x") #taxa summaries plot with x axis label # first convert the ASV table to relative abundance: parkinson_ra3 <- transform_sample_counts(parkinson_final3, function(x) x/sum(x)) parkinson_phylum3 <- tax_glom(parkinson_ra3, taxrank = "Phylum", NArm=FALSE) plot_bar(parkinson_phylum3, fill="Phylum") + facet_wrap(.~Combined1, scales = "free_x") + xlab("Non-distressed vs Distressed") ###april 3: producing PCA for all beta diversity metrics (jaccard, unweighted unifrac, weighted unifrac) ?distance #plotting beta using jaccard bc_dmj <- distance(parkinson_final3, method="jaccard", binary = TRUE) pcoa_bcj <- ordinate(parkinson_final3, method="PCoA", distance=bc_dmj) plot_ordination(parkinson_final3, pcoa_bcj, color = "combineda", shape = "combineda") + labs(col = " ", shape = " ") #plotting beta using unweighted unifrac bc_dmu <- distance(parkinson_final3, method="unifrac") pcoa_bcu <- ordinate(parkinson_final3, method="PCoA", distance=bc_dmu) plot_ordination(parkinson_final3, pcoa_bcu, color = "combineda", shape = "combineda") + labs(col = " ", shape = " ") #plotting beta using weighted unifrac bc_dmw <- distance(parkinson_final3, method="wunifrac") pcoa_bcw <- ordinate(parkinson_final3, method="PCoA", distance=bc_dmw) plot_ordination(parkinson_final3, pcoa_bcw, color = "combineda", shape = "combineda") + labs(col = " ", shape = " ") #plotting beta using weighted unifrac grayscale bc_dmw <- distance(parkinson_final3, method="wunifrac") pcoa_bcw <- ordinate(parkinson_final3, method="PCoA", distance=bc_dmw) plot_ordination(parkinson_final3, pcoa_bcw, shape = "combineda") + labs(col = " ", shape = " ") ####april 5 (making previous graphs suitable for publishing) # alpha diversity richness distressed vs non-distressed Shannon plot_richness(parkinson_final3) plot_richness(parkinson_final3, x="combineda", measures="Shannon") + stat_boxplot(geom='errorbar', width = 0.3) + geom_boxplot() + theme(axis.text.x = element_text(angle = 360, vjust = 0.5, hjust=0.5)) + xlab("PD Patient Status") + ylab("Shannon Diversity") # alpha diversity richness distressed vs non-distressed Chao1 plot_richness(parkinson_final3, x="combineda", measures="Chao1") + stat_boxplot(geom='errorbar', width = 0.3) + geom_boxplot() + theme(axis.text.x = element_text(angle = 360, vjust = 0.5, hjust=0.5)) + xlab(" ") # alpha diversity richness distressed vs non-distressed observed features plot_richness(parkinson_final3, x="combineda", measures="Observed") + stat_boxplot(geom='errorbar', width = 0.3) + geom_boxplot() + theme(axis.text.x = element_text(angle = 360, vjust = 0.5, hjust=0.5)) + xlab(" ") # beta diversity bray bc_dma <- distance(parkinson_final3, method="bray") pcoa_bca <- ordinate(parkinson_final3, method="PCoA", distance=bc_dma) plot_ordination(parkinson_final3, pcoa_bca, color = "combineda", shape = "combineda") + labs(col = " ", shape = " ") #taxa summaries plot with distressed/non-distressed label instead of yes/no parkinson_ra3 <- transform_sample_counts(parkinson_final3, function(x) x/sum(x)) parkinson_phylum3 <- tax_glom(parkinson_ra3, taxrank = "Phylum", NArm=FALSE) plot_bar(parkinson_phylum3, fill="Phylum") + facet_wrap(.~combineda, scales = "free_x") #taxa summaries plot with distressed/non-distressed label instead of yes/no parkinson_ra3 <- transform_sample_counts(parkinson_final3, function(x) x/sum(x)) parkinson_phylum3 <- tax_glom(parkinson_ra3, taxrank = "Phylum", NArm=FALSE) plot_bar(parkinson_phylum3, fill="Phylum") + facet_wrap(.~combineda, scales = "free_x") + xlab("Distressed Status") ###april 7: permanova on weighted unifrac dm_unifrac <- UniFrac(parkinson_final3, weighted=TRUE) # Weighted UniFrac metafp3p <- data.frame(sample_data(parkinson_final3)) adonis2(dm_unifrac ~ distressed_only*nondistressed_only, data=metafp3p), na.action = na.omit) ###april 10: permanova on weighted unifrac dm_unifrac <- UniFrac(parkinson_final3, weighted=TRUE) # Weighted UniFrac metafp3p <- data.frame(sample_data(parkinson_final3)) adonis2(dm_unifrac ~ combineda, data=metafp3p, na.action = na.omit) # PERMANOVA using other distance metrics dm_braycurtis <- vegdist(t(otu_table(parkinson_final3)), method="bray") # Bray-curtis adonis2(dm_braycurtis ~ combineda, data=metafp3p) dm_jaccard <- vegdist(t(otu_table(parkinson_final3)), method="jaccard") # Jaccard adonis2(dm_jaccard ~ combineda, data=metafp3p) dm_unifracu <- UniFrac(parkinson_final3, weighted=FALSE) # Unweighted UniFrac adonis2(dm_unifracu ~ combineda, data=metafp3p) ?na.fail