##AIM1 and 2### library (phyloseq) library(ape) library(tidyverse) library(vegan) metafp <-"anemia/anemia_metadata.txt" meta <- read_delim(metafp, delim="\t") otufp <- "table1_export/feature-table.txt" otu <- read_delim(file= otufp, delim="\t", skip=1) taxfp <- "anemia-taxonomy_EXPORT/taxonomy.tsv" tax <- read_delim(taxfp, delim="\t") phylotreefp <- "anemia-rooted-tree_EXPORT/tree.nwk" phylotree <-read.tree(phylotreefp) otu_mat <- as.matrix(otu[,-1]) rownames(otu_mat) <-otu$'#OTU ID' OTU <- otu_table(otu_mat, taxa_are_rows=TRUE) samp_df <-as.data.frame(meta[,-1]) rownames(samp_df)<-meta$`#SampleID` SAMP <-sample_data(samp_df) tax_mat <- tax %>% select(-Confidence)%>% separate(col=Taxon, sep="; ", into = c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species")) %>% as.matrix() tax_mat <-tax_mat[,-1] rownames(tax_mat)<-tax$`Feature ID` TAX <-tax_table(tax_mat) anemia<-phyloseq(OTU, SAMP,TAX,phylotree) #Filter anemia_filt <- subset_taxa(anemia, Domain == "d__Bacteria" & Class!="c__Chloroplast" & Family !="f__Mitochondria") anemia_filt_nolow <- filter_taxa(anemia_filt, function(x) sum(x)>2, prune = TRUE) anemia_final1 <- subset_samples(anemia_filt_nolow, age_months == "6"& anemia!="Missing: Not collected") anemia_final <- subset_samples(anemia_final1, diet == "ExclusiveBM"|diet== "BM.Soup.Broth") save(anemia_final, file="anemia_final.RData") save(anemia_rare, file="anemia_rare.RData") load("anemia_final.RData") load("anemia_rare.RData") ##Group 1- Diversity metrics across anemic status# #Alpha Diversity# plot_richness(anemia_rare, measures = c("Shannon", "Chao1")) plot_richness(anemia_rare, x = "anemia", measures = c("Shannon","Chao1"))+ geom_boxplot() #Beta Diversity# # unweighted unifrac pcoA plot uf_dm <- distance(anemia_final, method="unifrac") pcoa_uf <- ordinate(anemia_final, method="PCoA", distance=uf_dm) plot_ordination(anemia_final, pcoa_uf, color = "anemia") +stat_ellipse(aes(group= anemia), linetype = 2) # bray curtis #anemia_diets_stats<-subset_samples(anemia_final, anemia=="anemic") bc_dm <- distance(anemia_final, method="bray") pcoa_bc <- ordinate(anemia_final, method="PCoA", distance=bc_dm) plot_ordination(anemia_final, pcoa_bc, color = "anemia") + stat_ellipse(aes(group= anemia), linetype = 2) # jaccard j_dm <- distance(anemia_final, method="jaccard") pcoa_j <- ordinate(anemia_final, method="PCoA", distance=j_dm) plot_ordination(anemia_final, pcoa_j, color = "anemia")+stat_ellipse(aes(group= anemia), linetype = 2) #Taxa Barplot anemia_RA <- transform_sample_counts(anemia_rare, function(x) x/sum(x)) plot_bar(anemia_RA, fill="Phylum") anemia_phylum <- tax_glom(anemia_RA, taxrank = "Phylum", NArm=FALSE) plot_bar(anemia_phylum, fill="Phylum") + facet_wrap(.~anemia, scales = "free_x") #Group 2- Diversity metrics across diets #Alpha diversity# plot_richness(anemia_final, measures = c("Shannon", "Chao1")) plot_richness(anemia_final, x = "diet")+ geom_boxplot() #Beta Diversity# # unweighted unifrac pcoA plot uf_dm <- distance(anemia_final, method="unifrac") pcoa_uf <- ordinate(anemia_final, method="PCoA", distance=uf_dm) plot_ordination(anemia_final, pcoa_uf, color = "diet") +stat_ellipse(aes(group= diet), linetype = 2) # bray curtis #anemia_diets_stats<-subset_samples(anemia_final, anemia=="anemic") bc_dm <- distance(anemia_final, method="bray") pcoa_bc <- ordinate(anemia_final, method="PCoA", distance=bc_dm) plot_ordination(anemia_final, pcoa_bc, color = "diet") + stat_ellipse(aes(group= diet), linetype = 2) # jaccard j_dm <- distance(anemia_final, method="jaccard") pcoa_j <- ordinate(anemia_final, method="PCoA", distance=j_dm) plot_ordination(anemia_final, pcoa_j, color = "diet")+stat_ellipse(aes(group= diet), linetype = 2) #Taxa Barplot anemia_RA_1 <- transform_sample_counts(anemia_final, function(x) x/sum(x)) plot_bar(anemia_RA_1, fill="Phylum") anemia_phylum_1 <- tax_glom(anemia_RA_1, taxrank = "Phylum", NArm=FALSE) plot_bar(anemia_phylum, fill="Phylum") + facet_wrap(.~diet, scales = "free_x") #Group 3- Across anemic status within BM.soup.broth anemia_BMsoupbroth <- subset_samples(anemia_final, diet=="BM.Soup.Broth") #Alpha Diversity# plot_richness(anemia_BMsoupbroth, measures = c("Shannon", "Chao1")) plot_richness(anemia_BMsoupbroth, x = "anemia")+ geom_boxplot() #Beta Diversity # # unweighted unifrac pcoA plot uf_dm <- distance(anemia_BMsoupbroth, method="unifrac") pcoa_uf <- ordinate(anemia_BMsoupbroth, method="PCoA", distance=uf_dm) plot_ordination(anemia_BMsoupbroth, pcoa_uf, color = "anemia") + labs(pch="Diet", col = "Anemia Status") + stat_ellipse(aes(group= anemia), linetype = 2) # bray curtis bc_dm <- distance(anemia_BMsoupbroth, method="bray") pcoa_bc <- ordinate(anemia_BMsoupbroth, method="PCoA", distance=bc_dm) plot_ordination(anemia_BMsoupbroth, pcoa_bc, color = "anemia") + labs(pch="Diet", col = "Anemia Status")+ stat_ellipse(aes(group= anemia), linetype = 2) # jaccard j_dm <- distance(anemia_BMsoupbroth, method="jaccard") pcoa_j <- ordinate(anemia_BMsoupbroth, method="PCoA", distance=j_dm) plot_ordination(anemia_BMsoupbroth, pcoa_j, color = "anemia") + labs(pch="Diet", col = "Anemia Status") + stat_ellipse(aes(group= anemia), linetype = 2) #Taxa Barplot anemia_RA_BM.Soup.only <- transform_sample_counts(anemia_BMsoupbroth, function(x) x/sum(x)) plot_bar(anemia_RA_BM.Soup.only, fill="Phylum") anemia_phylum_Bm.Soup.only <- tax_glom(anemia_RA_BM.Soup.only, taxrank = "Phylum", NArm=FALSE) plot_bar(anemia_phylum_normalonly, fill="Phylum") + facet_wrap(.~anemia, scales = "free_x") #Group 4- Across anemia status within BM.only anemia_BMonly <- subset_samples(anemia_final, diet=="ExclusiveBM") #Alpha Diversity plot_richness(anemia_BMonly, measures = c("Shannon", "Chao1")) plot_richness(anemia_BMonly, x = "anemia")+ geom_boxplot() #Beta Diversity# # unweighted unifrac pcoA plot uf_dm_BMonly <- distance(anemia_BMonly, method="unifrac") pcoa_uf_BMonly <- ordinate(anemia_BMonly, method="PCoA", distance=uf_dm_BMonly) plot_ordination(anemia_BMonly, pcoa_uf_BMonly, color = "anemia") + labs(pch="Diet", col = "Anemia Status")+ stat_ellipse(aes(group= anemia), linetype = 2) # bray curtis bc_dm_Bmonly <- distance(anemia_BMonly, method="bray") pcoa_bc_BMonly <- ordinate(anemia_BMonly, method="PCoA", distance=bc_dm_BMonly) plot_ordination(anemia_BMonly, pcoa_bc_BMonly, color = "anemia") + labs(pch="Diet", col = "Anemia Status")+ stat_ellipse(aes(group= anemia), linetype = 2) # jaccard j_dm_BMOnly <- distance(anemia_BMonly, method="jaccard") pcoa_j_BMonly <- ordinate(anemia_BMonly, method="PCoA", distance=j_dm_BMonly) plot_ordination(anemia_BMonly, pcoa_j_BMonly, color = "anemia") + labs(pch="Diet", col = "Anemia Status")+ stat_ellipse(aes(group= anemia), linetype = 2) #Taxa barplot anemia_RA_BMonly <- transform_sample_counts(anemia_BMonly, function(x) x/sum(x)) plot_bar(anemia_RA_BMonly, fill="Phylum") anemia_phylum_BMonly <- tax_glom(anemia_RA_BMonly, taxrank = "Phylum", NArm=FALSE) plot_bar(anemia_phylum_BMonly, fill="Phylum") + facet_wrap(.~diet, scales = "free_x") #Group 5- Across diets within normal normal_only <-subset_samples(anemia_final, anemia=="normal") #Alpha Diversity plot_richness(normal_only) plot_richness(normal_only, measures = c("Shannon", "Chao1")) plot_richness(normal_only, x = "diet")+ geom_boxplot() #Beta Diversity # unweighted unifrac pcoA plot pcoa_dm_normalonly <- distance(normal_only, method="unifrac") pcoa_bc_normalonly <- ordinate(normal_only, method="PCoA", distance=bc_dm_normalonly) plot_ordination(normal_only, pcoa_bc_normalonly, color= 'diet')+ stat_ellipse(aes(group= diet), linetype = 2) # bray curtis bc_dm <- distance(normal_only, method="bray") pcoa_bc <- ordinate(normal_only, method="PCoA", distance=bc_dm) plot_ordination(normal_only, pcoa_bc, color = "diet") + stat_ellipse(aes(group= diet), linetype = 2) # jaccard j_dm_normal <- distance(normal_only, method="jaccard") pcoa_j_normal <- ordinate(normal_only, method="PCoA", distance=j_dm_normal) plot_ordination(normal_only, pcoa_j_normal, color = "diet")+stat_ellipse(aes(group= diet), linetype = 2) #Taxa barplot anemia_RA_normalonly <- transform_sample_counts(normal_only, function(x) x/sum(x)) plot_bar(anemia_RA_normalonly, fill="Phylum") anemia_phylum_normalonly <- tax_glom(anemia_RA_normalonly, taxrank = "Phylum", NArm=FALSE) plot_bar(anemia_phylum_normalonly, fill="Phylum") + facet_wrap(.~diet, scales = "free_x") #Group 6- Across diets within anemia anemic_only <-subset_samples(anemia_final, anemia=="anemic") #Alpha Diversity plot_richness(anemic_only, measures = c("Shannon", "Chao1")) plot_richness(anemic_only, x = "diet")+ geom_boxplot() #Beta Diversity # unweighted unifrac pcoA plot bc_dm_anemiconly <- distance(anemic_only, method="unifrac") pcoa_bc_anemiaonly <- ordinate(anemic_only, method="PCoA", distance=bc_dm_anemiconly) plot_ordination(anemic_only, pcoa_bc_anemiaonly, color= 'diet')+ stat_ellipse(aes(group= diet), linetype = 2) # bray curtis bc_dm_anemic_diets <- distance(anemic_only, method="bray") pcoa_bc_anemic_diets <- ordinate(anemic_only, method="PCoA", distance=bc_dm_anemic_diets) plot_ordination(anemic_only, pcoa_bc_anemic_diets, color = "diet") + stat_ellipse(aes(group= diet), linetype = 2) # jaccard j_dm_anemic_diets <- distance(anemic_only, method="jaccard") pcoa_j_anemic_diets <- ordinate(anemic_only, method="PCoA", distance=j_dm_anemic_diets) plot_ordination(anemic_only, pcoa_j_anemic_diets, color = "diet")+stat_ellipse(aes(group= diet), linetype = 2) #Taxa barplot anemia_RA_anemiconly <- transform_sample_counts(anemic_only, function(x) x/sum(x)) plot_bar(anemia_RA_anemiconly, fill="Phylum") anemia_phylum_anemiconly <- tax_glom(anemia_RA_anemiconly, taxrank = "Phylum", NArm=FALSE) plot_bar(anemia_phylum_anemiconly, fill="Phylum") + facet_wrap(.~diet, scales = "free_x") #statistical tests for AIMS 1 and 2 # across diets within anemia anemia_diets_stats<-subset_samples(anemia_final, anemia=="anemic") alphadiv1<-estimate_richness(anemia_diets_stats) samp_data1<-sample_data(anemia_diets_stats) samp_dat_wdiv1<-data.frame(samp_data1, alphadiv1) wilcox.test(samp_dat_wdiv1$Chao1 ~ samp_dat_wdiv1$diet, exact=FALSE) wilcox.test(samp_dat_wdiv1$Shannon ~ samp_dat_wdiv1$diet, exact=FALSE) dm_unifrac <- UniFrac(anemia_diets_stats, weighted= FALSE) adonis2(dm_unifrac ~ diet, data= samp_dat_wdiv1) dm_braycurtis1 <- vegdist(t(otu_table(anemia_diets_stats)), method="bray") # Bray-curtis adonis2(dm_braycurtis1 ~ diet, data= samp_dat_wdiv1) dm_jaccard1 <- vegdist(t(otu_table(anemia_diets_stats)), method="jaccard") # Jaccard adonis2(dm_jaccard1 ~ diet, data= samp_dat_wdiv1) #across diets within normal normal_diets_stats<-subset_samples(anemia_final, anemia=="normal") alphadiv2<-estimate_richness(normal_diets_stats) samp_data2<-sample_data(normal_diets_stats) samp_dat_wdiv2<-data.frame(samp_data2, alphadiv2) wilcox.test(samp_dat_wdiv2$Shannon ~ samp_dat_wdiv2$diet, exact=FALSE) wilcox.test(samp_dat_wdiv2$Chao1 ~ samp_dat_wdiv1$diet, exact=FALSE) dm_unifrac2 <- UniFrac(normal_diets_stats, weighted= FALSE) adonis2(dm_unifrac2 ~ diet, data= samp_dat_wdiv8) dm_braycurtis2 <- vegdist(t(otu_table(normal_diets_stats)), method="bray") # Bray-curtis adonis2(dm_braycurtis2 ~ diet, data= samp_dat_wdiv2) dm_jaccard2 <- vegdist(t(otu_table(normal_diets_stats)), method="jaccard") # Jaccard adonis2(dm_jaccard2 ~ diet, data= samp_dat_wdiv2) #across anemia within BM.soup anemia_soup_stats<-subset_samples(anemia_final, diet=="BM.Soup.Broth") alphadiv7<-estimate_richness(anemia_soup_stats) samp_data7<-sample_data(anemia_soup_stats) samp_dat_wdiv7<-data.frame(samp_data7, alphadiv7) wilcox.test(samp_dat_wdiv7$Shannon ~ samp_dat_wdiv7$anemia, exact=FALSE) wilcox.test(samp_dat_wdiv7$Chao1 ~ samp_dat_wdiv7$anemia, exact=FALSE) dm_unifrac7 <- UniFrac(anemia_soup_stats, weighted= FALSE) adonis2(dm_unifrac7 ~ anemia, data= samp_dat_wdiv7) dm_braycurtis7 <- vegdist(t(otu_table(anemia_soup_stats)), method="bray") # Bray-curtis adonis2(dm_braycurtis7 ~ anemia, data= samp_dat_wdiv7) dm_jaccard7 <- vegdist(t(otu_table(anemia_soup_stats)), method="jaccard") # Jaccard adonis2(dm_jaccard7 ~ anemia, data= samp_dat_wdiv7) #across anemia within BMonly anemia_BM_stats<-subset_samples(anemia_final, diet=="ExclusiveBM") alphadiv8<-estimate_richness(anemia_BM_stats) samp_data8<-sample_data(anemia_BM_stats) samp_dat_wdiv8<-data.frame(samp_data8, alphadiv8) wilcox.test(samp_dat_wdiv8$Shannon ~ samp_dat_wdiv2$anemia, exact=FALSE) wilcox.test(samp_dat_wdiv8$Chao1 ~ samp_dat_wdiv1$anemia, exact=FALSE) dm_unifrac8 <- UniFrac(anemia_BM_stats, weighted= FALSE) adonis2(dm_unifrac8 ~ anemia, data= samp_dat_wdiv8) dm_braycurtis8 <- vegdist(t(otu_table(anemia_BM_stats)), method="bray") # Bray-curtis adonis2(dm_braycurtis8 ~ anemia, data= samp_dat_wdiv8) dm_jaccard8 <- vegdist(t(otu_table(anemia_BM_stats)), method="jaccard") # Jaccard adonis2(dm_jaccard8 ~ anemia, data= samp_dat_wdiv8) #AIM3-Core Microbiome analyses #convert to relative abundance anemia_RA <- transform_sample_counts(anemia_final, function(x) x/sum(x)) #Filter dataset by anemia status anemia_RA_yes <- subset_samples(anemia_RA, anemia=="anemic") anemia_RA_no <- subset_samples(anemia_RA, anemia=="normal") #Filter dataset by diet within anemia anemia_yes_BMsoupbroth <- subset_samples(anemia_RA_yes, diet=="BM.Soup.Broth") anemia_yes_BMonly <- subset_samples(anemia_RA_yes, diet=="ExclusiveBM") #Filter dataset by diet within non-anemia anemia_no_BMsoupbroth <- subset_samples(anemia_RA_no, diet=="BM.Soup.Broth") anemia_no_BMonly <- subset_samples(anemia_RA_no, diet=="ExclusiveBM") # Set a prevalence threshold and abundance threshold. -- within anemia anemia_yes_BMsoupbroth_ASVs <- core_members(anemia_yes_BMsoupbroth, detection=0.001, prevalence = 0.5) anemia_yes_BMonly_ASVs <- core_members(anemia_yes_BMonly, detection=0.001, prevalence = 0.5) # Set a prevalence threshold and abundance threshold. -- within nonanemia anemia_no_BMsoupbroth_ASVs <- core_members(anemia_no_BMsoupbroth, detection=0.001, prevalence = 0.5) anemia_no_BMonly_ASVs <- core_members(anemia_no_BMonly, detection=0.001, prevalence = 0.5) #What are these ASVs? - within anemia tax_table(prune_taxa(anemia_yes_BMonly_ASVs,anemia_final)) prune_taxa(anemia_yes_BMonly_ASVs,anemia_RA) %>% plot_bar(, fill="Genus")+ facet_wrap(.~diet, scales="free") list(BMonly = anemia_yes_BMonly_ASVs, BMsoupbroth = anemia_yes_BMsoupbroth_ASVs) # Make a Venn-diagram - within anemia ggVennDiagram(x=list(BMonly = anemia_yes_BMonly_ASVs, BMsoupbroth = anemia_yes_BMsoupbroth_ASVs) , filename = "venndiagram.png", output=TRUE) + labs(fill="Count") ggsave(filename = "venndiagram-anemia.png", , height=6, width=4) #What are these ASVs? - within NONanemia prune_taxa(anemia_no_BMsoupbroth_ASVs,anemia_final) %>% tax_table() tax_table(prune_taxa(anemia_no_BMonly_ASVs,anemia_final)) prune_taxa(anemia_no_BMonly_ASVs,anemia_RA) %>% plot_bar(, fill="Genus")+ facet_wrap(.~diet, scales="free") list(BMonly = anemia_no_BMonly_ASVs, BMsoupbroth = anemia_no_BMsoupbroth_ASVs) # Make a Venn-diagram - within NONanemia ggVennDiagram(x=list(BMonly = anemia_no_BMonly_ASVs, BMsoupbroth = anemia_no_BMsoupbroth_ASVs) , filename = "venndiagram.png", output=TRUE) + labs(fill="Count") ggsave(filename = "venndiagram-nonanemia.png", , height=6, width=4) #finding the shared ASVs #Example: BMonly vs BMsoupbroth within anemia normal_signature<-intersect(anemia_yes_BMsoupbroth_ASVs,anemia_yes_BMonly_ASVs) only_taxa_we_need<-prune_taxa(normal_signature, anemia_final) table<-tax_table(only_taxa_we_need) table #4 way venn diagram if (!require(devtools)) install.packages("devtools") devtools::install_github("yanlinlin82/ggvenn") library(ggvenn) library(grid) x <- list( BMonly_anemia = anemia_yes_BMonly_ASVs, BMsoupbroth_anemia = anemia_yes_BMsoupbroth_ASVs, BMonly_nonanemia = anemia_no_BMonly_ASVs, BMsoupbroth_nonanemia = anemia_no_BMsoupbroth_ASVs ) ggvenn( x, fill_color = c("#0073C2FF", "#EFC000FF", "#868686FF", "#CD534CFF"), stroke_size = 0.5, set_name_size = 4 ) #AIM4- DESEq2 load("anemia_final.RData") load("anemia_rare.RData") library(DESeq2) #Across diets within anemic anemia_filtered_aim3_anemiaonly<- subset_samples(anemia_final, anemia == "anemic" ) anemia_deseq<-phyloseq_to_deseq2(anemia_filtered_aim3_anemiaonly, ~diet) DESEQ_anemia_diets<- DESeq(anemia_deseq) res <- results(DESEQ_anemia_diets, tidy=TRUE) view(res) ggplot(res) + geom_point(aes(x=log2FoldChange, y=-log10(padj))) res %>% mutate(significant = padj<0.01 & abs(log2FoldChange)>2) %>% ggplot() + geom_point(aes(x=log2FoldChange, y=-log10(padj), col=significant)) sigASVs_anemiaonly <- res %>% filter(padj<0.01 & abs(log2FoldChange)>1.5) %>% dplyr::rename(ASV=row) View(sigASVs_anemiaonly) # Get only asv names sigASVs_vec_anemia_only <- sigASVs_anemiaonly %>% pull(ASV) # Prune phyloseq file anemia_DESeq_within_anemia <- prune_taxa(sigASVs_anemiaonly,anemia_final) sigASVs_anemiaonly <- tax_table(anemia_DESeq_within_anemia) %>% as.data.frame() %>% rownames_to_column(var="ASV") %>% right_join(sigASVs_anemiaonly) %>% arrange(log2FoldChange) %>% mutate(Genus = make.unique(Genus)) %>% mutate(Genus = factor(Genus, levels=unique(Genus))) ggplot(sigASVs_anemiaonly) + geom_point(aes(x=Genus, y=log2FoldChange, color= Genus), stat="identity")+ geom_errorbar(aes(x=Genus, ymin=log2FoldChange-lfcSE, ymax=log2FoldChange+lfcSE)) + theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5)) #Across diets within normal anemia_filtered_aim3_normalonly<- subset_samples(anemia_final, anemia == "normal" ) normal_deseq<-phyloseq_to_deseq2(anemia_filtered_aim3_normalonly, ~diet) DESEQ_normal_diets<- DESeq(normal_deseq) res1 <- results(DESEQ_normal_diets, tidy=TRUE) view(res1) ggplot(res1) + geom_point(aes(x=log2FoldChange, y=-log10(padj))) res1 %>% mutate(significant = padj<0.01 & abs(log2FoldChange)>2) %>% ggplot() + geom_point(aes(x=log2FoldChange, y=-log10(padj), col=significant)) sigASVs_normal <- res1 %>% filter(padj<0.05 & abs(log2FoldChange)>1.5) %>% dplyr::rename(ASV=row) View(sigASVs_normal) # Get only asv names sigASVs_vec_normal <- sigASVs_normal %>% pull(ASV) # Prune phyloseq file anemia_DESeq_within_normal <- prune_taxa(sigASVs_vec_normal,anemia_final) sigASVs_normal <- tax_table(anemia_DESeq_within_normal) %>% as.data.frame() %>% rownames_to_column(var="ASV") %>% right_join(sigASVs_normal) %>% arrange(log2FoldChange) %>% mutate(Genus = make.unique(Genus)) %>% mutate(Genus = factor(Genus, levels=unique(Genus))) ggplot(sigASVs_normal) + geom_point(aes(x=Genus, y=log2FoldChange, color= Genus), stat="identity")+ geom_errorbar(aes(x=Genus, ymin=log2FoldChange-lfcSE, ymax=log2FoldChange+lfcSE)) + theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5)) #Across anemic status within BM only diet anemia_filtered_aim3_anemia_normal_BMonly<- subset_samples(anemia_final, diet="ExclsuiveBM" ) BM_deseq<-phyloseq_to_deseq2(anemia_filtered_aim3_anemia_normal_BMonly, ~anemia) DESEQ_BM_diets<- DESeq(BM_deseq) res2 <- results(DESEQ_BM_diets, tidy=TRUE) view(res2) ggplot(res2) + geom_point(aes(x=log2FoldChange, y=-log10(padj))) res2 %>% mutate(significant = padj<0.01 & abs(log2FoldChange)>2) %>% ggplot() + geom_point(aes(x=log2FoldChange, y=-log10(padj), col=significant)) sigASVs2 <- res2 %>% filter(padj<0.05 & abs(log2FoldChange)>1.5) %>% dplyr::rename(ASV=row) View(sigASVs2) # Get only asv names sigASVs_vec2 <- sigASVs %>% pull(ASV) # Prune phyloseq file anemia_DESeq_within_BMonly <- prune_taxa(sigASVs_vec2,anemia_final) sigASVs2 <- tax_table(anemia_DESeq_within_BMonly) %>% as.data.frame() %>% rownames_to_column(var="ASV") %>% right_join(sigASVs2) %>% arrange(log2FoldChange) %>% mutate(Genus = make.unique(Genus)) %>% mutate(Genus = factor(Genus, levels=unique(Genus))) ggplot(sigASVs2) + geom_point(aes(x=Genus, y=log2FoldChange, color= Genus), stat="identity")+ geom_errorbar(aes(x=Genus, ymin=log2FoldChange-lfcSE, ymax=log2FoldChange+lfcSE)) + theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5)) #Across anemic status within complete diet anemia_filtered_aim3_anemia_normal_soup<- subset_samples(anemia_final, diet="BM.Soup.Broth" ) Soup_Broth_deseq<-phyloseq_to_deseq2(anemia_filtered_aim3_anemia_normal_soup, ~anemia) DESEQ_Soup_Broth_diets<- DESeq(Soup_Broth_deseq) res3 <- results(DESEQ_Soup_Broth_diets, tidy=TRUE) view(res3) ggplot(res3) + geom_point(aes(x=log2FoldChange, y=-log10(padj))) res3 %>% mutate(significant = padj<0.01 & abs(log2FoldChange)>2) %>% ggplot() + geom_point(aes(x=log2FoldChange, y=-log10(padj), col=significant)) sigASVs3 <- res3 %>% filter(padj<0.05 & abs(log2FoldChange)>1.5) %>% dplyr::rename(ASV=row) View(sigASVs3) # Get only asv names sigASVs_vec3 <- sigASVs3 %>% pull(ASV) # Prune phyloseq file anemia_DESeq_within_BM.Soup <- prune_taxa(sigASVs_vec3,anemia_final) sigASVs3 <- tax_table(anemia_DESeq_within_BM.Soup) %>% as.data.frame() %>% rownames_to_column(var="ASV") %>% right_join(sigASVs3) %>% arrange(log2FoldChange) %>% mutate(Genus = make.unique(Genus)) %>% mutate(Genus = factor(Genus, levels=unique(Genus))) ggplot(sigASVs3) + geom_point(aes(x=Genus, y=log2FoldChange, color= Genus), stat="identity")+ geom_errorbar(aes(x=Genus, ymin=log2FoldChange-lfcSE, ymax=log2FoldChange+lfcSE)) + theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5)) #AIM4- Picrust R portion (the QIIME analysis is scripted with the rest of the QIIME analyses) picrustfp_EC <-"C:/Users/apsar/OneDrive/Desktop/pred_metagenome_unstrat_descrip.tsv/picrust/picrust2_out_pipeline/EC_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz" picrust_EC<- read.table(picrustfp_EC, sep="\t", header=FALSE, comment.char="#",na.strings=".", stringsAsFactors=FALSE, quote="", fill=FALSE) metafp <-"C:/Users/apsar/OneDrive/Desktop/anemia/anemia_aim1/project2/anemia/anemia_metadata.txt" meta <- read_delim(metafp, delim="\t") meta_df <-as.data.frame(meta[,-1]) rownames(meta_df)<-meta$`#SampleID` ctsTable_EC <- read.table(picrustfp_EC, sep="\t", header=FALSE, comment.char="#",na.strings=".", stringsAsFactors=FALSE, quote="", fill=FALSE) countDataMatrix_EC <- as.matrix(ctsTable_EC[-1,-1]) # Extract counts & Ignore first column (geneID) and first row (sample names) rownames(countDataMatrix_EC) <- ctsTable_EC[-1,2] # Set rownames to geneIDs colnames(countDataMatrix_EC) <- ctsTable_EC[1,-1] # Set colnames to sample names countDataMatrix_EC<-countDataMatrix_EC[,-1] mode(countDataMatrix_EC) <- "integer" # Convert to integer dds_EC <- DESeqDataSetFromMatrix(countData = countDataMatrix_EC[,1:ncol(countDataMatrix_EC)], colData = meta_df[,2:ncol(meta_df)],design= ~ diet) dds_EC <- DESeq(dds_EC) dds.sub_EC<- dds_EC[, dds_EC$anemia %in% c("anemic")] dds.sub1_EC <- dds.sub_EC[ , dds.sub_EC$diet %in% c("ExclusiveBM", "BM.Soup.Broth") ] colData(dds.sub1_EC) res_EC<- results(dds.sub1_EC) summary(res_EC) res_dataframe_EC = data.frame(res_EC) filtered_EC_dataframe<- subset(res_dataframe_EC, padj< 0.05 & abs(res_dataframe_EC$log2FoldChange) > 1.5) filtered_EC_dataframe$enzyme = rownames(filtered_EC_dataframe) filtered_EC_dataframe$enzyme<- factor(filtered_EC_dataframe$enzyme, levels=filtered_EC_dataframe$enzyme[order(filtered_EC_dataframe$log2FoldChange)]) ggplot(filtered_EC_dataframe,aes(x=log2FoldChange,y=enzyme, fill = padj), stat="identity") + geom_col()+ theme_bw() + theme(text = element_text(size=13), axis.text.y = element_text(angle = 0),axis.text.x= element_text(angle=90)) + labs(y="Enzymes",x="Log2FoldChange", colour = "padj") + scale_fill_gradient(low = "blue", high = "red")+ scale_y_discrete(labels = function(y) str_wrap(y, width =50))