#### MANIFEST FILTERING #### # Load packages library(tidyverse) # Read fish_manifest.txt file fish_manifest <- "fish_manifest.txt" manifest <- read_delim(fish_manifest, delim = "\t") mani <- as.data.frame(manifest) # Read fish_metadata.txt file fish_metadata <- "fish_metadata.txt" metadata <- read_delim(fish_metadata, delim = "\t") meta <- as.data.frame(metadata) # Add a new column with "site" mani_with_site <- mutate(mani, "site" = meta$sample_type) # Filter for each site (gill, skin, midgut, hindgut) and save as a new data frame gill_mani <- filter(mani_with_site, site=="gill") skin_mani <- filter(mani_with_site, site=="skin") midgut_mani <- filter(mani_with_site, site=="midgut") hindgut_mani <- filter(mani_with_site, site=="hindgut") # Remove the column site from the data frame gill_mani_final <- subset(gill_mani,select = -c(site)) skin_mani_final <- subset(skin_mani,select = -c(site)) midgut_mani_final <- subset(midgut_mani,select = -c(site)) hindgut_mani_final <- subset(hindgut_mani,select = -c(site)) # Save data frames for each body site as a manifest.txt file write.table(gill_mani_final, quote = FALSE, file = "gill_manifest.txt", sep = "\t", row.names = FALSE) write.table(skin_mani_final, quote = FALSE, file = "skin_manifest.txt", sep = "\t", row.names = FALSE) write.table(midgut_mani_final, quote = FALSE, file = "midgut_manifest.txt", sep = "\t", row.names = FALSE) write.table(hindgut_mani_final, quote = FALSE, file = "hindgut_manifest.txt", sep = "\t", row.names = FALSE) #### METADATA FILTERING #### # Load packages library(dplyr) library(tidyverse) # Read fish_metadata.csv file fish_metadata <- "fish_metadata.csv" meta <- read.csv(fish_metadata) # Filter out rows where distance and depth level are not applicable, # remove water(saline) samples meta_final <- subset(meta, distance_from_shore_m != "not applicable" & sample_type != "water (saline)" & habitat_depth_level2 != "not applicable" & meta$trophic == 3) # Rename the first column header to sampleid colnames(meta_final)[1] <- "sampleid" # Use a for and if/else loop to group habitat_depth_level2 into low, medium or high in a new column for(d in 1:nrow(meta_final)) { if (meta_final$habitat_depth_level2[d] == "intertidal"){ meta_final$depth_category[d] <- "low" }else if (meta_final$habitat_depth_level2[d] == "neritic"){ meta_final$depth_category[d] <- "low" }else if (meta_final$habitat_depth_level2[d] == "mesopelagic"){ meta_final$depth_category[d] <- "medium" }else if (meta_final$habitat_depth_level2[d] == "bathypelagic"){ meta_final$depth_category[d] <- "medium" }else if (meta_final$habitat_depth_level2[d] == "mesopelagic and benthopelagic"){ meta_final$depth_category[d] <- "medium" }else if (meta_final$habitat_depth_level2[d] == "abyssalpelagic"){ meta_final$depth_category[d] <- "high" }else if (meta_final$habitat_depth_level2[d] == "benthopelagic"){ meta_final$depth_category[d] <- "high" } } # To make distance_from_shore_m a numeric value meta_final$distance_from_shore_m <- as.numeric(meta_final$distance_from_shore_m) # To group the samples into three categories based on distance from shore (metres), # adds a new column with the designated category meta_final$distance_category <- ntile(meta_final$distance_from_shore_m, 3) # To manually change sample 13414.58.skin.R1.fastq.gz to distance category 1 meta_final[meta_final$sampleid == "13414.58.skin.R1.fastq.gz", "distance_category"] <- 1 # To manually change sample 13414.79.gill.R1.fastq.gz to distance category 3 meta_final[meta_final$sampleid == "13414.79.gill.R1.fastq.gz", "distance_category"] <- 3 # Binning the samples by depth and distance from shore into nine regions for(d in 1:nrow(meta_final)) { if (meta_final$depth_category[d] == "low" & meta_final$distance_category[d] == "1"){ meta_final$habitat_region[d] <- "region 1" }else if (meta_final$depth_category[d] == "low" & meta_final$distance_category[d] == "2"){ meta_final$habitat_region[d] <- "region 2" }else if (meta_final$depth_category[d] == "low" & meta_final$distance_category[d] == "3"){ meta_final$habitat_region[d] <- "region 3" }else if (meta_final$depth_category[d] == "medium" & meta_final$distance_category[d] == "1"){ meta_final$habitat_region[d] <- "region 4" }else if (meta_final$depth_category[d] == "medium" & meta_final$distance_category[d] == "2"){ meta_final$habitat_region[d] <- "region 5" }else if (meta_final$depth_category[d] == "medium" & meta_final$distance_category[d] == "3"){ meta_final$habitat_region[d] <- "region 6" }else if (meta_final$depth_category[d] == "high" & meta_final$distance_category[d] == "1"){ meta_final$habitat_region[d] <- "region 7" }else if (meta_final$depth_category[d] == "high" & meta_final$distance_category[d] == "2"){ meta_final$habitat_region[d] <- "region 8" }else if (meta_final$depth_category[d] == "high" & meta_final$distance_category[d] == "3"){ meta_final$habitat_region[d] <- "region 9" } } # Count how many observations are in each region meta_final %>% count(habitat_region) # Filter to keep habitat region 1, 3, and 6 meta_final_filtered <- subset(meta_final, habitat_region != "region 2") # To add a new column Habitat Region that renames region 1 to Region 1, region 3 to Region 2, and region 6 to Region 3 meta_final_filtered = meta_final_filtered %>% mutate("Habitat_Region" = ifelse(habitat_region == "region 1","Region 1", ifelse(habitat_region == "region 3","Region 2", ifelse(habitat_region == "region 6","Region 3",NA)))) # To see how many samples are in each region meta_final_filtered %>% count(habitat_region) #habitat_region n # 1 region 1 67 # 2 region 3 27 # 3 region 6 40 # Save the metadata file onto local computer as .txt file write.table(meta_final_filtered, file = "filtered_metadata.txt", sep = "\t", row.names = FALSE) #### PHYLOSEQ OBJECT GENERATION #### library(phyloseq) library(ape) # importing trees library(tidyverse) library(vegan) # for rarefaction analysis #MIDGUT # Load in ASV table, metadata table, taxonomy table, and phylogenetic tree metafp <- "filtered_metadata.txt" meta <- read_delim(metafp, delim="\t") otufp <- "midgut_table.txt" otu <- read_delim(file = otufp, delim="\t", skip=1) taxfp <- "midgut_taxonomy.tsv" tax <- read_delim(taxfp, delim="\t") phylotreefp <- "midgut_rooted_tree.nwk" phylotree <- read.tree(phylotreefp) class(phylotree) # Format OTU table # save everything except first column (OTU ID) into a matrix otu_mat <- as.matrix(otu[,-1]) # Make first column (#OTU ID) the rownames of the new matrix rownames(otu_mat) <- otu$`#OTU ID` # Use the "otu_table" function to make an OTU table OTU <- otu_table(otu_mat, taxa_are_rows = TRUE) class(OTU) # Format sample metadata # Save everything except sample-id as new data frame samp_df <- as.data.frame(meta[,-1]) # Make sampleids the rownames rownames(samp_df)<- meta$sampleid # Make phyloseq sample data with sample_data() function SAMP <- sample_data(samp_df) class(SAMP) # Formatting taxonomy # Convert taxon strings to a table with separate taxa rank columns tax_mat <- tax %>% select(-Confidence)%>% separate(col=Taxon, sep="; " , into = c("Domain","Phylum","Class","Order","Family","Genus","Species")) %>% as.matrix() # Saving as a matrix # Save everything except feature IDs tax_mat <- tax_mat[,-1] # Make sampleids the rownames rownames(tax_mat) <- tax$`Feature ID` # Make taxa table TAX <- tax_table(tax_mat) class(TAX) # Create phyloseq object # Merge all into a phyloseq object midgut <- phyloseq(OTU, SAMP, TAX, phylotree) # View components of phyloseq object with the following commands otu_table(midgut) sample_data(midgut) tax_table(midgut) phy_tree(midgut) # To remove mitochondria and chloroplasts (and keep only Bacteria) midgut_filtered <- subset_taxa(midgut, Domain == "d__Bacteria" & Genus!="g__Chloroplast" & Class!="c__Chloroplast" & Family !="f__Mitochondria") tax_table(midgut_filtered) # To remove low-abundance reads (e.g. <5 reads total) midgut_filtered_nolow <- filter_taxa(midgut_filtered, function(x) sum(x)>5, prune = TRUE) # To remove samples that have less than 100 reads midgut_filtered_final <- prune_samples(sample_sums(midgut_filtered_nolow)>100, midgut_filtered_nolow) # To make a rarefaction curve and then filter based on a chosen sequencing depth rarecurve(t(as.data.frame(otu_table(midgut_filtered_final))), cex = 0.1, xlim=c(0,2000)) midgut_rarified <- rarefy_even_depth(midgut_filtered_final, rngseed = 1, sample.size = 1200) # To save the phyloseq object as an RData file save(midgut_filtered_final, file="midgut_filtered_final.RData") save(midgut_rarified, file="midgut_rarified.RData") #HINDGUT # Load in ASV table, metadata table, taxonomy table, and phylogenetic tree metafp <- "filtered_metadata.txt" h_meta <- read_delim(metafp, delim="\t") h_otufp <- "hindgut_table.txt" h_otu <- read_delim(file = h_otufp, delim="\t", skip=1) h_taxfp <- "hindgut_taxonomy.tsv" h_tax <- read_delim(h_taxfp, delim="\t") h_phylotreefp <- "hindgut_rooted_tree.nwk" h_phylotree <- read.tree(h_phylotreefp) class(h_phylotree) # Format OTU table # save everything except first column (OTU ID) into a matrix h_otu_mat <- as.matrix(h_otu[,-1]) # Make first column (#OTU ID) the rownames of the new matrix rownames(h_otu_mat) <- h_otu$`#OTU ID` # Use the "otu_table" function to make an OTU table h_OTU <- otu_table(h_otu_mat, taxa_are_rows = TRUE) class(h_OTU) # Format sample metadata # Save everything except sample-id as new data frame h_samp_df <- as.data.frame(h_meta[,-1]) # Make sampleids the rownames rownames(h_samp_df)<- h_meta$sampleid # Make phyloseq sample data with sample_data() function h_SAMP <- sample_data(h_samp_df) class(h_SAMP) # Formatting taxonomy # Convert taxon strings to a table with separate taxa rank columns h_tax_mat <- h_tax %>% select(-Confidence)%>% separate(col=Taxon, sep="; " , into = c("Domain","Phylum","Class","Order","Family","Genus","Species")) %>% as.matrix() # Saving as a matrix # Save everything except feature IDs h_tax_mat <- h_tax_mat[,-1] # Make sampleids the rownames rownames(h_tax_mat) <- h_tax$`Feature ID` # Make taxa table h_TAX <- tax_table(h_tax_mat) class(h_TAX) # Create phyloseq object # Merge all into a phyloseq object hindgut <- phyloseq(h_OTU, h_SAMP, h_TAX, h_phylotree) # View components of phyloseq object with the following commands otu_table(hindgut) sample_data(hindgut) tax_table(hindgut) phy_tree(hindgut) # To remove mitochondria and chloroplasts (and keep only Bacteria) hindgut_filtered <- subset_taxa(hindgut, Domain == "d__Bacteria" & Genus !="g_Chloroplast" & Class!="c__Chloroplast" & Family !="f__Mitochondria") tax_table(hindgut_filtered) # To remove low-abundance taxa (e.g. <5 reads total) hindgut_filtered_nolow <- filter_taxa(hindgut_filtered, function(x) sum(x)>5, prune = TRUE) # To remove samples that have less than 100 reads hindgut_filtered_final <- prune_samples(sample_sums(hindgut_filtered_nolow)>100, hindgut_filtered_nolow) # To make a rarefaction curve and then filter based on a chosen sequencing depth rarecurve(t(as.data.frame(otu_table(hindgut_filtered_final))), cex = 0.1, xlim=c(0,2000)) hindgut_rarified <- rarefy_even_depth(hindgut_filtered_final, rngseed = 1, sample.size = 1200) # To save the phyloseq object as an RData file save(hindgut_filtered_final, file="hindgut_filtered_final.RData") save(hindgut_rarified, file="hindgut_rarified.RData") #### ALPHA DIVERSITY #### # Load packages library(phyloseq) library(vegan) #To load previously saves .RData file load("midgut_rarified.RData") load("hindgut_rarified.RData") # MIDGUT # Determine Shannon and Chao1 diversity midgut_alpha_diversity <- estimate_richness(midgut_rarified, measures = c("Shannon", "Chao1")) # Save midgut_rarified as a data frame midgut_samp_dat <- sample_data(midgut_rarified) # Add midgut_alpha diversity to midgut_samp_dat midgut_samp_dat_wdiv <- data.frame(midgut_samp_dat, midgut_alpha_diversity) # HINDGUT # Determine Shannon and Chao1 diversity hindgut_alpha_diversity <- estimate_richness(hindgut_rarified, measures = c("Shannon", "Chao1")) # Save hindgut_rarified as a data frame hindgut_samp_dat <- sample_data(hindgut_rarified) # Add hindgut_alpha diversity to hindgut_samp_dat hindgut_samp_dat_wdiv <- data.frame(hindgut_samp_dat, hindgut_alpha_diversity) ### PLOT ALPHA DIVERSITY # Install packages install.packages(ggpubr) # Load packages library(ggplot2) library(ggpubr) library(phyloseq) #To load previously saves .RData file load("midgut_rarified.RData") load("hindgut_rarified.RData") # MIDGUT # To plot alpha diversity (Shannon, Chao1) for midgut rarified samples midgut_alpha_plot <- plot_richness(midgut_rarified, x = ("Habitat_Region"), measures = c("Shannon","Chao1"), color = ("Habitat_Region"), title = ("Midgut")) + geom_boxplot() # To create a vector for Wilcoxon comparisons my_comparisons <- list( c("Region 1", "Region 3"), c("Region 1", "Region 2"), c("Region 2", "Region 3") ) # To rename the legend and x-axis renamed_midgut_alpha_plot <- ggpar(midgut_alpha_plot, xlab = ("Habitat Region"), legend.title = ("Habitat Region")) # To plot alpha diversity plot with Wilcoxon statistics renamed_midgut_alpha_plot + stat_compare_means(comparisons = my_comparisons) + theme_bw(base_size=16) # HINDGUT # To plot alpha diversity (Shannon, Chao1) for hindgut rarified samples hindgut_alpha_plot <- plot_richness(hindgut_rarified, x = "Habitat_Region", measures = c("Shannon","Chao1"), color = "Habitat_Region", title ="Hindgut") + geom_boxplot() # To create a vector for Wilcoxon comparisons my_comparisons <- list( c("Region 1", "Region 3"), c("Region 1", "Region 2"), c("Region 2", "Region 3") ) # To rename the legend and x-axis renamed_hindgut_alpha_plot <- ggpar(hindgut_alpha_plot, xlab = ("Habitat Region"), legend.title = ("Habitat Region")) # To plot alpha diversity plot with Wilcoxon statistics renamed_hindgut_alpha_plot + stat_compare_means(comparisons = my_comparisons) + theme_bw(base_size=16) #### KRUSKAL-WALLIS TEST #### # MIDGUT # Kruskal-Wallis test using Shannon diversity for Regions 1, 3, and 6 kruskal.test(Shannon ~ habitat_region, data=midgut_samp_dat_wdiv) # Kruskal-Wallis test using Chao1 diversity for Regions 1, 3, and 6 kruskal.test(Chao1 ~ habitat_region, data=midgut_samp_dat_wdiv) # HINDGUT # Kruskal-Wallis test using Shannon diversity for Regions 1, 3, and 6 kruskal.test(Shannon ~ habitat_region, data=hindgut_samp_dat_wdiv) # Kruskal-Wallis test using Chao1 diversity for Regions 1, 3, and 6 kruskal.test(Chao1 ~ habitat_region, data=hindgut_samp_dat_wdiv) #### WILCOXON RANK SUM TEST #### # MIDGUT # Create vectors with Shannon diversity for midgut samples from habitat region 1 and 6 midgut_shannon_1_vector <- filter(midgut_samp_dat_wdiv, habitat_region =="region 1") %>% pull(Shannon) midgut_shannon_6_vector <- filter(midgut_samp_dat_wdiv, habitat_region =="region 6") %>% pull(Shannon) # Wilcoxon test to compare Shannon diversity between midgut samples in region 1 vs. 6 wilcox.test(midgut_shannon_1_vector, midgut_shannon_6_vector) # Create a vector with Shannon diversity for midgut samples from habitat region 3 midgut_shannon_3_vector <- filter(midgut_samp_dat_wdiv, habitat_region =="region 3") %>% pull(Shannon) # Wilcoxon test to compare Shannon diversity between midgut samples in region 1 vs. 3 wilcox.test(midgut_shannon_1_vector, midgut_shannon_3_vector) # Wilcoxon test to compare Shannon diversity between hindgut samples in region 3 vs. 6 wilcox.test(midgut_shannon_3_vector, midgut_shannon_6_vector) # Create vectors with Chao1 diversity for midgut samples from habitat region 1 and 6 midgut_chao_1_vector <- filter(midgut_samp_dat_wdiv, habitat_region =="region 1") %>% pull(Chao1) midgut_chao_6_vector <- filter(midgut_samp_dat_wdiv, habitat_region =="region 6") %>% pull(Chao1) # Wilcoxon test to compare Chao1 diversity between midgut samples in region 1 vs. 6 wilcox.test(midgut_chao_1_vector, midgut_chao_6_vector) # Create a vector with Chao1 diversity for midgut samples from habitat region 3 midgut_chao_3_vector <- filter(midgut_samp_dat_wdiv, habitat_region =="region 3") %>% pull(Chao1) # Wilcoxon test to compare Chao1 diversity between midgut samples in region 1 vs. 3 wilcox.test(midgut_chao_1_vector, midgut_chao_3_vector) # Wilcoxon test to compare Chao1 diversity between midgut samples in region 3 vs. 6 wilcox.test(midgut_chao_3_vector, midgut_chao_6_vector) # HINDGUT # Create vectors with Shannon diversity for hindgut samples from habitat region 1 and 6 hindgut_shannon_1_vector <- filter(hindgut_samp_dat_wdiv, habitat_region =="region 1") %>% pull(Shannon) hindgut_shannon_6_vector <- filter(hindgut_samp_dat_wdiv, habitat_region =="region 6") %>% pull(Shannon) # Wilcoxon test to compare Shannon diversity between hindgut samples in region 1 vs. 6 wilcox.test(hindgut_shannon_1_vector, hindgut_shannon_6_vector) # Create vectors with Shannon diversity for hindgut samples from habitat region 1 and 3 hindgut_shannon_3_vector <- filter(hindgut_samp_dat_wdiv, habitat_region =="region 3") %>% pull(Shannon) # Wilcoxon test to compare Shannon diversity between hindgut samples in region 1 vs. 3 wilcox.test(hindgut_shannon_1_vector, hindgut_shannon_3_vector) # Wilcoxon test to compare Shannon diversity between hindgut samples in region 3 vs. 6 wilcox.test(hindgut_shannon_3_vector, hindgut_shannon_6_vector) # Create vectors with Chao1 diversity for hindgut samples from habitat region 1 and 6 hindgut_chao_1_vector <- filter(hindgut_samp_dat_wdiv, habitat_region =="region 1") %>% pull(Chao1) hindgut_chao_6_vector <- filter(hindgut_samp_dat_wdiv, habitat_region =="region 6") %>% pull(Chao1) # Wilcoxon test to compare Chao1 diversity between hindgut samples in region 1 vs. 6 wilcox.test( hindgut_chao_1_vector, hindgut_chao_6_vector ) # Create vectors with Chao1 diversity for hindgut samples from habitat region 3 hindgut_chao_3_vector <- filter(hindgut_samp_dat_wdiv, habitat_region =="region 3") %>% pull(Chao1) # Wilcoxon test to compare Chao1 diversity between hindgut samples in region 1 vs. 3 wilcox.test( hindgut_chao_1_vector, hindgut_chao_3_vector) # Wilcoxon test to compare Chao1 diversity between hindgut samples in region 3 vs. 6 wilcox.test( hindgut_chao_3_vector, hindgut_chao_6_vector) # MIDGUT VS HINDGUT # Wilcoxon test to compare Shannon diversity between midgut and hindgut samples in region 1 wilcox.test(midgut_shannon_1_vector, hindgut_shannon_1_vector) # Wilcoxon test to compare Shannon diversity between midgut and hindgut samples in region 3 wilcox.test(midgut_shannon_3_vector, hindgut_shannon_3_vector) # Wilcoxon test to compare Shannon diversity between midgut and hindgut samples in region 6 wilcox.test(midgut_shannon_6_vector, hindgut_shannon_6_vector) # Wilcoxon test to compare Chao1 diversity between midgut and hindgut samples in region 1 wilcox.test(midgut_chao_1_vector, hindgut_chao_1_vector) # Wilcoxon test to compare Chao1 diversity between midgut and hindgut samples in region 3 wilcox.test(midgut_chao_3_vector, hindgut_chao_3_vector) # Wilcoxon test to compare Shannon diversity between midgut and hindgut samples in region 6 wilcox.test(midgut_chao_6_vector, hindgut_chao_6_vector) #### BETA-DIVERSITY #### # Load packages library(phyloseq) library(tidyverse) library(ggplot2) # To load previously saves .RData file load("midgut_rarified.RData") load("hindgut_rarified.RData") # MIDGUT # Determine Bray-Curtis distance matrices bc_midgut <- distance(midgut_rarified, method="bray") # Perform and plot principal coordinate analysis for distance matrices pcoa_bc_midgut <- ordinate(midgut_rarified, method="PCoA", distance=bc_midgut) gg_pcoa_midgut <- plot_ordination(midgut_rarified, pcoa_bc_midgut, color = "Habitat_Region", title = "Midgut")+ labs(color="Habitat Region") + stat_ellipse(type = "t") + theme_bw(base_size=16) gg_pcoa_midgut # Save plots ggsave("./plot_pcoa_midgut.png" , gg_pcoa_midgut , height=4, width=7) # HINDGUT # Determine Bray-Curtis distance matrices bc_hindgut <- distance(hindgut_rarified, method="bray") # Perform and plot principal coordinate analysis for distance matrices pcoa_bc_hindgut <- ordinate(hindgut_rarified, method="PCoA", distance=bc_hindgut) gg_pcoa_hindgut <- plot_ordination(hindgut_rarified, pcoa_bc_hindgut, color ="Habitat_Region", title = "Hindgut")+ labs(color="Habitat Region")+ stat_ellipse(type = "t") + theme_bw(base_size=16) gg_pcoa_hindgut # Save plots ggsave("./plot_pcoa_hindgut.png" , gg_pcoa_hindgut , height=4, width=7) #### PERMANOVA #### # Load packages library(tidyverse) library(phyloseq) library(vegan) # To load previously saved .RData file load("midgut_rarified.RData") load("hindgut_rarified.RData") # Subset midgut and hindgut samples by region and calculate Bray-Curtis distance matrix # MIDGUT # regions 1_6 midgut_rarified_1_6 <- subset_samples(midgut_rarified, habitat_region != "region 3") samp_dat_wdiv_midgut_1_6 <- data.frame(sample_data(midgut_rarified_1_6), estimate_richness(midgut_rarified_1_6)) dm_bray_midgut_1_6 <- vegdist(t(otu_table(midgut_rarified_1_6)), method="bray") adonis2(dm_bray_midgut_1_6 ~ habitat_region, data=samp_dat_wdiv_midgut_1_6) # regions 1_3 midgut_rarified_1_3 <- subset_samples(midgut_rarified, habitat_region != "region 6") samp_dat_wdiv_midgut_1_3 <- data.frame(sample_data(midgut_rarified_1_3), estimate_richness(midgut_rarified_1_3)) dm_bray_midgut_1_3 <- vegdist(t(otu_table(midgut_rarified_1_3)), method="bray") adonis2(dm_bray_midgut_1_3 ~ habitat_region, data=samp_dat_wdiv_midgut_1_3) # regions 3_6 midgut_rarified_3_6 <- subset_samples(midgut_rarified, habitat_region != "region 6") samp_dat_wdiv_midgut_3_6 <- data.frame(sample_data(midgut_rarified_3_6), estimate_richness(midgut_rarified_3_6)) dm_bray_midgut_3_6 <- vegdist(t(otu_table(midgut_rarified_3_6)), method="bray") adonis2(dm_bray_midgut_3_6 ~ habitat_region, data=samp_dat_wdiv_midgut_3_6) # HINDGUT # regions 1_6 hindgut_rarified_1_6 <- subset_samples(hindgut_rarified, habitat_region != "region 3") samp_dat_wdiv_hindgut_1_6 <- data.frame(sample_data(hindgut_rarified_1_6), estimate_richness(hindgut_rarified_1_6)) dm_bray_hindgut_1_6 <- vegdist(t(otu_table(hindgut_rarified_1_6)), method="bray") adonis2(dm_bray_hindgut_1_6 ~ habitat_region, data=samp_dat_wdiv_hindgut_1_6) # regions 1_3 hindgut_rarified_1_3 <- subset_samples(hindgut_rarified, habitat_region != "region 6") samp_dat_wdiv_hindgut_1_3 <- data.frame(sample_data(hindgut_rarified_1_3), estimate_richness(hindgut_rarified_1_3)) dm_bray_hindgut_1_3 <- vegdist(t(otu_table(hindgut_rarified_1_3)), method="bray") adonis2(dm_bray_hindgut_1_3 ~ habitat_region, data=samp_dat_wdiv_hindgut_1_3) # regions 3_6 hindgut_rarified_3_6 <- subset_samples(hindgut_rarified, habitat_region != "region 6") samp_dat_wdiv_hindgut_3_6 <- data.frame(sample_data(hindgut_rarified_3_6), estimate_richness(hindgut_rarified_3_6)) dm_bray_hindgut_3_6 <- vegdist(t(otu_table(hindgut_rarified_3_6)), method="bray") adonis2(dm_bray_hindgut_3_6 ~ habitat_region, data=samp_dat_wdiv_hindgut_3_6) #### DESeq #### # Load packages library(tidyverse) library(phyloseq) library(DESeq2) library(dplyr) library(ggplot2) library(ggVennDiagram) # load previously saved raw .RData files (NOT RAREFIED) load("midgut_filtered_final.RData") load("hindgut_filtered_final.RData") # filter samples by region + glom for genus midgut_filtered_final_1_6 <- subset_samples(midgut_filtered_final, habitat_region != "region 3") midgut_genus_1_6 <- tax_glom(midgut_filtered_final_1_6, taxrank = "Genus") hindgut_filtered_final_1_6 <- subset_samples(hindgut_filtered_final, habitat_region != "region 3") hindgut_genus_1_6 <- tax_glom(hindgut_filtered_final_1_6, taxrank = "Genus") midgut_filtered_final_1_3 <- subset_samples(midgut_filtered_final, habitat_region != "region 6") midgut_genus_1_3 <- tax_glom(midgut_filtered_final_1_3, taxrank = "Genus") hindgut_filtered_final_1_3 <- subset_samples(hindgut_filtered_final, habitat_region != "region 6") hindgut_genus_1_3 <- tax_glom(hindgut_filtered_final_1_3, taxrank = "Genus") midgut_filtered_final_3_6 <- subset_samples(midgut_filtered_final, habitat_region != "region 1") midgut_genus_3_6 <- tax_glom(midgut_filtered_final_3_6, taxrank = "Genus") hindgut_filtered_final_3_6 <- subset_samples(hindgut_filtered_final, habitat_region != "region 1") hindgut_genus_3_6 <- tax_glom(hindgut_filtered_final_3_6, taxrank = "Genus") # MIDGUT # regions 1_6 midgut_plus1_6 <- transform_sample_counts(midgut_genus_1_6, function(x) x+1) midgut_deseq1_6 <- phyloseq_to_deseq2(midgut_plus1_6, ~habitat_region) DESEQ_midgut1_6 <- DESeq(midgut_deseq1_6) res_midgut1_6 <- results(DESEQ_midgut1_6, tidy=TRUE) View(res_midgut1_6) # regions 1_3 midgut_plus1_3 <- transform_sample_counts(midgut_genus_1_3, function(x) x+1) midgut_deseq1_3 <- phyloseq_to_deseq2(midgut_plus1_3, ~habitat_region) DESEQ_midgut1_3 <- DESeq(midgut_deseq1_3) res_midgut1_3 <- results(DESEQ_midgut1_3, tidy=TRUE) View(res_midgut1_3) # regions 3_6 midgut_plus3_6 <- transform_sample_counts(midgut_genus_3_6, function(x) x+1) midgut_deseq3_6 <- phyloseq_to_deseq2(midgut_plus3_6, ~habitat_region) DESEQ_midgut3_6 <- DESeq(midgut_deseq3_6) res_midgut3_6 <- results(DESEQ_midgut3_6, tidy=TRUE) View(res_midgut3_6) # HINDGUT # regions 1_6 hindgut_plus1_6 <- transform_sample_counts(hindgut_genus_1_6, function(x) x+1) hindgut_deseq1_6 <- phyloseq_to_deseq2(hindgut_plus1_6, ~habitat_region) DESEQ_hindgut1_6 <- DESeq(hindgut_deseq1_6) res_hindgut1_6 <- results(DESEQ_hindgut1_6, tidy=TRUE) View(res_hindgut1_6) # regions 1_3 hindgut_plus1_3 <- transform_sample_counts(hindgut_genus_1_3, function(x) x+1) hindgut_deseq1_3 <- phyloseq_to_deseq2(hindgut_plus1_3, ~habitat_region) DESEQ_hindgut1_3 <- DESeq(hindgut_deseq1_3) res_hindgut1_3 <- results(DESEQ_hindgut1_3, tidy=TRUE) View(res_hindgut1_3) # regions 3_6 hindgut_plus3_6 <- transform_sample_counts(hindgut_genus_3_6, function(x) x+1) hindgut_deseq3_6 <- phyloseq_to_deseq2(hindgut_plus3_6, ~habitat_region) DESEQ_hindgut3_6 <- DESeq(hindgut_deseq3_6) res_hindgut3_6 <- results(DESEQ_hindgut3_6, tidy=TRUE) View(res_hindgut3_6) # Look at results # MIDGUT # regions 1_6 res_midgut1_6 %>% mutate(significant = padj<0.01 & abs(log2FoldChange)>2) %>% ggplot() + geom_point(aes(x=log2FoldChange, y=-log10(padj), col=significant)) # regions 1_3 res_midgut1_3 %>% mutate(significant = padj<0.01 & abs(log2FoldChange)>2) %>% ggplot() + geom_point(aes(x=log2FoldChange, y=-log10(padj), col=significant)) # regions 3_6 res_midgut3_6 %>% mutate(significant = padj<0.01 & abs(log2FoldChange)>2) %>% ggplot() + geom_point(aes(x=log2FoldChange, y=-log10(padj), col=significant)) # HINDGUT # regions 1_6 res_hindgut1_6 %>% mutate(significant = padj<0.01 & abs(log2FoldChange)>2) %>% ggplot() + geom_point(aes(x=log2FoldChange, y=-log10(padj), col=significant)) # regions 1_3 res_hindgut1_3 %>% mutate(significant = padj<0.01 & abs(log2FoldChange)>2) %>% ggplot() + geom_point(aes(x=log2FoldChange, y=-log10(padj), col=significant)) # regions 3_6 res_hindgut3_6 %>% mutate(significant = padj<0.01 & abs(log2FoldChange)>2) %>% ggplot() + geom_point(aes(x=log2FoldChange, y=-log10(padj), col=significant)) # Get table of results # MIDGUT # regions 1_6 sigASVs_midgut_1_6 <- res_midgut1_6 %>% filter(padj<0.01 & abs(log2FoldChange)>2) %>% dplyr::rename(ASV=row) View(sigASVs_midgut_1_6) # regions 1_3 sigASVs_midgut_1_3 <- res_midgut1_3 %>% filter(padj<0.01 & abs(log2FoldChange)>2) %>% dplyr::rename(ASV=row) View(sigASVs_midgut_1_3) # regions 3_6 sigASVs_midgut_3_6 <- res_midgut3_6 %>% filter(padj<0.01 & abs(log2FoldChange)>2) %>% dplyr::rename(ASV=row) View(sigASVs_midgut_3_6) # HINDGUT # regions 1_6 sigASVs_hindgut_1_6 <- res_hindgut1_6 %>% filter(padj<0.01 & abs(log2FoldChange)>2) %>% dplyr::rename(ASV=row) View(sigASVs_hindgut_1_6) # regions 1_3 sigASVs_hindgut_1_3 <- res_hindgut1_3 %>% filter(padj<0.01 & abs(log2FoldChange)>2) %>% dplyr::rename(ASV=row) View(sigASVs_hindgut_1_3) # regions 3_6 sigASVs_hindgut_3_6 <- res_hindgut3_6 %>% filter(padj<0.01 & abs(log2FoldChange)>2) %>% dplyr::rename(ASV=row) View(sigASVs_hindgut_3_6) # Get only asv names sigASVs_vec_midgut_1_6 <- sigASVs_midgut_1_6 %>% pull(ASV) sigASVs_vec_midgut_1_3 <- sigASVs_midgut_1_3 %>% pull(ASV) sigASVs_vec_midgut_3_6 <- sigASVs_midgut_3_6 %>% pull(ASV) sigASVs_vec_hindgut_1_6 <- sigASVs_hindgut_1_6 %>% pull(ASV) sigASVs_vec_hindgut_1_3 <- sigASVs_hindgut_1_3 %>% pull(ASV) sigASVs_vec_hindgut_3_6 <- sigASVs_hindgut_3_6 %>% pull(ASV) # Prune phyloseq file and change uncultured genus' to family or order level # MIDGUT # regions 1_6 midgut_DESeq_1_6 <- prune_taxa(sigASVs_vec_midgut_1_6 ,midgut_genus_1_6) sigASVs_midgut_1_6 <- tax_table(midgut_DESeq_1_6) %>% as.data.frame() %>% rownames_to_column(var="ASV") %>% right_join(sigASVs_midgut_1_6) %>% arrange(log2FoldChange) %>% mutate(Genus = ifelse(Genus=='g__uncultured', paste(Family,"uncl",sep='.'),Genus)) %>% mutate(Genus = ifelse(Family=='f__uncultured', paste(Order,"f_g_uncl",sep='.'),Genus)) %>% mutate(Genus = ifelse(Genus=='g__Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium',paste('g__Rhizobium'),Genus)) %>% arrange(-log2FoldChange) # regions 1_3 midgut_DESeq_1_3 <- prune_taxa(sigASVs_vec_midgut_1_3 ,midgut_genus_1_3) sigASVs_midgut_1_3 <- tax_table(midgut_DESeq_1_3) %>% as.data.frame() %>% rownames_to_column(var="ASV") %>% right_join(sigASVs_midgut_1_3) %>% arrange(log2FoldChange) %>% mutate(Genus = ifelse(Genus=='g__uncultured', paste(Family,"uncl",sep='.'),Genus)) %>% mutate(Genus = ifelse(Family=='f__uncultured', paste(Order,"f_g_uncl",sep='.'),Genus)) %>% mutate(Genus = ifelse(Family=='f__Unknown_Family', paste(Order,"f_g_uncl",sep='.'),Genus))%>% arrange(-log2FoldChange) # regions 3_6 midgut_DESeq_3_6 <- prune_taxa(sigASVs_vec_midgut_3_6 ,midgut_genus_3_6) sigASVs_midgut_3_6 <- tax_table(midgut_DESeq_3_6) %>% as.data.frame() %>% rownames_to_column(var="ASV") %>% right_join(sigASVs_midgut_3_6) %>% arrange(log2FoldChange) %>% mutate(Genus = ifelse(Genus=='g__uncultured', paste(Family,"uncl",sep='.'),Genus)) %>% mutate(Genus = ifelse(Family=='f__uncultured', paste(Order,"f_g_uncl",sep='.'),Genus)) %>% mutate(Genus = ifelse(Family=='f__Unknown_Family', paste(Order,"f_g_uncl",sep='.'),Genus))%>% arrange(-log2FoldChange) # HINDGUT # regions 1_6 hindgut_DESeq_1_6 <- prune_taxa(sigASVs_vec_hindgut_1_6 ,hindgut_genus_1_6) sigASVs_hindgut_1_6 <- tax_table(hindgut_DESeq_1_6) %>% as.data.frame() %>% rownames_to_column(var="ASV") %>% right_join(sigASVs_hindgut_1_6) %>% arrange(log2FoldChange) %>% mutate(Genus = ifelse(Genus=='g__uncultured', paste(Family,"uncl",sep='.'),Genus)) %>% mutate(Genus = ifelse(Family=='f__uncultured', paste(Order,"f_g_uncl",sep='.'),Genus)) %>% mutate(Genus = ifelse(Family=='f__Unknown_Family', paste(Order,"f_g_uncl",sep='.'),Genus)) %>% arrange(-log2FoldChange) # regions 1_3 hindgut_DESeq_1_3 <- prune_taxa(sigASVs_vec_hindgut_1_3 ,hindgut_genus_1_3) sigASVs_hindgut_1_3 <- tax_table(hindgut_DESeq_1_3) %>% as.data.frame() %>% rownames_to_column(var="ASV") %>% right_join(sigASVs_hindgut_1_3) %>% arrange(log2FoldChange) %>% mutate(Genus = ifelse(Genus=='g__uncultured', paste(Family,"uncl",sep='.'),Genus)) %>% mutate(Genus = ifelse(Family=='f__uncultured', paste(Order,"f_g_uncl",sep='.'),Genus)) %>% mutate(Genus = ifelse(Family=='f__Unknown_Family', paste(Order,"f_g_uncl",sep='.'),Genus)) %>% arrange(-log2FoldChange) # regions 3_6 hindgut_DESeq_3_6 <- prune_taxa(sigASVs_vec_hindgut_3_6 ,hindgut_genus_3_6) sigASVs_hindgut_3_6 <- tax_table(hindgut_DESeq_3_6) %>% as.data.frame() %>% rownames_to_column(var="ASV") %>% right_join(sigASVs_hindgut_3_6) %>% arrange(log2FoldChange) %>% mutate(Genus = ifelse(Genus=='g__uncultured', paste(Family,"uncl",sep='.'),Genus)) %>% mutate(Genus = ifelse(Family=='f__uncultured', paste(Order,"f_g_uncl",sep='.'),Genus)) %>% mutate(Genus = ifelse(Family=='f__Unknown_Family', paste(Order,"f_g_uncl",sep='.'),Genus)) %>% arrange(-log2FoldChange) # Plot differentially abundant taxa # MIDGUT # regions 1_6 ggplot(sigASVs_midgut_1_6) + geom_bar(aes(y= reorder(Genus, -log2FoldChange),x=log2FoldChange), stat="identity", color="dodgerblue3", fill="lightsteelblue2") + geom_errorbar(aes(y=Genus, xmin=log2FoldChange-lfcSE, xmax=log2FoldChange+lfcSE)) + theme_bw(base_size = 12) + theme(axis.text.y = element_text(hjust=1, vjust=0.5, size=9), axis.title = element_text(size = 11)) + labs(x="log2 Fold Change", y="") + xlim(-10,10) ggsave("midgut_1_3.png", width=8, height=6) # regions 1_3 ggplot(sigASVs_midgut_1_3) + geom_bar(aes(y= reorder(Genus, -log2FoldChange),x=log2FoldChange), stat="identity", color="dodgerblue3", fill="lightsteelblue2") + geom_errorbar(aes(y=Genus, xmin=log2FoldChange-lfcSE, xmax=log2FoldChange+lfcSE)) + theme_bw(base_size = 12) + theme(axis.text.y = element_text(hjust=1, vjust=0.5, size=9), axis.title = element_text(size = 11)) + labs(x="log2 Fold Change", y="") + xlim(-10,10) ggsave("midgut_1_2.png", width=9, height=2.5) # regions 3_6 ggplot(sigASVs_midgut_3_6) + geom_bar(aes(y= reorder(Genus, -log2FoldChange),x=log2FoldChange), stat="identity", color="dodgerblue3", fill="lightsteelblue2") + geom_errorbar(aes(y=Genus, xmin=log2FoldChange-lfcSE, xmax=log2FoldChange+lfcSE)) + theme_bw(base_size = 12) + theme(axis.text.y = element_text(hjust=1, vjust=0.5, size=9), axis.title = element_text(size = 11)) + labs(x="log2 Fold Change", y="") + xlim(-10,10) ggsave("midgut_2_3.png", width=9, height=2.2) # HINDGUT # regions 1_6 ggplot(sigASVs_hindgut_1_6) + geom_bar(aes(y= reorder(Genus, -log2FoldChange),x=log2FoldChange), stat="identity", color="dodgerblue3", fill="lightsteelblue2") + geom_errorbar(aes(y=Genus, xmin=log2FoldChange-lfcSE, xmax=log2FoldChange+lfcSE)) + theme_bw(base_size = 12) + theme(axis.text.y = element_text(hjust=1, vjust=0.5, size=9), axis.title = element_text(size = 11)) + labs(x="log2 Fold Change", y="") + xlim(-10,10) ggsave("hindgut_1_3.png", width=7, height=3) # regions 1_3 ggplot(sigASVs_hindgut_1_3) + geom_bar(aes(y= reorder(Genus, -log2FoldChange),x=log2FoldChange), stat="identity", color="dodgerblue3", fill="lightsteelblue2") + geom_errorbar(aes(y=Genus, xmin=log2FoldChange-lfcSE, xmax=log2FoldChange+lfcSE)) + theme_bw(base_size = 12) + theme(axis.text.y = element_text(hjust=1, vjust=0.5, size=9), axis.title = element_text(size = 11)) + labs(x="log2 Fold Change", y="") + xlim(-10,10) ggsave("hindgut_1_2.png", width=7, height=1.25) # regions 3_6 ggplot(sigASVs_hindgut_3_6) + geom_bar(aes(y= reorder(Genus, -log2FoldChange),x=log2FoldChange), stat="identity", color="dodgerblue3", fill="lightsteelblue2") + geom_errorbar(aes(y=Genus, xmin=log2FoldChange-lfcSE, xmax=log2FoldChange+lfcSE)) + theme_bw(base_size = 12) + theme(axis.text.y = element_text(hjust=1, vjust=0.5, size=9), axis.title = element_text(size = 11)) + labs(x="log2 Fold Change", y="") + xlim(-10,10) ggsave("hindgut_2_3.png", width=7, height=1.1) #### RELATIVE ABUNDANCE ##### # Load packages library(phyloseq) library(ggplot2) library(ggprism) # Plot relative abundance for midgut samples # region 1_6 ps_rel_16 <- transform_sample_counts(midgut_filtered_final_1_6, function(x) x/sum(x)) ps_rel_16 %>% psmelt() %>% filter(OTU %in% sigASVs_midgut_1_6$ASV) %>% dplyr::rename(`Habitat Region`= habitat_region) %>% as.data.frame() %>% mutate(Genus = ifelse(Genus=='g__uncultured', paste(Family,"uncl",sep='.'),Genus)) %>% ggplot(aes(Genus,Abundance,fill=`Habitat Region`)) + geom_boxplot() + theme_grey(base_size = 12) + theme(axis.text.x = element_text(vjust = 0.5, hjust=1, size=9), axis.title = element_text(size = 11)) + scale_fill_discrete(labels=c("Region 1", "Region 3")) + labs(y="Relative Abundance",x="")+ coord_flip() ggsave("midgut_1_3_RA.png", width=10, height=8) # region 1_3 ps_rel_13 <- transform_sample_counts(midgut_filtered_final_1_3, function(x) x/sum(x)) ps_rel_13 %>% psmelt() %>% filter(OTU %in% sigASVs_midgut_1_3$ASV) %>% dplyr::rename(`Habitat Region`= habitat_region) %>% as.data.frame() %>% mutate(Genus = ifelse(Genus=='g__uncultured', paste(Family,"uncl",sep='.'),Genus)) %>% mutate(Genus = ifelse(Family=='f__Unknown_Family', paste(Order,"f_g_uncl",sep='.'),Genus)) %>% ggplot(aes(Genus,Abundance,fill=`Habitat Region`)) + geom_boxplot() + theme_gray(base_size = 12) + theme(axis.text.x = element_text(vjust = 0.5, hjust=1, size=9), axis.title = element_text(size = 11)) + scale_fill_discrete(labels=c("Region 1", "Region 2")) + labs(y="Relative Abundance",x="") + coord_flip() ggsave("midgut_1_2_RA.png", width=9, height=3) # region 3_6 ps_rel_36 <- transform_sample_counts(midgut_filtered_final_3_6, function(x) x/sum(x)) ps_rel_36 %>% psmelt() %>% filter(OTU %in% sigASVs_midgut_3_6$ASV) %>% dplyr::rename(`Habitat Region`=habitat_region) %>% as.data.frame() %>% mutate(Genus = ifelse(Genus=='g__uncultured', paste(Family,"uncl",sep='.'),Genus)) %>% mutate(Genus = ifelse(Family=='f__uncultured', paste(Order,"f_g_uncl",sep='.'),Genus)) %>% mutate(Genus = ifelse(Family=='f__Unknown_Family', paste(Order,"f_g_uncl",sep='.'),Genus)) %>% ggplot(aes(Genus,Abundance,fill=`Habitat Region`)) + geom_boxplot() + theme_gray(base_size = 12) + theme(axis.text.x = element_text(vjust = 0.5, hjust=1, size=9), axis.title = element_text(size = 11)) + scale_fill_discrete(labels=c("Region 2", "Region 3")) + labs(y="Relative Abundance",x="") + coord_flip() ggsave("midgut_2_3_RA.png", width=9, height=2.7)