# Create and navigate to directory for project mkdir /data/infant cd /data/infant # Import demultiplexed data into project directory qiime tools import \ --type 'SampleData[SequencesWithQuality]' \ --input-path /mnt/datasets/project_2/infant/infant_manifest.txt \ --output-path demux.qza \ --input-format SingleEndFastqManifestPhred33V2 # Visualization of demultiplexed samples qiime demux summarize \ --i-data demux.qza \ --o-visualization demux.qzv # Sequence quality control and determine ASV with DADA2 qiime dada2 denoise-single \ --i-demultiplexed-seqs demux.qza \ --p-trim-left 0 \ --p-trunc-len 150 \ --o-representative-sequences rep-seqs.qza \ --o-table table-dada2.qza \ --o-denoising-stats stats-dada2.qza # Create visualization of DADA2 stats qiime metadata tabulate \ --m-input-file stats-dada2.qza \ --o-visualization stats-dada2.qzv # Generate feature table qiime feature-table summarize \ --i-table table.qza \ --o-visualization table.qzv # Map feature IDs to sequences qiime feature-table tabulate-seqs \ --i-data rep-seqs.qza \ --o-visualization rep-seqs.qzv # Generate tree for phylogenetic diversity analyses qiime phylogeny align-to-tree-mafft-fasttree \ --i-sequences rep-seqs.qza \ --o-alignment aligned-rep-seqs.qza \ --o-masked-alignment masked-aligned-rep-seqs.qza \ --o-tree unrooted-tree.qza \ --o-rooted-tree rooted-tree.qza # Alpha-rarefaction to correct sampling depth qiime diversity alpha-rarefaction \ --i-table table.qza \ --i-phylogeny rooted-tree.qza \ --p-max-depth 37000 \ --m-metadata-file infant_metadata.txt \ --o-visualization alpha-rarefaction.qzv # Calculate alpha- and beta-diversity metrics with 4250 sampling depth qiime diversity core-metrics-phylogenetic \ --i-phylogeny rooted-tree.qza \ --i-table table.qza \ --p-sampling-depth 4250 \ --m-metadata-file infant_metadata.txt \ --output-dir core_metrics_results_4250_depth # Download pre-trained Naive Bayes classifier trained # on Greengenes 13_8 99% OTUs wget "https://data.qiime2.org/2020.8/common/gg-13-8-99-515-806-nb-classifier.qza" # Generate taxonomy.qza with QIIME qiime feature-classifier classify-sklearn --i-classifier gg-13-8-99-515-806-nb-classifier.qza \ --i-reads rep-seqs.qza \ --o-classification taxonomy.qza # Export BIOM data from QIIME qiime tools export \ --input-path table.qza \ --output-path exported qiime tools export \ --input-path taxonomy.qza \ --output-path exported qiime tools export \ --input-path rooted-tree.qza \ --output-path exported # Edit the column names and change # `Feature ID` to `#OTUID` # `Taxon` to `taxonomy` # `Confidence` to `confidence` nano exported/taxonomy.tsv # Combine taxonomy with BIOM data biom add-metadata \ -i exported/feature-table.biom \ -o exported/table-with-taxonomy.biom \ --observation-metadata-fp exported/taxonomy.tsv \ --sc-separated taxonomy ################################################################## #MICB447 Project 2 #Author: Enoch Yau, Team 2 #This script used to answer questions regarding mode of delivery (behavioral and microbial differences) ##################################################################### #loading required packages library(ggplot2) library(tidyverse) ############################################ #data setup in this section #initial importing of data infant_metadata <- readxl::read_xlsx("infant_metadata.xlsx") #filter to only get metadata variables of interest from infants whose birth method is known #don't keep "enjoyment_food", "general_appetite", or "satiety_responsive", since these don't appear to be collected at the same timepoint infant_metadata_filtered <- subset(infant_metadata, life_stage=="Infant" & baby_delivery!="not collected") infant_metadata_filtered <- select(infant_metadata_filtered, c("#SampleID", "life_stage", "age_category", "anonymized_name", "baby_delivery","enjoyment_food_2_months", "enjoyment_food_2_weeks","enjoyment_food_4_months","enjoyment_food_6_months", "feed","general_appetite_2_months","general_appetite_2_weeks", "general_appetite_4_months","general_appetite_6_months", "satiety_responsive_2_months","satiety_responsive_2_weeks", "satiety_responsive_4_months","satiety_responsive_6_months", "wlz_2w", "wlz_2m", "wlz_4m", "wlz_6m","wlz_9m", "wlz_12m")) #convert feeding behavior metadata columns to numerical form, replacing "not collected" with NA (default setting) numerical_data <- c("enjoyment_food_2_months", "enjoyment_food_2_weeks", "enjoyment_food_4_months", "enjoyment_food_6_months","general_appetite_2_months", "general_appetite_2_weeks","general_appetite_4_months", "general_appetite_6_months", "satiety_responsive_2_months", "satiety_responsive_2_weeks","satiety_responsive_4_months", "satiety_responsive_6_months","wlz_2w", "wlz_2m", "wlz_4m", "wlz_6m","wlz_9m", "wlz_12m") infant_metadata_filtered[numerical_data]<- lapply(infant_metadata_filtered[numerical_data], as.numeric) #remove duplicated entries (entries with the same anonymized name) infant_metadata_filtered_no_duplicates <- infant_metadata_filtered[!duplicated(infant_metadata_filtered$anonymized_name),] #rearrange data so that a time variable is introduced into the frame and all the feeding behavior values are consolidated into 1 column #should make it easier to plot line graphs this way #create a data frame for each timepoint first FeedBhvr_2weeks <- data.frame("anonymized_name"=infant_metadata_filtered_no_duplicates$anonymized_name, "baby_delivery"=infant_metadata_filtered_no_duplicates$baby_delivery, "Time"="2 weeks", "general_appetite"=infant_metadata_filtered_no_duplicates$general_appetite_2_weeks, "enjoyment_food"=infant_metadata_filtered_no_duplicates$enjoyment_food_2_weeks, "satiety_responsive"=infant_metadata_filtered_no_duplicates$satiety_responsive_2_weeks, "wlz"=infant_metadata_filtered_no_duplicates$wlz_2w) FeedBhvr_2months <- data.frame("anonymized_name"=infant_metadata_filtered_no_duplicates$anonymized_name, "baby_delivery"=infant_metadata_filtered_no_duplicates$baby_delivery, "Time"="2 months", "general_appetite"=infant_metadata_filtered_no_duplicates$general_appetite_2_months, "enjoyment_food"=infant_metadata_filtered_no_duplicates$enjoyment_food_2_months, "satiety_responsive"=infant_metadata_filtered_no_duplicates$satiety_responsive_2_months, "wlz"=infant_metadata_filtered_no_duplicates$wlz_2m) FeedBhvr_4months <- data.frame("anonymized_name"=infant_metadata_filtered_no_duplicates$anonymized_name, "baby_delivery"=infant_metadata_filtered_no_duplicates$baby_delivery, "Time"="4 months", "general_appetite"=infant_metadata_filtered_no_duplicates$general_appetite_4_months, "enjoyment_food"=infant_metadata_filtered_no_duplicates$enjoyment_food_4_months, "satiety_responsive"=infant_metadata_filtered_no_duplicates$satiety_responsive_4_months, "wlz"=infant_metadata_filtered_no_duplicates$wlz_4m) FeedBhvr_6months <- data.frame("anonymized_name"=infant_metadata_filtered_no_duplicates$anonymized_name, "baby_delivery"=infant_metadata_filtered_no_duplicates$baby_delivery, "Time"="6 months", "general_appetite"=infant_metadata_filtered_no_duplicates$general_appetite_6_months, "enjoyment_food"=infant_metadata_filtered_no_duplicates$enjoyment_food_6_months, "satiety_responsive"=infant_metadata_filtered_no_duplicates$satiety_responsive_6_months, "wlz"=infant_metadata_filtered_no_duplicates$wlz_6m) FeedBhvr_9months <- data.frame("anonymized_name"=infant_metadata_filtered_no_duplicates$anonymized_name, "baby_delivery"=infant_metadata_filtered_no_duplicates$baby_delivery, "Time"="9 months", "general_appetite"=NA, "enjoyment_food"=NA, "satiety_responsive"=NA, "wlz"=infant_metadata_filtered_no_duplicates$wlz_9m) FeedBhvr_12months <- data.frame("anonymized_name"=infant_metadata_filtered_no_duplicates$anonymized_name, "baby_delivery"=infant_metadata_filtered_no_duplicates$baby_delivery, "Time"="12 months", "general_appetite"=NA, "enjoyment_food"=NA, "satiety_responsive"=NA, "wlz"=infant_metadata_filtered_no_duplicates$wlz_12m) #combine dataframes above into one FeedBhvr_Trend <- rbind(FeedBhvr_2weeks, FeedBhvr_2months, FeedBhvr_4months, FeedBhvr_6months, FeedBhvr_9months, FeedBhvr_12months) #set the data frame so that time is properly ordered (instead of the default alphanumeric order) FeedBhvr_Trend$Time <- factor(FeedBhvr_Trend$Time, c("2 weeks", "2 months", "4 months", "6 months", "9 months", "12 months")) ################################################################################ #Setup for and plotting data in this section #creating functions to calculate t test p values in a more concise way than a code line for each t test #functions will also make exporting p values into a dataframe for importing into figures easier #function description: determine p value of t-test to 3 digits for respective metadata category between baby_delivery methods at time x GA_pvalue_calc <- function(x){round(t.test(general_appetite~baby_delivery, FeedBhvr_Trend[FeedBhvr_Trend$Time==x,])$p.value, digits=3)} EF_pvalue_calc <- function(x){round(t.test(enjoyment_food~baby_delivery, FeedBhvr_Trend[FeedBhvr_Trend$Time==x,])$p.value, digits=3)} SR_pvalue_calc <- function(x){round(t.test(satiety_responsive~baby_delivery, FeedBhvr_Trend[FeedBhvr_Trend$Time==x,])$p.value, digits=3)} #creating vector of timepoints so i can run the function at each time using sapply timepoints=c("2 weeks", "2 months", "4 months", "6 months") #making dataframe containing all p values from t-tests so i can add values to timewise plot pvalue_results <- data.frame("Time"=c(timepoints), EFpvalue=sapply(timepoints, EF_pvalue_calc), GApvalue=sapply(timepoints, GA_pvalue_calc), SRpvalue=sapply(timepoints, SR_pvalue_calc)) #BH correction for p values pvalue_adj <- data.frame("Time"=c(timepoints), EFpvalue=round(p.adjust(pvalue_results$EFpvalue, method="BH"), digits=3), GApvalue=round(p.adjust(pvalue_results$GApvalue, method="BH"), digits=3), SRpvalue=round(p.adjust(pvalue_results$SRpvalue, method="BH"), digits=3)) #General Appetite line graph ggplot(FeedBhvr_Trend,aes(x=Time)) + stat_summary(aes(y=general_appetite, colour=baby_delivery, group=baby_delivery), fun=mean, geom="line") + stat_summary(aes(y=general_appetite, shape=baby_delivery, colour=baby_delivery), fun=mean, geom="point", size=3) + scale_shape_manual(values=c(15,17)) + stat_summary(aes(y=general_appetite, colour=baby_delivery), fun.data=mean_se, geom="errorbar", width=0.1) + scale_colour_manual(values=c("blue", "red")) + theme_classic() + labs(x="Infant Age",y="General Appetite", shape="Mode of Delivery", colour="Mode of Delivery") + theme(plot.title = element_text(hjust=0.5)) + coord_cartesian(ylim=c(3.5,4.7)) + scale_x_discrete(limits=c("2 weeks", "2 months", "4 months", "6 months")) + geom_text(data=pvalue_adj, aes(x=Time,y=3.55, label=paste("p=",GApvalue))) + theme(legend.position=c(0.16, 0.87), legend.box.background = element_rect(size=1.2)) + theme(text=element_text(size=17)) #Food Enjoyment line graph ggplot(FeedBhvr_Trend,aes(x=Time)) + stat_summary(aes(y=enjoyment_food, colour=baby_delivery, group=baby_delivery), fun=mean, geom="line") + stat_summary(aes(y=enjoyment_food, shape=baby_delivery, colour=baby_delivery), fun=mean, geom="point", size=3) + scale_shape_manual(values=c(15,17)) + stat_summary(aes(y=enjoyment_food, colour=baby_delivery), fun.data=mean_se, geom="errorbar", width=0.1) + scale_colour_manual(values=c("blue", "red")) + theme_classic() + labs(x="Infant Age",y="Food Enjoyment", shape="Mode of Delivery", colour="Mode of Delivery") + theme(plot.title = element_text(hjust=0.5)) + coord_cartesian(ylim=c(4.2,4.9)) + scale_x_discrete(limits=c("2 weeks", "2 months", "4 months", "6 months")) + geom_text(data=pvalue_adj, aes(x=Time,y=4.2, label=paste("p=",EFpvalue))) + theme(legend.position=c(0.85, 0.85), legend.box.background = element_rect(size=1.2)) + theme(text=element_text(size=17)) #Satiety line graph ggplot(FeedBhvr_Trend,aes(x=Time)) + stat_summary(aes(y=satiety_responsive, colour=baby_delivery, group=baby_delivery), fun=mean, geom="line") + stat_summary(aes(y=satiety_responsive, shape=baby_delivery, colour=baby_delivery), fun=mean, geom="point", size=3) + scale_shape_manual(values=c(15,17)) + stat_summary(aes(y=satiety_responsive, colour=baby_delivery), fun.data=mean_se, geom="errorbar", width=0.1) + scale_colour_manual(values=c("blue", "red")) + theme_classic() + labs(x="Infant Age",y="Satiety-Responsiveness", shape="Mode of Delivery", colour="Mode of Delivery") + theme(plot.title = element_text(hjust=0.5)) + coord_cartesian(ylim=c(1.6,2.5)) + scale_x_discrete(limits=c("2 weeks", "2 months", "4 months", "6 months")) + geom_text(data=pvalue_adj, aes(x=Time,y=1.61, label=paste("p=",SRpvalue))) + theme(legend.position=c(0.85, 0.85), legend.box.background = element_rect(size=1.2)) + theme(text=element_text(size=17)) ################################################################################## #this section for wlz analysis between modes of delivery #testing significance of wlz first using t tests #function to determine p value of t-test between modes of delivery at time y, as above wlz_pvalue_calc <- function(x){round(t.test(wlz~baby_delivery, FeedBhvr_Trend[FeedBhvr_Trend$Time==x,])$p.value, digits=3)} #making dataframe so i can add p values to timewise plot, as above timepoints_long <- c(timepoints,"9 months", "12 months") wlz_pvalue_results <- data.frame("Time"=timepoints_long, pvalue=sapply(timepoints_long, wlz_pvalue_calc)) wlz_padj <- data.frame("Time"=timepoints_long, pvalue=round(p.adjust(wlz_pvalue_results$pvalue, method="BH"), digits=3)) #plotting wlz over time between modes of delivery ggplot(FeedBhvr_Trend,aes(x=Time)) + stat_summary(aes(y=wlz, colour=baby_delivery, group=baby_delivery), fun=mean, geom="line") + stat_summary(aes(y=wlz, shape=baby_delivery, colour=baby_delivery), fun=mean, geom="point", size=3) + scale_shape_manual(values=c(15,17)) + stat_summary(aes(y=wlz, colour=baby_delivery), fun.data=mean_se, geom="errorbar", width=0.1) + scale_colour_manual(values=c("blue", "red")) + theme_classic() + labs(x="Infant Age",y="Weight/Length Z-score", shape="Mode of Delivery", colour="Mode of Delivery") + theme(plot.title = element_text(hjust=0.5)) + coord_cartesian(ylim=c(-1,1.2)) + geom_text(data=wlz_padj, aes(x=Time,y=-0.9, label=paste("p=",pvalue))) + theme(legend.position=c(0.17, 0.8), legend.box.background = element_rect(size=1.2)) + theme(text=element_text(size=17)) ######################################################################################## #calculating n of data points and making a dataframe out of it for reference later on #making a function to count the samples from babies born via delivery method y, at time x and looking at feeding behavior z n_calc <- function(x,y,z){ number <- nrow(filter(FeedBhvr_Trend,baby_delivery==y & Time==x & z!="NA")) print(number) } #function to run n_calc at all timepoints and both modes of delivery, for feeding behavior a count_list <- function(a){c(sapply(timepoints_long,n_calc, y="vaginal", z=a), sapply(timepoints_long,n_calc, y="c-section", z=a))} #making the data frame of the n's FeedBhvr_Summary <- data.frame("baby_delivery"=c(rep("vaginal",6),rep("c-section",6)), "Time"=c(rep(timepoints_long,2)), "general_appetite_count"=count_list(FeedBhvr_Trend$general_appetite), "enjoyment_food_count"=count_list(FeedBhvr_Trend$enjoyment_food), "satiety_responsiveness_count"=count_list(FeedBhvr_Trend$satiety_responsive), "wlz_count"=count_list(FeedBhvr_Trend$wlz)) ################################################################################################# #This section for beta diversity analysis #packages needed for this section library(vegan) library(phyloseq) library(DESeq2) #import files used to create physeq object biom_file <- import_biom("table-with-taxonomy.biom") metadata <- import_qiime_sample_data("infant_metadata.txt") tree <- read_tree_greengenes("tree.nwk") #create physeq object physeq <- merge_phyloseq(biom_file, metadata, tree) #converting taxonomic rank to names colnames(tax_table(physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species") #only keeping samples from infants, and samples where baby_delivery is known infant <- subset_samples(physeq, life_stage=="Infant" & baby_delivery!="not collected") #set seed for reproducibility set.seed(1124) #setting functions and values for later calculate_RA <- function(x) x/sum(x) ######################################################### #making PCoA plots #rarefy OTU table so all samples have same size physeq_rar <- rarefy_even_depth(infant, sample.size=4250) #transforming abundance values to relative abundance physeq_rar_RA <- transform_sample_counts(physeq_rar, calculate_RA) #unweighted unifrac analysis and resulting PCoA plot unifrac <- ordinate(physeq_rar_RA, method="PCoA", distance="unifrac") plot_ordination(physeq_rar_RA, unifrac, type="sample", color="baby_delivery") + scale_color_manual(values=c("blue","red")) + stat_ellipse(type="norm", size=1) + theme_classic() + theme(legend.position=c(0.87, 0.82), legend.box.background = element_rect(size=1.2)) + labs(color="Mode of Birth") + theme(text=element_text(size=17)) #weighted unifrac and resulting PCoA wunifrac <- ordinate(physeq_rar_RA, method="PCoA", distance="wunifrac") plot_ordination(physeq_rar_RA, wunifrac, type="sample", color="baby_delivery") + scale_color_manual(values=c("blue","red"))+ stat_ellipse(type="norm", size=1) + theme_classic() + theme(legend.position=c(0.87, 0.15), legend.box.background = element_rect(size=1.2)) + labs(color="Mode of Birth") + theme(text=element_text(size=17)) #jaccard and resulting PCoA jaccard <- ordinate(physeq_rar_RA, method="PCoA", distance="jaccard") plot_ordination(physeq_rar_RA, jaccard, type="sample", color="baby_delivery") + scale_color_manual(values=c("blue","red"))+ stat_ellipse(type="norm", size=1) + theme_classic() + theme(legend.position=c(0.9, 0.885), legend.box.background = element_rect(size=1.2)) + labs(color="Mode of Birth") + theme(text=element_text(size=17)) #Bray-Curtis and resulting PCoA bray <- ordinate(physeq_rar_RA, method="PCoA", distance="bray") plot_ordination(physeq_rar_RA, bray, type="sample", color="baby_delivery") + scale_color_manual(values=c("blue","red"))+ stat_ellipse(type="norm", size=1) + theme_classic() + theme(legend.position=c(0.9, 0.9), legend.box.background = element_rect(size=1.2)) + labs(color="Mode of Birth") + theme(text=element_text(size=17)) ################################################################################# #this commented section done in QIIME2: ANCOM setup and running at the genus level between modes of delivery #for supplemental figure ## Collapse filtered feature table to genus level # qiime taxa collapse \ # --i-table feature_frequency_infant_filtered_table.qza \ # --i-taxonomy taxonomy.qza \ # --p-level 6 \ # --o-collapsed-table feature_frequency_infant_filtered_table_l6.qza # ## Create composition artifact for differential abundance analysis # qiime composition add-pseudocount \ # --i-table feature_frequency_infant_filtered_table_l6.qza \ # --o-composition-table comp_frequency_infant_table_l6.qza # ## ANCOM on baby delivery column # qiime composition ancom \ # --i-table comp_frequency_infant_table_l6.qza \ # --m-metadata-file infant_metadata.txt \ # --m-metadata-column baby_delivery \ # --o-visualization l6_ancom_delivery.qzv ################################################################################## #this section to plot the ANCOM results in R #L6_ancom_delivery.qzv file from above section exported as ANCOM.tsv in view.qiime2.org #importing ANCOM.tsv ancom <- read.table(file="ANCOM.tsv", sep='\t', header=TRUE) #visualize ancom as a scatterplot ggplot(ancom,aes(y=W, x=clr)) + geom_point() + theme_classic() + labs(x="Centered Log Ratio Transformation",y="W statistic") + xlim(0,12) + ylim(0, 210) + theme(panel.border=element_rect(color="black", fill=NA)) + annotate("text", x=10.3, y=185,label="Bacteroides", size=5) + theme(text=element_text(size=17)) ################################################################################# #this section for Bacteroides Relative Abundance Analysis #subsetting to genera Genus_RA <- tax_glom(physeq_rar_RA, taxrank = "Genus", NArm = FALSE) #subsetting to Bacteroides Bacteroides <- subset_taxa(Genus_RA, Genus == "g__Bacteroides") #rearranging object so it can be plotted Bacteroides_melt <- psmelt(Bacteroides) #plotting relative abundance results ggplot(Bacteroides_melt, aes(x = baby_delivery, y = Abundance)) + geom_boxplot(aes(fill = baby_delivery)) + theme_classic() + scale_fill_manual(values=c("blue", "red")) + labs(x="Mode of Delivery", y="Relative Abundance", fill="Mode of Delivery", colour="Mode of Delivery") + theme(text=element_text(size=17)) + theme(legend.position="none") #Correlational Studies library(ggplot2) library(tidyverse) library(EnvStats) library(ggsignif) library(broom) library(ggpubr) infant_metadata <- readxl::read_xlsx("infant_metadata.xlsx", col_names = TRUE) #Filter metadata down to needed columns infant_metadata_filter <- infant_metadata infant_metadata_filter <- select(infant_metadata_filter, c("#SampleID", "life_stage", "age_category", "anonymized_name", "baby_delivery", "host_weight", "infant_waz", "infant_waz_comp_to_median", "infant_wlz", "infant_wlz_comp_to_median", "mom_bmi", "wlz_2m", "wlz_2w", "wlz_4m", "wlz_6m", "wlz_9m", "wlz_12m", "wlz_avg_bin", "satiety_responsive", "satiety_responsive_2_months", "satiety_responsive_2_weeks", "satiety_responsive_4_months", "satiety_responsive_6_months", "satiety_responsive_comp_to_median", "feed", "sex")) # BMI reference: https://www.nih.gov/news-events/news-releases/nih-study-identifies-ideal-body-mass-index#:~:text=Current%20guidelines%20from%20the%20U.S.,as%20BMI%2035%20or%20higher. #Sort data based on BMI #Set variable x <- infant_metadata_filter$mom_bmi #Mutate to make a new column: Weight_Category infant_metadata_filter_cat_BMI <- mutate(infant_metadata_filter, Mother_Weight_Category = ifelse((x == "not applicable"), "NA", ifelse((x < 18.5), "underweight", ifelse((x >= 18.5 & x <24.9), "normal", ifelse((x >= 24.9 & x <= 29.9), "overweight", ifelse((x >= 30), "obese", "NA")))))) #Subdivide data based on Mother's Weight_Category --> result: 4 datasets based on mother's weight #Note: 0 underweight mothers: 36 normal, 42 obese, 35 overweight #Note: ONLY CONTAINS MOTHERS underweight_mom_only <- filter(infant_metadata_filter_cat_BMI, Mother_Weight_Category == "underweight") normal_mom_only <- filter(infant_metadata_filter_cat_BMI, Mother_Weight_Category == "normal") overweight_mom_only <- filter(infant_metadata_filter_cat_BMI, Mother_Weight_Category == "overweight") obese_mom_only <- filter(infant_metadata_filter_cat_BMI, Mother_Weight_Category == "obese") # note: the data above is filtered down to MOTHERS ONLY. # take the anonymized_name (which is the same between mother and infant) # then apply it to the infant_metadata_filter_cat_BMI to keep both baby and mom normal_mom_AND_infant <- filter(infant_metadata_filter_cat_BMI, anonymized_name %in% normal_mom_only$anonymized_name) overweight_mom_AND_infant <- filter(infant_metadata_filter_cat_BMI, anonymized_name %in% overweight_mom_only$anonymized_name) obese_mom_AND_infant <- filter(infant_metadata_filter_cat_BMI, anonymized_name %in% obese_mom_only$anonymized_name) #Generate infant ONLY data + add mom weight description normal_mom_infant_only <- subset(normal_mom_AND_infant, life_stage == "Infant") normal_mom_infant_only <- mutate(normal_mom_infant_only, mom_weight_cat = ifelse(normal_mom_infant_only$Mother_Weight_Category == "NA", "normal", "failed")) overweight_mom_infant_only <- subset(overweight_mom_AND_infant, life_stage == "Infant") overweight_mom_infant_only <- mutate(overweight_mom_infant_only, mom_weight_cat = ifelse(overweight_mom_infant_only$Mother_Weight_Category == "NA", "overweight", "failed")) obese_mom_infant_only <- subset(obese_mom_AND_infant, life_stage == "Infant") obese_mom_infant_only <- mutate(obese_mom_infant_only, mom_weight_cat = ifelse(obese_mom_infant_only$Mother_Weight_Category == "NA", "obese", "failed")) infant_only_norm_over <- merge(normal_mom_infant_only, overweight_mom_infant_only, all = TRUE) infant_only_combined <- merge(infant_only_norm_over, obese_mom_infant_only, all = TRUE) ################################## satiety_2weeks <- data.frame("anonymized_name" = infant_only_combined$anonymized_name, "feed" = infant_only_combined$feed, "satiety" = infant_only_combined$satiety_responsive_2_weeks, "mom_bmi" = infant_only_combined$mom_weight_cat, "baby_delivery" = infant_only_combined$baby_delivery, "sex" = infant_only_combined$sex, "time" = "2 weeks") satiety_2months <- data.frame("anonymized_name" = infant_only_combined$anonymized_name, "feed" = infant_only_combined$feed, "satiety" = infant_only_combined$satiety_responsive_2_months, "mom_bmi" = infant_only_combined$mom_weight_cat, "baby_delivery" = infant_only_combined$baby_delivery, "sex" = infant_only_combined$sex, "time" = "2 months") satiety_4months <- data.frame("anonymized_name" = infant_only_combined$anonymized_name, "feed" = infant_only_combined$feed, "satiety" = infant_only_combined$satiety_responsive_4_months, "mom_bmi" = infant_only_combined$mom_weight_cat, "baby_delivery" = infant_only_combined$baby_delivery, "sex" = infant_only_combined$sex, "time" = "4 months") satiety_6months <- data.frame("anonymized_name" = infant_only_combined$anonymized_name, "feed" = infant_only_combined$feed, "satiety" = infant_only_combined$satiety_responsive_6_months, "mom_bmi" = infant_only_combined$mom_weight_cat, "baby_delivery" = infant_only_combined$baby_delivery, "sex" = infant_only_combined$sex, "time" = "6 months") satiety_combined <- rbind(satiety_2weeks, satiety_2months, satiety_4months, satiety_6months) satiety_combined[3] <- lapply(satiety_combined[3], as.numeric) satiety_combined_vaginal <- filter(satiety_combined, baby_delivery == "vaginal") #Not filtered by vaginal birth ggplot(satiety_combined, aes(y = satiety, x = mom_bmi)) + facet_wrap(~time) + stat_n_text() + theme_bw() + geom_boxplot() + labs(y = "Infant Satiety Responsiveness", x = "Maternal Weight Class", title = "ALL Infant Satiety responsiveness over time") satiety_combined_vaginal$time <- factor(satiety_combined_vaginal$time, levels=c("2 weeks", "2 months", "4 months", "6 months")) satiety_combined_vaginal$mom_bmi <- factor(satiety_combined_vaginal$mom_bmi, levels=c("normal", "overweight", "obese")) #filtered by vaginal birth (Figure 3B) ggplot(satiety_combined_vaginal, aes(y = satiety, x = mom_bmi)) + facet_wrap( ~ time) + stat_n_text(y.pos = 0) + theme_bw() + theme(text=element_text(size=17)) + geom_boxplot() + coord_cartesian(ylim = c(0, 5)) + labs(y = "Infant Satiety Responsiveness", x = "Maternal Weight Class", title = "Vaginally delivered Infant Satiety responsiveness over time") ####### WLZ wlz_2weeks <- data.frame("anonymized_name" = infant_only_combined$anonymized_name, "feed" = infant_only_combined$feed, "wlz" = infant_only_combined$wlz_2w, "mom_bmi" = infant_only_combined$mom_weight_cat, "baby_delivery" = infant_only_combined$baby_delivery, "sex" = infant_only_combined$sex, "time" = "2 weeks") wlz_2months <- data.frame("anonymized_name" = infant_only_combined$anonymized_name, "feed" = infant_only_combined$feed, "wlz" = infant_only_combined$wlz_2m, "mom_bmi" = infant_only_combined$mom_weight_cat, "baby_delivery" = infant_only_combined$baby_delivery, "sex" = infant_only_combined$sex, "time" = "2 months") wlz_4months <- data.frame("anonymized_name" = infant_only_combined$anonymized_name, "feed" = infant_only_combined$feed, "wlz" = infant_only_combined$wlz_4m, "mom_bmi" = infant_only_combined$mom_weight_cat, "baby_delivery" = infant_only_combined$baby_delivery, "sex" = infant_only_combined$sex, "time" = "4 months") wlz_6months <- data.frame("anonymized_name" = infant_only_combined$anonymized_name, "feed" = infant_only_combined$feed, "wlz" = infant_only_combined$wlz_6m, "mom_bmi" = infant_only_combined$mom_weight_cat, "baby_delivery" = infant_only_combined$baby_delivery, "sex" = infant_only_combined$sex, "time" = "6 months") wlz_12months <- data.frame("anonymized_name" = infant_only_combined$anonymized_name, "feed" = infant_only_combined$feed, "wlz" = infant_only_combined$wlz_12m, "mom_bmi" = infant_only_combined$mom_weight_cat, "baby_delivery" = infant_only_combined$baby_delivery, "sex" = infant_only_combined$sex, "time" = "12 months") wlz_combined <- rbind(wlz_2weeks, wlz_2months, wlz_4months, wlz_6months, wlz_12months) wlz_combined[3] <- lapply(wlz_combined[3], as.numeric) wlz_combined_vaginal <- filter(wlz_combined, baby_delivery == "vaginal") wlz_combined$time <- factor(wlz_combined$time, levels=c("2 weeks", "2 months", "4 months", "6 months", "12 months")) wlz_combined$mom_bmi <- factor(wlz_combined$mom_bmi, levels=c("normal", "overweight", "obese")) wlz_combined_vaginal$time <- factor(wlz_combined_vaginal$time, levels=c("2 weeks", "2 months", "4 months", "6 months", "12 months")) wlz_combined_vaginal$mom_bmi <- factor(wlz_combined_vaginal$mom_bmi, levels=c("normal", "overweight", "obese")) #Figure plot (3A) ggplot(wlz_combined_vaginal, aes(y = wlz, x = mom_bmi)) + facet_wrap(~ time) + stat_n_text() + theme_bw() + theme(text=element_text(size=17)) + geom_boxplot() + coord_cartesian(ylim = c(-5, 4)) + labs(y = "Infant Weight-for-length Z score", x = "Maternal Weight Class", title = "Vaginally delivered infant weight-for-length Z score by maternal BMI") ####### #ANOVA stats #Anova tests: #2 weeks WLZ wlz_2weeks[3] <- lapply(wlz_2weeks[3], as.numeric) wlz_2weeks_filter <- filter(wlz_2weeks, baby_delivery == "vaginal") wlz_2weeks_anova <- lm(wlz ~ mom_bmi, data = wlz_2weeks_filter) anova(wlz_2weeks_anova) wlz_2weeks_anova_tukey <- TukeyHSD(aov(wlz_2weeks_anova)) tidy(wlz_2weeks_anova_tukey) #2 months WLZ wlz_2months[3] <- lapply(wlz_2months[3], as.numeric) wlz_2months_filter <- filter(wlz_2months, baby_delivery == "vaginal") wlz_2months_anova <- lm(wlz ~ mom_bmi, data = wlz_2months_filter) anova(wlz_2months_anova) wlz_2months_anova_tukey <- TukeyHSD(aov(wlz_2months_anova)) tidy(wlz_2months_anova_tukey) #4 months WLZ wlz_4months[3] <- lapply(wlz_4months[3], as.numeric) wlz_4months_filter <- filter(wlz_4months, baby_delivery == "vaginal") wlz_4months_anova <- lm(wlz ~ mom_bmi, data = wlz_4months_filter) anova(wlz_4months_anova) wlz_4months_anova_tukey <- TukeyHSD(aov(wlz_4months_anova)) tidy(wlz_4months_anova_tukey) #6 months WLZ wlz_6months[3] <- lapply(wlz_6months[3], as.numeric) wlz_6months_filter <- filter(wlz_6months, baby_delivery == "vaginal") wlz_6months_anova <- lm(wlz ~ mom_bmi, data = wlz_6months_filter) anova(wlz_6months_anova) wlz_6months_anova_tukey <- TukeyHSD(aov(wlz_6months_anova)) tidy(wlz_6months_anova_tukey) #12 months WLZ wlz_12months[3] <- lapply(wlz_12months[3], as.numeric) wlz_12months_filter <- filter(wlz_12months, baby_delivery == "vaginal") wlz_12months_anova <- lm(wlz ~ mom_bmi, data = wlz_12months_filter) anova(wlz_12months_anova) wlz_12months_anova_tukey <- TukeyHSD(aov(wlz_12months_anova)) tidy(wlz_12months_anova_tukey) ##################### #ANOVA satiety #Satiety 2 weeks satiety_2weeks[3] <- lapply(satiety_2weeks[3], as.numeric) satiety_2weeks_filter <- filter(satiety_2weeks, baby_delivery == "vaginal") satiety_2weeks_anova <- lm(satiety ~ mom_bmi, data = satiety_2weeks_filter) anova(satiety_2weeks_anova) satiety_2weeks_anova_tukey <- TukeyHSD(aov(satiety_2weeks_anova)) tidy(satiety_2weeks_anova_tukey) #2 months satiety_2months[3] <- lapply(satiety_2months[3], as.numeric) satiety_2months_filter <- filter(satiety_2months, baby_delivery == "vaginal") satiety_2months_anova <- lm(satiety ~ mom_bmi, data = satiety_2months_filter) anova(satiety_2months_anova) satiety_2months_anova_tukey <- TukeyHSD(aov(satiety_2months_anova)) tidy(satiety_2months_anova_tukey) #satiety 4 months satiety_4months[3] <- lapply(satiety_4months[3], as.numeric) satiety_4months_filter <- filter(satiety_4months, baby_delivery == "vaginal") satiety_4months_anova <- lm(satiety ~ mom_bmi, data = satiety_4months_filter) anova(satiety_4months_anova) satiety_4months_anova_tukey <- TukeyHSD(aov(satiety_4months_anova)) tidy(satiety_4months_anova_tukey) #satiety 6 months satiety_6months[3] <- lapply(satiety_6months[3], as.numeric) satiety_6months_filter <- filter(satiety_6months, baby_delivery == "vaginal") satiety_6months_anova <- lm(satiety ~ mom_bmi, data = satiety_6months_filter) anova(satiety_6months_anova) satiety_6months_anova_tukey <- TukeyHSD(aov(satiety_6months_anova)) tidy(satiety_6months_anova_tukey) ################################################################################################################## DATA WRANGLING FOR BETA DIVERSITY AND DAA ################################################################################################################# #this script filters infant_metadata.xlsx #this script conducts correlation analyses between infant Weight_for_length #and satiety responsiveness vs Maternal Weight Category library(tidyverse) library(dplyr) library(Hmisc) ################################################################################################################################################## #edit metadata to include maternal bmi category infant_metadata <- readxl::read_xlsx("infant_metadata.xlsx", col_names = TRUE) test <- select(infant_metadata_cat_BMI, "life_stage", "mom_bmi", "anonymized_name", "age_category") x <- infant_metadata$mom_bmi #Mutate to make a new column: Weight_Category infant_metadata_cat_BMI <- mutate(infant_metadata, Mother_Weight_Category = ifelse((x == "not applicable"), "NA", ifelse((x < 18.5), "underweight", ifelse((x >= 18.5 & x <24.9), "normal", ifelse((x >= 24.9 & x <= 29.9), "overweight", ifelse((x >= 30), "obese", "NA")))))) underweight_mom_only <- filter(infant_metadata_cat_BMI, Mother_Weight_Category == "underweight") normal_mom_only <- filter(infant_metadata_cat_BMI, Mother_Weight_Category == "normal") overweight_mom_only <- filter(infant_metadata_cat_BMI, Mother_Weight_Category == "overweight") obese_mom_only <- filter(infant_metadata_cat_BMI, Mother_Weight_Category == "obese") ####################################### #Design a system: Going from normal --> overweight --> obese #when anonymized name is used once, it's removed for future uses ####################################### used_list <- infant_metadata$anonymized_name used_list <- as.data.frame(used_list) #filter normal mothers and infants #take infants that have IDs the same as Normal mothers normal_mom_AND_infant <- filter(infant_metadata_cat_BMI, anonymized_name %in% normal_mom_only$anonymized_name) #a few mom_bmi for the same anonymized ID have bmi taken at DIFFERENT TIME PONITS #this results in 1 anonymized name having MULTIPLE BMI definitions (e.g. 0.5 months = obese, 4 months = overweight) #here, we filter based on Mother_weight_category being normal or NA #the same anonymized name will be categorized differently normal_mom_AND_infant <- filter(normal_mom_AND_infant, Mother_Weight_Category %in% c("normal", "NA")) check_normal1 <- select(normal_mom_AND_infant, "#SampleID", "mom_bmi", "Mother_Weight_Category", "anonymized_name", "life_stage") #Keep items that are NOT IN normal_mom_AND_infant$anonymized_name used_list <- filter(used_list, used_list %nin% normal_mom_AND_infant$anonymized_name) #same with overweight #this creates a new dataframe that includes only anonymized names that haven't been used overweight_filtered <- filter(overweight_mom_only, anonymized_name %in% used_list$used_list) overweight_mom_AND_infant <- filter(infant_metadata_cat_BMI, anonymized_name %in% overweight_filtered$anonymized_name) overweight_mom_AND_infant <- filter(overweight_mom_AND_infant, Mother_Weight_Category %in% c("overweight", "NA")) check_overweight1 <- select(overweight_mom_AND_infant, "#SampleID", "mom_bmi", "Mother_Weight_Category", "anonymized_name", "life_stage") #Keep items that are NOT IN normal_mom_AND_infant$anonymized_name AND overweght_mom_AND_infant$anonymized name used_list <- filter(used_list, used_list %nin% overweight_mom_AND_infant$anonymized_name) #used_list should contain only remaining ones for obese #same with obese obese_filtered <- filter(obese_mom_only, anonymized_name %in% used_list$used_list) obese_mom_AND_infant <- filter(infant_metadata_cat_BMI, anonymized_name %in% obese_filtered$anonymized_name) obese_mom_AND_infant <- filter(obese_mom_AND_infant, Mother_Weight_Category %in% c("overweight", "NA")) check_obese1 <- select(obese_mom_AND_infant, "#SampleID", "mom_bmi", "Mother_Weight_Category", "anonymized_name", "life_stage") #Generate infant ONLY data + add mom weight description normal_mom_infant_only <- subset(normal_mom_AND_infant, life_stage == "Infant") normal_mom_infant_only <- mutate(normal_mom_infant_only, mom_weight_cat = ifelse(normal_mom_infant_only$Mother_Weight_Category == "NA", "normal", "failed")) check_normal <- select(normal_mom_infant_only, "#SampleID", "mom_bmi", "mom_weight_cat") overweight_mom_infant_only <- subset(overweight_mom_AND_infant, life_stage == "Infant") overweight_mom_infant_only <- mutate(overweight_mom_infant_only, mom_weight_cat = ifelse(overweight_mom_infant_only$Mother_Weight_Category == "NA", "overweight", "failed")) obese_mom_infant_only <- subset(obese_mom_AND_infant, life_stage == "Infant") obese_mom_infant_only <- mutate(obese_mom_infant_only, mom_weight_cat = ifelse(obese_mom_infant_only$Mother_Weight_Category == "NA", "obese", "failed")) infant_only_norm_over <- merge(normal_mom_infant_only, overweight_mom_infant_only, all = TRUE) infant_only_combined <- merge(infant_only_norm_over, obese_mom_infant_only, all = TRUE) total_data <- merge(infant_only_combined, infant_metadata_cat_BMI, all = TRUE) #infant_metadata_cat_BMI is now has mothers classified as obese, overweight, normal, underweight #Save file write.table(total_data, file = "qiime_files/infant_metadata_infant_only_combined_with_mat_weight_cat.tsv", row.names=TRUE, sep="\t") ################################################################################################################## BETA DIVERSITY/PCOA PLOTS ################################################################################################################## #Differential abundance analysis #Goal: see if there are different microbes in obese infants vs overweight vs normal ** #this script conducts beta divresity analyses (Wunifrac) and plots PCoA plots #for mothers and vaginally delivered infants. library(tidyverse) library(vegan) library(phyloseq) library(DESeq2) #create gm_mean functions gm_mean <- function(x, na.rm = TRUE) { exp(sum(log(x[ x> 0]), na.rm = na.rm) / length(x)) } infant_mat_bmi <- read.table("qiime_files/infant_metadata_infant_only_combined_with_mat_weight_cat.tsv", header = TRUE, sep = "\t") #load file into R, load into an object, and then manipulate biom_file <- import_biom("qiime_files/table-with-taxonomy.biom") metadata <- import_qiime_sample_data("qiime_files/infant_metadata_infant_only_combined_with_mat_weight_cat.tsv") tree <- read_tree_greengenes("qiime_files/tree.nwk") #create phylseq object infant_physeq <- merge_phyloseq(biom_file, metadata, tree) set.seed(1124) #taxonomic rank names rank_names(infant_physeq) #get the column names of the tax table, and then rename them colnames(tax_table(infant_physeq)) <- c("Kingdom", "Phylum", "Class", "Order","Family", "Genus", "Species") #change the rank names. the names of the ranks are in the taxonomy table of the physeq object rank_names(infant_physeq) #beta diversity PCoA plot #diversity requres rarefied taxa table #we know from qiime that we should rarefy to 4250 physeq_rar <- rarefy_even_depth(infant_physeq, sample.size = 4250) #convert to RA (relative abundance) physeq_rar_RA <- transform_sample_counts(physeq_rar, function(x) x/sum(x)) #function(x) x/sum(x) means given x, divide x by the sum of all x's #we define the type of analysis with ordinate() and setting the #method = PCoA, setting distance to type of beta diversity analysis #you can change analysis to jaccard, bray-curtis and so on #unfiltered data ord <- ordinate(physeq = physeq_rar_RA, method = "PCoA", distance = "wunifrac") plot_ordination(physeq_rar_RA, ord, type = "sample", color = "mom_weight_cat", title = "PCoA Unfiltered (weighted unifrac)") #Filter based on metadata: mothers only mother_only_physeq <- subset_samples(infant_physeq, life_stage == "Adult") physeq_rar_mother_only <- rarefy_even_depth(mother_only_physeq, sample.size = 4250) physeq_rar_mother_onlyRA <- transform_sample_counts(physeq_rar_mother_only, function(x) x/sum(x)) mother_only_ord <- ordinate(physeq = physeq_rar_mother_onlyRA, method = "PCoA", distance = "wunifrac") #Figure 4A plot_ordination(physeq_rar_mother_onlyRA, mother_only_ord, type = "sample", color = "Mother_Weight_Category", title = "PCoA Mothers Only (Weighted Unifrac)") + theme_bw() + theme(text=element_text(size=17)) #filter based on metadata: infants only infant_only_physeq <- subset_samples(infant_physeq, life_stage == "Infant") infant_only_physeq <- subset_samples(infant_physeq, mom_weight_cat %in% c("obese", "overweight", "normal")) physeq_rar_infant_only <- rarefy_even_depth(infant_only_physeq, sample.size = 4250) physeq_rar_infant_onlyRA <- transform_sample_counts(physeq_rar_infant_only, function(x) x/sum(x)) infant_only_ord <- ordinate(physeq = physeq_rar_infant_onlyRA, method = "PCoA", distance = "wunifrac") plot_ordination(physeq_rar_infant_onlyRA, infant_only_ord, type = "sample", color = "mom_weight_cat", title = "PCoA Infants Only (Weighted Unifrac)", size) #Vaginal infants only infant_only_physeq_vaginal <- subset_samples(infant_only_physeq, baby_delivery == "vaginal") physeq_rar_infant_only_vaginal <- rarefy_even_depth(infant_only_physeq_vaginal, sample.size = 4250) physeq_rar_infant_only_vaginal_RA <- transform_sample_counts(physeq_rar_infant_only_vaginal, function(x) x/sum(x)) vaginal_infant_ord <- ordinate(physeq = physeq_rar_infant_only_vaginal_RA, method = "PCoA", distance = "wunifrac") #Figure 5A plot_ordination(physeq_rar_infant_only_vaginal_RA, vaginal_infant_ord, type = "sample", color = "mom_weight_cat", title = "PCoA Vaginal Infants only (Weighted Unifrac)") + theme_bw() + theme(text=element_text(size=17)) ################################################################################################################## DIFFERENTIAL ABUNDANCE/RELATIVE ABUNDANCE ################################################################################################################## #This script conducts differential abundance and #relative abundance analyses for mothers and vaginally delivered infants library(tidyverse) library(vegan) library(phyloseq) library(DESeq2) library(EnvStats) library(broom) ################################################################################ #Relative abundance calculate_RA <- function(x) x/sum(x) gm_mean <- function(x, na.rm = TRUE) { exp(sum(log(x[ x> 0]), na.rm = na.rm) / length(x)) } infant_mat_bmi <- read.table("qiime_files/infant_metadata_infant_only_combined_with_mat_weight_cat.tsv", header = TRUE, sep = "\t") #load file into R, load into an object, and then manipulate #import_biom is specialized function that helps us import biom files into R biom_file <- import_biom("qiime_files/table-with-taxonomy.biom") metadata <- import_qiime_sample_data("qiime_files/infant_metadata_infant_only_combined_with_mat_weight_cat.tsv") tree <- read_tree_greengenes("qiime_files/tree.nwk") infant_physeq <- merge_phyloseq(biom_file, metadata, tree) # Convert taxonomic rank from numbers to proper names colnames(tax_table(infant_physeq)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species") # Keep only abundant ASVs # First determine counts of ASV across all samples total_counts <- taxa_sums(infant_physeq) #Calculate relative abundance for each ASV relative_abundance <- calculate_RA(total_counts) # Determine which ASVs are more abundant than X cutoff # Change this if you want a different cutoff (0.0001 = 0.01%) abundant <- relative_abundance > 0.0001 abundant_taxa <- prune_taxa(abundant, infant_physeq) family <- tax_glom(abundant_taxa, taxrank = "Family", NArm = FALSE) #Keep infant samples only infant_family <- subset_samples(family, life_stage == "Infant") infant_family_vaginal <- subset_samples(infant_family, baby_delivery == "vaginal") infant_family_vaginal_narm <- subset_samples(infant_family_vaginal, mom_weight_cat != "NA") infant_family_vaginal_overweight_norm <- subset_samples(infant_family_vaginal_narm, mom_weight_cat %in% c("normal", "overweight")) infant_family_vaginal_obese_norm <- subset_samples(infant_family_vaginal_narm, mom_weight_cat %in% c("normal", "obese")) infant_family_vaginal_obese_overweight <- subset_samples(infant_family_vaginal_narm, mom_weight_cat %in% c("obese", "overweight")) deseq_feature <- phyloseq_to_deseq2(infant_family_vaginal_narm, ~ mom_weight_cat) geo_means <- apply(counts(deseq_feature), 1, gm_mean) deseq_feature <- estimateSizeFactors(deseq_feature, geoMeans = geo_means) deseq_feature <- DESeq(deseq_feature, fitType="local") feature_diff_abund <- results(deseq_feature) # Define cutoff for the adjusted p-value alpha <- 0.05 # Reformat information as data frame including feature as variable significant_feature <- as_tibble(feature_diff_abund, rownames = "feature") # Keep only significant results and sort by adjusted p-value significant_feature <- filter(significant_feature, padj < alpha) significant_feature <- arrange(significant_feature, padj) # Get the taxonomic information as a data frame taxa_df <- as_tibble(as.data.frame(tax_table(infant_physeq)), rownames = "feature") # Combine the significant features with taxonomic classification significant_feature <- inner_join(significant_feature, taxa_df) dim(significant_feature) # Plot differential abundance significant_feature %>% ggplot(aes(x = log2FoldChange, y = Family)) + geom_col() + theme_bw() + labs(x = "log2FoldChange", y = "Family", title = "Vaginally Delivered Infants") physeq_RA_infant <- transform_sample_counts(infant_physeq, calculate_RA) family_RA_infant <- tax_glom(physeq_RA_infant, taxrank = "Family", NArm = FALSE) # graph erysipelotrichaceae and filter for that family with subset_taxa. erysipelotrichaceae <- subset_samples(family_RA_infant, life_stage == "Infant") erysipelotrichaceae <- subset_samples(erysipelotrichaceae, baby_delivery == "vaginal") erysipelotrichaceae <- subset_samples(erysipelotrichaceae, mom_weight_cat != "NA") erysipelotrichaceae <- subset_taxa(erysipelotrichaceae, Family == "f__Erysipelotrichaceae") # Change the table from wide to long format. erysipelotrichaceae_melt <- psmelt(erysipelotrichaceae) #Figure 5B ggplot(erysipelotrichaceae_melt, aes(x = as.factor(mom_weight_cat), y = Abundance)) + # Visualize data as Boxplot geom_boxplot(aes(fill = as.factor(mom_weight_cat))) + # Label for x-axis xlab("Mother Weight Class") + # Label for y-axis ylab("Relative Abundance") + # Plot title ggtitle("Erysipelotrichaceae") + # colors for the different groups (here subject 1 and 2) scale_fill_manual(values=c("seagreen3", "indianred1", "black")) + guides(fill = FALSE) + # Set a standard theme for plot theme_bw(base_size = 16) + stat_n_text() # graph xanthomonadaceae and filter for that family with subset_taxa. xanthomonadaceae <- subset_samples(family_RA_infant, life_stage == "Infant") xanthomonadaceae <- subset_samples(xanthomonadaceae, baby_delivery == "vaginal") xanthomonadaceae <- subset_samples(xanthomonadaceae, mom_weight_cat != "NA") xanthomonadaceae <- subset_taxa(xanthomonadaceae, Family == "f__Xanthomonadaceae") # change the table from wide to long format. xanthomonadaceae_melt <- psmelt(xanthomonadaceae) #Figure 5C ggplot(xanthomonadaceae_melt, aes(x = as.factor(mom_weight_cat), y = Abundance)) + # Visualize data as Boxplot geom_boxplot(aes(fill = as.factor(mom_weight_cat))) + # Label for x-axis xlab("Mother Weight Class") + # Label for y-axis ylab("Relative Abundance") + # Plot title ggtitle("Xanthomonadaceae") + # colors for the different groups (here subject 1 and 2) scale_fill_manual(values=c("seagreen3", "indianred1", "black")) + guides(fill = FALSE) + # Set a standard theme for plot theme_bw(base_size = 16) + stat_n_text() ######################### #mothers mothers_family <- subset_samples(family, life_stage == "Adult") mothers_family_obese_normal <- subset_samples(mothers_family, Mother_Weight_Category %in% c("obese", "normal")) mothers_family_overweight_normal <- subset_samples(mothers_family, Mother_Weight_Category %in% c("overweight", "normal")) mothers_family_obese_overweight <- subset_samples(mothers_family, Mother_Weight_Category %in% c("obese", "overweight")) deseq_feature_mother <- phyloseq_to_deseq2(mothers_family, ~ Mother_Weight_Category) geo_means_mother <- apply(counts(deseq_feature_mother), 1, gm_mean) deseq_feature_mother <- estimateSizeFactors(deseq_feature_mother, geoMeans = geo_means) deseq_feature_mother <- DESeq(deseq_feature_mother, fitType="local") feature_diff_abund_mother <- results(deseq_feature_mother) alpha <- 0.05 # Reformat information as data frame including feature as variable significant_feature_mother <- as_tibble(feature_diff_abund_mother, rownames = "feature") # Keep only significant results and sort by adjusted p-value significant_feature_mother <- filter(significant_feature_mother, padj < alpha) significant_feature_mother <- arrange(significant_feature_mother, padj) # Get the taxonomic information as a data frame taxa_df_mother <- as_tibble(as.data.frame(tax_table(infant_physeq)), rownames = "feature") # Combine the significant features with taxonomic classification significant_feature_mother <- inner_join(significant_feature_mother, taxa_df) # there 14 features that are different at FDR-corrected p < 0.05! dim(significant_feature_mother) # Plot differential abundance significant_feature_mother %>% ggplot(aes(x = log2FoldChange, y = Family)) + geom_col() + theme_bw() + labs(x = "log2FoldChange", y = "Family", title = "Differential Abundance in Mothers") ################ Relative Abundance physeq_RA_mother <- transform_sample_counts(infant_physeq, calculate_RA) family_RA_mother <- tax_glom(physeq_RA_mother, taxrank = "Family", NArm = FALSE) # graph paraprevotellaceae and filter for that family with subset_taxa. paraprevotellaceae <- subset_samples(family_RA_mother, life_stage == "Adult") paraprevotellaceae <- subset_samples(paraprevotellaceae, Mother_Weight_Category != "NA") paraprevotellaceae <- subset_taxa(paraprevotellaceae, Family == "f__[Paraprevotellaceae]") # plot the data by changing the table from wide to long format. paraprevotellaceae_melt <- psmelt(paraprevotellaceae) #Figure 4B ggplot(paraprevotellaceae_melt, aes(x = as.factor(Mother_Weight_Category), y = Abundance)) + # Visualize data as Boxplot geom_boxplot(aes(fill = as.factor(Mother_Weight_Category))) + # Label for x-axis xlab("Mother Weight Category") + # Label for y-axis ylab("Relative Abundance") + # Plot title ggtitle("Paraprevotellaceae") + # colors for the different groups (here subject 1 and 2) scale_fill_manual(values=c("seagreen3", "indianred1", "black")) + guides(fill = FALSE) + # Set a standard theme for plot theme_bw(base_size = 16) + stat_n_text() # Calculate alpha-diversity group significance qiime diversity alpha-group-significance \ --i-alpha-diversity core_metrics_results_4250_depth/shannon_vector.qza \ --m-metadata-file infant_metadata.txt \ --o-visualization core_metrics_results_4250_depth/shannon_group_significance.qzv qiime diversity alpha-group-significance \ --i-alpha-diversity core_metrics_results_4250_depth/evenness_vector.qza \ --m-metadata-file infant_metadata.txt \ --o-visualization core_metrics_results_4250_depth/evenness_group_significance.qzv qiime diversity alpha-group-significance \ --i-alpha-diversity core_metrics_results_4250_depth/observed_features_vector.qza \ --m-metadata-file infant_metadata.txt \ --o-visualization core_metrics_results_4250_depth/observed_features_group_significance.qzv qiime diversity alpha-group-significance \ --i-alpha-diversity core_metrics_results_4250_depth/faith_pd_vector.qza \ --m-metadata-file infant_metadata.txt \ --o-visualization core_metrics_results_4250_depth/faith_pd_group_significance.qzv # Calculate beta-diversity group significance qiime diversity beta-group-significance \ --i-distance-matrix core_metrics_results_4250_depth/bray_curtis_distance_matrix.qza \ --m-metadata-file infant_metadata.txt --m-metadata-column feed \ --o-visualization core_metrics_results_4250_depth/bray_curtis_feed_significance.qzv qiime diversity beta-group-significance \ --i-distance-matrix core_metrics_results_4250_depth/jaccard_distance_matrix.qza \ --m-metadata-file infant_metadata.txt --m-metadata-column feed \ --o-visualization core_metrics_results_4250_depth/jaccard_feed_significance.qzv qiime diversity beta-group-significance \ --i-distance-matrix core_metrics_results_4250_depth/weighted_unifrac_distance_matrix.qza \ --m-metadata-file infant_metadata.txt \ --m-metadata-column feed \ --o-visualization core_metrics_results_4250_depth/weighted_unifrac_feed_significance.qzv qiime diversity beta-group-significance \ --i-distance-matrix core_metrics_results_4250_depth/unweighted_unifrac_distance_matrix.qza \ --m-metadata-file infant_metadata.txt --m-metadata-column feed \ --o-visualization core_metrics_results_4250_depth/unweighted_unifrac_feed_significance.qzv # Generate PCoA plots qiime emperor plot \ --i-pcoa core_metrics_results_4250_depth/unweighted_unifrac_pcoa_results.qza \ --m-metadata-file infant_metadata.txt \ --o-visualization core_metrics_results_4250_depth/unweighted_unifrac_emperor qiime emperor plot \ --i-pcoa core_metrics_results_4250_depth/weighted_unifrac_pcoa_results.qza \ --m-metadata-file infant_metadata.txt \ --o-visualization core_metrics_results_4250_depth/weighted_unifrac_emperor qiime emperor plot \ --i-pcoa core_metrics_results_4250_depth/jaccard_pcoa_results.qza \ --m-metadata-file infant_metadata.txt \ --o-visualization core_metrics_results_4250_depth/jaccard_emperor qiime emperor plot \ --i-pcoa core_metrics_results_4250_depth/bray_curtis_pcoa_results.qza \ --m-metadata-file infant_metadata.txt \ --o-visualization core_metrics_results_4250_depth/bray_curtis_emperor # ANCOM on QIIME qiime metadata tabulate \ --m-input-file taxonomy.qza \ --o-visualization taxonomy.qzv # Remove low-frequency samples qiime feature-table filter-features \ --i-table infant_only_table.qza \ --p-min-frequency 10 \ --o-filtered-table feature_frequency_infant_filtered_table.qza # Generate taxonomic barplot qiime taxa barplot --i-table feature_frequency_infant_filtered_table.qza --i-taxonomy taxonomy.qza --m-metadata-file infant_metadata.txt --o-visualization taxa-bar-plots.qzv # Collapse filtered feature table to family level qiime taxa collapse \ --i-table feature_frequency_infant_filtered_table.qza \ --i-taxonomy taxonomy.qza \ --p-level 5 \ --o-collapsed-table feature_frequency_infant_filtered_table_l5.qza # Create composition artifact for differential abundance analysis qiime composition add-pseudocount \ --i-table feature_frequency_infant_filtered_table_l5.qza \ --o-composition-table comp_frequency_infant_table_l5.qza # ANCOM on feed column qiime composition ancom \ --i-table comp_frequency_infant_table_l5.qza \ --m-metadata-file infant_metadata.txt \ --m-metadata-column feed \ --o-visualization l5_ancom_feed.qzv # l5_ancom_feed.qzv file exported as l5_ancom_feed.tsv in view.qiime2.org # to add customizations library(ggplot2) library(EnvStats) ancom <- read.table(file = "l5_ancom_feed.tsv", sep = '\t', header = TRUE) # Plotting the ANCOM volcano plot ggplot(ancom, aes(y = W, x = clr)) + geom_point() + theme_classic() + labs(x = "Centered Log Ratio (clr)", y = "W") + ylim(0, 100) + xlim(0,16) # faith_pd_group_significance.qzv file exported as faith.tsv in view.qiime2.org # to add customizations faith <- read.table(file = "faith_significance.tsv", sep = '\t', header = TRUE) # Plotting the Faith's PD boxplot ggplot(faith, aes(y = faith_pd, x = feed)) + geom_boxplot() + stat_n_text() + theme_classic() + labs(x = "Feeding Method", y = "Faith's Phylogenetic Diversity Index") # observed_features_group_significant.qzv file exported as # observed_feature_gs.tsv in view.qiime2.org # to add customizations observed_feature_gs <- read.table(file = "observed_feature_gs.tsv", sep = '\t', header = TRUE) # Plotting the observed_feature_gs boxplot ggplot(observed_feature_gs, aes(y = observed_features, x = feed)) + geom_boxplot() + stat_n_text() + theme_classic() + labs(x = "Feeding Method", y = "Observed Features") # shannon_group_significant.qzv file exported as # shannon_gs.tsv in view.qiime2.org # to add customizations shannon_gs <- read.table(file = "shannon_gs.tsv", sep = '\t', header = TRUE) # Plotting the shannon_gs boxplot ggplot(shannon_gs, aes(y = shannon_entropy, x = feed)) + geom_boxplot() + stat_n_text() + theme_classic() + labs(x = "Feeding Method", y = "Shannon's Index") # evenness_group_significance.qzv file exported as # evenness_gs.tsv in view.qiime2.org # to add customizations evenness_gs <- read.table(file = "evenness_gs.tsv", sep = '\t', header = TRUE) # Plotting the evenness boxplot ggplot(evenness_gs, aes(y = pielou_evenness, x = feed)) + geom_boxplot() + stat_n_text() + theme_classic() + labs(x = "Feeding Method", y = "Pielou's Evenness")