--- title: "MICB475_T11" output: html_document date: "2023-03-14" --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) library(tidyverse) library(phyloseq) library(vegan) library(ape) library(DESeq2) library(microbiome) library(ggrepel) library(broom) library(pheatmap) library(survminer) library(survival) library(RColorBrewer) citation("survminer") # library(randomForest) ``` ```{r tidying_metadata} # Read in files meta <- read_delim("parkinsons_metadata.txt", delim="\t") tax <- read_delim("parkinsons_taxonomy.tsv", delim="\t") tree <- read.tree("parkinsons_tree.nwk") OTUs <- read_delim(file = "parkinsons_feature-table.txt", delim="\t", skip=1) ### Create new columns flagging for high/low fish and early/late age of onset ### meta_df <- as.data.frame(meta[,-1]) rownames(meta_df) <- meta$SampleID # Fish meta_df_flagged <- meta_df %>% mutate( Fish_intake = case_when(Fish >= 27.2 ~ "High", Fish < 27.2 ~ "Low", TRUE ~ "NA") ) # Age of onset meta_modified <- meta_df_flagged %>% mutate( Age_of_onset = case_when(Age_onset >= 59 ~ "Later", Age_onset < 59 ~ "Early", TRUE ~ "NA")) # Filter for males meta_modified_filtered <- meta_modified %>% filter(Sex == "Male") # Remove samples NA for fish intake or age of onset meta_final <- meta_modified_filtered %>% filter(!Fish_intake == "NA" & !Age_of_onset == "NA") # Making a new column merging fish and onset status meta_fishonset <- meta_final %>% mutate(fish_age_status = case_when(Fish_intake == 'High' & Age_of_onset == "Later" ~ "HF_LO", Fish_intake == 'High' & Age_of_onset == "Early" ~ "HF_EO", Fish_intake == 'Low'& Age_of_onset == 'Later' ~ "LF_LO", Fish_intake == "Low" & Age_of_onset == "Early" ~ "LF_EO", TRUE ~ "NA") ) meta_fishonset ``` ```{r} # Survival analysis (we discussed during our meeting). surv_data <- meta_df_flagged %>% # For this analysis, we use the metadata filter(Disease == "PD", Fish_intake != "NA") fit <- surv_fit(formula = Surv(Age_onset) ~Fish_intake, surv_data) # Building the survival curve model. # Note that the structure is: surv_fit(formula = Surv(response_variable) ~ explanatory_var1 + exploratory_var2 + etc..., data) ggsurvplot(fit, conf.int = T, xlim = c(35, 90), legend.title = "High fish diet", ggtheme = theme_classic(base_size = 20))+ labs(x = "Age", y = "Parkinsons-free survival", color = "High fish diet") fit_male <- surv_fit(formula = Surv(Age_onset) ~Fish_intake, surv_data %>% filter(Sex == "Male")) fit_female <- surv_fit(formula = Surv(Age_onset) ~Fish_intake, surv_data %>% filter(Sex == "Female")) ## PVal computation combinedSurv_pval <- surv_pvalue(fit)[['pval.txt']] maleSurv_pval <- surv_pvalue(fit_male)[['pval.txt']] femaleSurv_pval <- surv_pvalue(fit_female)[['pval.txt']] ggsave("survplot_nonstrat.png", width = 8, height = 7) ggsurvplot(fit, conf.int = T, xlim = c(35, 90), facet.by = c("Sex"), legend.title = "High fish diet") + # Plotting survival plot faceted by sex theme_classic(base_size = 20)+ labs(x = "Age", y = "Parkinsons-free survival", color = "High fish diet")+ theme_classic(base_size = 20) ggsave("survplot_strat.png", height = 7, width = 14) ``` ```{r creating_phyloseq} # Generate phyloseq sample data SAMP <- sample_data(meta_fishonset) ### Format OTU table to matrix ### # Convert everything into a matrix besides column 1 OTU_mat <- as.matrix(OTUs[,-1]) # Assign OTU ID as the rownames of new matrix rownames(OTU_mat) <- OTUs$'#OTU ID' # Generate OTU table OTU <- otu_table(OTU_mat, taxa_are_rows = TRUE) ### Format taxonomy to table ### # Convert taxon strings to table separating taxa ranks tax_mat <- tax %>% select(-Confidence)%>% separate(col=Taxon, sep="; " , into = c("Domain","Phylum","Class","Order","Family","Genus","Species")) %>% as.matrix() # Convert sampleids to rownames tax_mat <- tax_mat[,-1] rownames(tax_mat) <- tax$'Feature ID' # Generate taxa table TAX <- tax_table(tax_mat) ### Create phyloseq object ### PD_phyloseq <- phyloseq(OTU, SAMP, TAX, tree) ``` ```{r processing_phyloseq} ### Processing ### # Remove ASVs with total counts (low-reads) below 5 PD_phyloseq_processed <- filter_taxa(PD_phyloseq, function(x) sum(x)>5, prune = TRUE) # Remove samples with reads below 100 PD_phyloseq_processed <- prune_samples(sample_sums(PD_phyloseq_processed)>100, PD_phyloseq_processed) ``` ```{r rarefication} ### Rarefy samples ### # Generate rarefaction curve #rarecurve(t(as.data.frame(otu_table(PD_phyloseq_processed))), cex = 0.1) # Filter based on sequencing depth = 3500 PD_rare <- rarefy_even_depth(PD_phyloseq_processed, rngseed = 1, sample.size = 3500) # Saving rarefied and non-rarefied phyloseq objects save(PD_phyloseq_processed, file="PD_notrare.RData") save(PD_rare, file="PD_rare.RData") ``` ```{r alpha_diversity and KW test} pd_temp <- PD_rare pd_temp@sam_data$fish_age_status_full <- gsub("HF_LO", "High Fish Late Onset", pd_temp@sam_data$fish_age_status) pd_temp@sam_data$fish_age_status_full <- gsub("LF_EO", "Low Fish Early Onset", pd_temp@sam_data$fish_age_status_full) pd_temp@sam_data$fish_age_status_full <- gsub("HF_EO", "High Fish Early Onset", pd_temp@sam_data$fish_age_status_full) pd_temp@sam_data$fish_age_status_full <- gsub("LF_LO", "Low Fish Late Onset", pd_temp@sam_data$fish_age_status_full) gg_richness_fishonest <- pd_temp %>% plot_richness(x = "fish_age_status_full", measures = "Shannon") + xlab("fish_age_status") + geom_boxplot(col = c('grey', 'black', 'gold', 'grey'), fill = c('darkgreen', 'yellow', 'darkred', 'darkblue')) + geom_jitter() + labs(x='the quad wizard tournament', y='Alpha Diversity Measure')+ theme_classic(base_size = 22) gg_richness_fishonest ggsave(filename = "plot_richness_fishonest.png" , gg_richness_fishonest , width = 9, height = 6) alphadiv <- estimate_richness(PD_rare, split = TRUE, measures='Shannon') samp_dat_wdiv <- data.frame(sample_data(PD_rare), alphadiv) kruskal.test(Shannon ~ fish_age_status, data = samp_dat_wdiv) ``` ```{r} PD_rare@sam_data$shannon <- estimate_richness(PD_rare)[,6] PD_rare@sam_data %>% as_tibble() %>% mutate(age_binning = ceiling(Age/4) * 4) %>% ggplot()+ xlab("Age") + geom_boxplot(aes(x = factor(age_binning), y = shannon)) ``` ```{r beta_diversity and permanova} ### Beta diversity and PCoA visualization ### wunifrac_dm <- phyloseq::distance(pd_temp, method="wunifrac") pcoa_wunifrac <- ordinate(pd_temp, method="PCoA", distance=wunifrac_dm) gg_pcoa <- plot_ordination(pd_temp, pcoa_wunifrac, color = "fish_age_status_full") + labs(color = "Category") + stat_ellipse(level = 0.95)+ scale_color_manual(values = c('darkgreen', 'yellow', 'darkred', 'darkblue')) gg_pcoa # PERMANOVA dm_unifrac <- UniFrac(PD_rare, weighted=TRUE) data_4_permanova <- data.frame(sample_data(PD_rare)) adonis2(dm_unifrac ~ Fish_intake*Age_of_onset + Energy_kj, data=data_4_permanova) # Save plot ggsave("pcoa_wunifrac.png" , gg_pcoa , height = 4, width = 7) ``` ```{r new fish definitions} # Filtering for male PD patients metafish_new <- meta_df %>% filter(Sex == "Male", Disease == "PD") # Selecting top and lowest 40% of the dataset by fish consumption (based on male PD population) metafish_new <- metafish_new %>% # filter(Fish <= quantile(metafish_new$Fish, 0.4, na.rm = T) | Fish >= quantile(metafish_new$Fish, 0.6, na.rm = T)) %>% mutate(Fish_intake = case_when(Fish > 27.3 ~ "high_fish", Fish <= 27.3 ~ "low_fish", TRUE ~ NA)) %>% filter(!is.na(Fish_intake)) # Generate phyloseq sample data from filtered data smp <- sample_data(metafish_new) ### Format OTU table to matrix ### # Convert everything into a matrix besides column 1 OTU_mat <- as.matrix(OTUs[,-1]) # Assign OTU ID as the rownames of new matrix rownames(OTU_mat) <- OTUs$'#OTU ID' # Generate OTU table OTU <- otu_table(OTU_mat, taxa_are_rows = TRUE) ### Format taxonomy to table ### # Convert taxon strings to table separating taxa ranks tax_mat <- tax %>% select(-Confidence)%>% separate(col=Taxon, sep="; " , into = c("Domain","Phylum","Class","Order","Family","Genus","Species")) %>% as.matrix() # Convert sampleids to rownames tax_mat <- tax_mat[,-1] rownames(tax_mat) <- tax$'Feature ID' # Generate taxa table TAX <- tax_table(tax_mat) ### Create phyloseq object ### PD_phyloseq_dev <- phyloseq(OTU, smp, TAX, tree) # Set prevalence at 50% (half of samples must possess genus) and abundance at 10^-3 abundance_filter <- 0.0001 prev_filter <- 0 PD_phyloseq_glommed <- PD_phyloseq_dev %>% tax_glom("Genus", NArm = T) # # Prevalence filter # modified_sampledata_df <- PD_phyloseq_glommed@otu_table@.Data %>% as.data.frame() # # # PD_phyloseqG_prev_filtered = prune_taxa(apply(modified_sampledata_df, 1, function(x) sum(x != 0, na.rm = TRUE)/length(x[!is.na(x)]))>prev_filter,PD_phyloseq_glommed) # # # Abundance filter PD_phyloseqGP_abundance_filtered<-prune_taxa((taxa_sums(transform(PD_phyloseq_glommed,'compositional'))/nsamples(PD_phyloseq_glommed))>abundance_filter,PD_phyloseq_glommed) PD_phyloseqGPA_p1 <- transform_sample_counts(PD_phyloseqGP_abundance_filtered, function(x) x+1) # Run deseq2 PD_phyloseq_GPAp1_deseq <- phyloseq_to_deseq2(PD_phyloseqGPA_p1, ~Fish_intake) PD_phyloseq_GPAp1_deseq$Fish_intake <- factor(PD_phyloseq_GPAp1_deseq$Fish_intake,levels = c("low_fish", "high_fish")) PD_deseq_output <- DESeq(PD_phyloseq_GPAp1_deseq, fitType="local") deseq_results <- results(PD_deseq_output) deseq_results <- deseq_results[order(deseq_results$padj),] deseq_results_tidy = cbind(as(deseq_results, "data.frame"), as(tax_table(PD_phyloseq_glommed)[rownames(deseq_results), ], "matrix")) deseq_results_tidy %>% arrange(padj) ``` ```{r creating_deseq_barplot} # Create bar plot # To get table of results sigASVs <- deseq_results_tidy %>% filter(padj<=0.05 & abs(log2FoldChange)>1) # %>% # dplyr::rename(ASV=row) sigASVs # Get a vector of ASV names sigASVs_vec <- row.names(sigASVs) # Prune phyloseq file sigASV_phyloseq <- prune_taxa(sigASVs_vec,PD_phyloseq_processed) otu_table(sigASV_phyloseq) %>% as.data.frame() # Add taxonomy onto DESeq results table merged_results <- sigASVs %>% arrange(log2FoldChange) %>% mutate(Genus = make.unique(Genus)) %>% mutate(Genus = factor(Genus, levels=unique(Genus)), direction = case_when(log2FoldChange > 0 ~ "#f94449", log2FoldChange < 0 ~ "lightblue")) # Make DESeq plot ggplot(merged_results %>% filter(!is.na(Genus), !grepl("^NA", Genus))) + geom_bar(aes(x=Genus, y=log2FoldChange, fill = direction), stat="identity",width = 0.8, color = "black",show.legend = FALSE)+ geom_errorbar(aes(x=Genus, ymin=log2FoldChange-lfcSE, ymax=log2FoldChange+lfcSE), width = 0.2) + theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5))+ labs(fill = "differential abundance") + coord_flip()+ scale_fill_discrete(labels = c("High", "Low"))+ theme_classic(base_size = 24) ggsave("deseq_plot.png", width = 12, height = 8) ``` ```{r creating_deseq_volcano plot} volcano_data <- deseq_results %>% as.data.frame() %>% mutate(plot_color = case_when(log2FoldChange > 1 & padj <= 0.05 ~ "#f94449", log2FoldChange < -1 & padj <= 0.05 ~ "#8cd3ff", TRUE ~ "#6f6f6f")) plot_color_vector <- volcano_data$plot_color ggplot(volcano_data)+ geom_point(aes(x = log2FoldChange, y = -log10(padj)), color = plot_color_vector)+ geom_vline(xintercept = -1, alpha = 0.4)+ geom_vline(xintercept = 1, alpha = 0.4)+ geom_hline(yintercept = -log10(0.05), alpha = 0.4)+ geom_label_repel(data = deseq_results_tidy %>% filter(abs(log2FoldChange) > 1,padj < 0.05), aes(label = Genus, x = log2FoldChange, y= -log10(padj)), box.padding = unit(1, "lines"), alpha = 1, point.padding = 0, nudge_x = 1.2) labelled_hits <- merged_results %>% filter(padj < 0.05) %>% filter(!grepl("uncultured", Genus, ignore.case = T)) %>% mutate(Genus = str_match(Genus, "g__\\[?([A-Za-z]*)")[,2]) ggplot(volcano_data)+ geom_point(aes(x = log2FoldChange, y = -log10(padj)), color = plot_color_vector, size = 3, alpha = 0.7)+ geom_point(aes(x = log2FoldChange, y = -log10(padj)), size = 3, shape = 1)+ geom_vline(xintercept = -1)+ geom_vline(xintercept = 1)+ geom_hline(yintercept = -log10(0.05))+ geom_label_repel(data =labelled_hits, aes(label = Genus, x = log2FoldChange, y= -log10(padj)), box.padding = unit(1, "lines"), alpha = 1, max.time = 10, point.padding = 0.5, force = 1, size = 6)+ theme_classic(base_size = 24) #Filtered out some labels that were causing too many overlaps, couldn't properly read labels ggsave("volcanoplot.png", width = 12, height = 7) ``` ```{r spearman correlation} spearcor_input <- deseq_results_tidy %>% filter(padj < 0.05) %>% select(2) spearcor_meta <- sample_data(PD_phyloseq_processed) taxadata <- otu_table(PD_phyloseq_processed) %>% as.data.frame() taxadata$asv <- row.names(taxadata) taxadata <- taxadata %>% pivot_longer(F1256:111, names_to = "sample", values_to = "abundance") %>% group_by(sample) %>% mutate(rel_abundance = abundance/sum(abundance)) %>% select(-abundance) %>% pivot_wider(names_from = "sample", values_from = "rel_abundance") spearcor_taxa <- taxadata %>% filter(asv %in% row.names(spearcor_input)) %>% t() %>% as.data.frame() colnames(spearcor_taxa) <- spearcor_taxa[1,] spearcor_taxa_clean <- spearcor_taxa[-1,] %>% merge(spearcor_meta[,"Age_onset"], by.x = "row.names", by.y = "row.names") cor_results <- spearcor_taxa_clean[,3:ncol(spearcor_taxa_clean)-1] %>% apply(2, function(x){ # print(as.numeric(x)) abundance_numeric <- as.numeric(x) cor_result <- cor.test(x = abundance_numeric, y = spearcor_taxa_clean$Age_onset) c(cor_result[3][[1]], cor_result[4][[1]]) }) cor_results <- as.data.frame(cor_results) %>% t() %>% as.data.frame() colnames(cor_results) <- c("pval", "cor_coeff") cor_results$padj <- cor_results$pval %>% p.adjust("fdr") cor_results %>% filter(padj < 0.05) ``` ```{r} rel_abund <- spearcor_taxa_clean[,"c6faae06a70c97500e4f329b617e70b7"] %>% as.numeric() age_onset <- spearcor_taxa_clean$Age_onset plot(spearcor_taxa_clean[,"c6faae06a70c97500e4f329b617e70b7"], spearcor_taxa_clean$Age_onset) ggplot()+ geom_point(aes(x = age_onset, y = rel_abund))+ geom_smooth(aes(x = age_onset, y = rel_abund), method = "lm") ``` ```{r} otu_alldata <- OTU %>% as.data.frame() otu_alldata$asv <- row.names(otu_alldata) otu_alldata <- otu_alldata %>% pivot_longer(F1052:300, names_to = "sample", values_to = "abundance") %>% group_by(sample) %>% mutate(rel_abundance = abundance/sum(abundance)) %>% select(-abundance) %>% pivot_wider(names_from = "sample", values_from = "rel_abundance") %>% as.data.frame() row.names(otu_alldata) <- otu_alldata$asv otu_alldata <- otu_alldata %>% select(-asv) %>% t() %>% as.data.frame() temp <- all_data_phyloseq@otu_table %>% t() %>% as.data.frame() deseq_results_tidy corrplot_data <- meta_df %>% select(Disease, Age, Age_onset, Fish, Sex) %>% merge(otu_alldata["b21cc0edd44ff27de967257d89a72e2d"], by = "row.names") # ggplot(corrplot_data, aes(`c6faae06a70c97500e4f329b617e70b7`, Age, color = Fish > 27.3)) + # geom_point()+ # geom_smooth(method = "lm")+ # # xlim(c(0, 0.04))+ # facet_grid(. ~Sex) ggplot(corrplot_data, aes(`b21cc0edd44ff27de967257d89a72e2d`, Age, color = Disease)) + geom_point()+ geom_smooth(method = "lm")+ # xlim(c(0.000000001, 0.04))+ facet_grid(Sex ~.)+ theme(text = element_text(size = 16))+ labs(x = "Intestinimonas relative abundance") ggsave("corrplot_intestinimonas.png", width = 8, height = 6) ``` ```{r} all_data_phyloseq <- phyloseq(OTU, meta_df, TAX, tree) all_data_phyloseq_glommed <- all_data_phyloseq %>% tax_glom(taxrank = "Genus") taxadata_alldata <- all_data_phyloseq_glommed@otu_table %>% merge(all_data_phyloseq_glommed@tax_table %>% as.data.frame(), by.x = "row.names", by.y = "row.names") taxadata_alldata <- taxadata_alldata %>% pivot_longer(F1052:F9959, names_to = "sample", values_to = "abundance") %>% group_by(sample) %>% mutate(rel_abundance = abundance) %>% #/sum(abundance)) %>% select(-abundance) %>% pivot_wider(names_from = "sample", values_from = "rel_abundance") %>% select(Genus, F1052:F9959) heatmap_data <- data.frame(asv = c(), male_pval = c(), male_estimate = c(), female_pval = c(), female_estimate = c(), direction = c()) for (species in deseq_results_tidy$Genus[deseq_results_tidy$padj <= 0.05 & abs(deseq_results_tidy$log2FoldChange) > 1 & !grepl("uncultured", deseq_results_tidy$Genus)]){ loop_data <- taxadata_alldata %>% filter(Genus == species) %>% pivot_longer(cols = F1052:301, names_to = "sample", values_to = "rel_abundance") %>% merge(meta_df, by.x = "sample", by.y = "row.names") loop_direction <- ifelse(deseq_results_tidy$log2FoldChange[deseq_results_tidy$Genus == species] > 0, "Increase", "Decrease") loop_male_lm <- lm(Age ~ Disease * rel_abundance + Energy_kj + BMI, data = loop_data[loop_data$Sex == "Male",], na.action = na.omit) %>% tidy() male_pval <- loop_male_lm$p.value[[6]] male_estimate <- loop_male_lm$estimate[[6]] loop_female_lm <- lm(Age ~ Disease * rel_abundance + Energy_kj + BMI, data = loop_data[loop_data$Sex == "Female",], na.action = na.omit) %>% tidy() female_pval <- loop_female_lm$p.value[[6]] female_estimate <- loop_female_lm$estimate[[6]] loop_results <- data.frame(asv = c(species), male_pval = c(male_pval), male_estimate = c(male_estimate), female_pval = c(female_pval), female_estimate = c(female_estimate), direction = loop_direction) heatmap_data <- bind_rows(heatmap_data, loop_results) } heatmap_data$male_pval <- heatmap_data$male_pval %>% p.adjust(method = "fdr") heatmap_data$female_pval <- heatmap_data$female_pval %>% p.adjust(method = "fdr") heatmap_data_adj <- heatmap_data %>% mutate(Genus = str_match(asv, "g__(.*)")[,2]) %>% column_to_rownames("Genus") %>% select(-asv) %>% na.omit() heatmap_input <- heatmap_data_adj[,c(2,4)] %>% as.matrix() log2fc_pheatmap <- pheatmap(heatmap_input, color = colorRampPalette(c("#2265FF", "#FF4949"))(100), breaks = c(seq(-1,-0.2001, 0.8/10), seq(-0.2,-0.0001, 0.2/40), seq(0,0.2, 0.2/40), seq(0.2001,1, 0.8/10)), cluster_rows = T, cluster_cols = F, cellwidth = 25, treeheight_row = 0, annotation_row = heatmap_data_adj %>% select("Log2FC(High-Low)" = direction)) ggsave("log2FC_pheatmap_lm.png", plot = log2fc_pheatmap, height = 5, width = 6) deseq_results_tidy %>% filter(padj <= 0.05) %>% arrange(log2FoldChange) ``` ```{r} distribution_data <- meta_fishonset %>% merge(otu_alldata["b21cc0edd44ff27de967257d89a72e2d"], by = "row.names")%>% mutate(sample = factor(Row.names)) distribution_data %>% ggplot(aes(x = as.character(Row.names), y = b21cc0edd44ff27de967257d89a72e2d))+ geom_bar(stat = "identity")+ facet_wrap(facets = "fish_age_status")+ theme(axis.text.x = element_text(angle = 90)) colorRamp_15 <- colorRampPalette(brewer.pal(8, "RdYlBu"))(15) distribution_data %>% group_by(b21cc0edd44ff27de967257d89a72e2d, fish_age_status) %>% sample_n(1) %>% filter(b21cc0edd44ff27de967257d89a72e2d != 0)%>% ggplot(aes(x = factor(sample), y = b21cc0edd44ff27de967257d89a72e2d))+ geom_bar(stat = "identity", width = 0.7, fill = "#f88605", color = "black", size = 1)+ facet_grid(fish_age_status~., scale = "free")+ coord_flip()+ theme(axis.text = element_text(size = 14), text = element_text(size = 17))+ labs(x = "Fish and Age of Onset Status", y = "Relative abundance") ggsave("intestimonas_dist.png", width = 6, height = 8) distribution_data %>% group_by(fish_age_status, b21cc0edd44ff27de967257d89a72e2d == 0)%>% summarize(count = n())%>% mutate(prop = count/sum(count), prop_neg = 1 - prop) %>% filter(`b21cc0edd44ff27de967257d89a72e2d == 0`) %>% ggplot()+ geom_bar(aes(x = reorder(fish_age_status, prop_neg, decreasing = TRUE), y = prop_neg), stat = "identity", width = 0.7, fill = "#f88605", color = "black", size = 1)+ labs(x = "Fish and Age of Onset Status", y = "Prevalence (Out of Male PD)") ggsave("intestimonas_prevalence.png", width = 6, height = 5) meta_fishonset %>% ggplot(aes(x = Age, y = age_onset))+ geom_point()+ geom_smooth(method = "lm")+ geom_label(aes(x = 71, y = 45, label = paste0("Correlation coefficient = ",round(cor(meta_fishonset$Age, meta_fishonset$Age_onset), 2))),size = 6)+ labs(y = "Age of disease onset", x = "Current age at time of study")+ theme(axis.text = element_text(size = 13)) ggsave("cor_age_ageOnset.png", width = 7, height = 4.5) ``` ```{r} palette_Graph <- palette.colors(palette = "Set1") meta_df %>% ggplot()+ geom_density(aes(x = Age_onset), fill = "gray", alpha = 0.6)+ geom_vline(xintercept = median(meta_df$Age_onset, na.rm = TRUE))+ labs(x = "Age of onset", y = "Density") ggsave("AgeOnset_density.png", width = 7, height = 4.5) meta_df %>% ggplot()+ geom_density(aes(x = Fish, fill = factor(Sex, levels = c("Male", "Female"))), alpha = 0.4)+ geom_vline(xintercept = median(meta_df$Fish[meta_df$Sex == "Male"], na.rm = TRUE), color = "#F8766D")+ geom_vline(xintercept = median(meta_df$Fish[meta_df$Sex == "Female"], na.rm = TRUE), color = "#00BFC4")+ labs(x = "Fish Intake", y = "Density", fill = "Sex") ggsave("densityFish.png", width = 7, height = 4.5) ``` ```{r MetaCyc} SMP <- sample_data(metafish_new) # Create new phyloseq object with MetaCyc terms # metacyc <- read_delim("path_abun_unstrat.tsv", delim="\t") %>% # merge(meta_df, by.x = "sequence", by.y = "row.names") mc <- data.table::fread("path_abun_unstrat.tsv") %>% column_to_rownames("pathway") t = mc %>% mutate(mc_term = rownames(mc),Genus = rownames(mc)) %>% select(mc_term,Genus) biom <- phyloseq(otu_table(as.matrix(mc),taxa_are_rows = T), SMP) # Geometric mean function # gm_mean = function(x, na.rm=TRUE){ # exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)) # } abundance_filter <- 0.000 pval <- 0.05 prev_filter <- 0 # Prevalence filter prev_table <- biom@otu_table@.Data %>% as.data.frame() phylo_prev_filtered = prune_taxa(apply(prev_table, 1, function(x) sum(x != 0, na.rm = TRUE)/length(x[!is.na(x)]))>prev_filter,biom) # Abundance filter phylo_abundance_filtered = prune_taxa((taxa_sums(transform(phylo_prev_filtered,'compositional'))/nsamples(phylo_prev_filtered))>abundance_filter,phylo_prev_filtered) phyloseq_filtered_p1 <- transform_sample_counts(phylo_abundance_filtered, function(x) x+1) # # Run deseq2 temp2 <- phyloseq_to_deseq2(phyloseq_filtered_p1, ~Fish_intake) # # # temp2 = estimateSizeFactors(temp2, geoMeans = geoMeans) temp2 = DESeq(temp2, fitType="local") temp2$Fish_intake <- factor(temp2$Fish_intake,levels = c("low_fish", "high_fish")) res = results(temp2) res = res[order(res$padj), ] metaCyc_data <- res %>% as.data.frame() %>% filter(padj <= 0.05) %>% rownames_to_column("pathway") pathway_names <- tibble(name = metaCyc_data$pathway, value = c("sulfoquinovose degradation I", "superpathway of lipopolysaccharide biosynthesis", "phenylacetate degradation I (aerobic)", "superpathway of phenylethylamine degradation", "superpathway of L-threonine metabolism", "4-hydroxyphenylacetate degradation", "formaldehyde assimilation I (serine pathway)", "aerobactin biosynthesis", "phosphopantothenate biosynthesis III", "mevalonate pathway II (archaea)", "peptidoglycan biosynthesis V (β-lactam resistance)")) metaCycplot_data <- metaCyc_data %>% merge(pathway_names, by.x = "pathway", by.y = "name") %>% mutate(direction = case_when(log2FoldChange > 0 ~ "#f94449", log2FoldChange < 0 ~ "lightblue")) ggplot(metaCycplot_data)+ geom_col(aes(x = reorder(value, log2FoldChange), y = log2FoldChange, fill = direction), width = 0.8, color = "black")+ labs(x = "MetaCyc pathway", y = "Log2FC(high_fish-low_fish)", fill = "Direction")+ geom_errorbar(aes(x=reorder(value, log2FoldChange), ymin=log2FoldChange-lfcSE, ymax=log2FoldChange+lfcSE), width = 0.2)+ theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5))+ scale_fill_discrete(labels = c("High", "Low"))+ coord_flip()+ theme_classic(base_size = 22) ggsave("metaCyc.png", height = 7, width = 14) ```