---
title: "MICB475_T11"
output: html_document
date: "2023-03-14"
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(phyloseq)
library(vegan)
library(ape)
library(DESeq2)
library(microbiome)
library(ggrepel)
library(broom)
library(pheatmap)
library(survminer)
library(survival)
library(RColorBrewer)
citation("survminer")
# library(randomForest)
```
```{r tidying_metadata}
# Read in files
meta <- read_delim("parkinsons_metadata.txt", delim="\t")
tax <- read_delim("parkinsons_taxonomy.tsv", delim="\t")
tree <- read.tree("parkinsons_tree.nwk")
OTUs <- read_delim(file = "parkinsons_feature-table.txt", delim="\t", skip=1)
### Create new columns flagging for high/low fish and early/late age of onset ###
meta_df <- as.data.frame(meta[,-1])
rownames(meta_df) <- meta$SampleID
# Fish
meta_df_flagged <- meta_df %>%
mutate(
Fish_intake = case_when(Fish >= 27.2 ~ "High",
Fish < 27.2 ~ "Low",
TRUE ~ "NA")
)
# Age of onset
meta_modified <- meta_df_flagged %>%
mutate(
Age_of_onset = case_when(Age_onset >= 59 ~ "Later",
Age_onset < 59 ~ "Early",
TRUE ~ "NA"))
# Filter for males
meta_modified_filtered <- meta_modified %>%
filter(Sex == "Male")
# Remove samples NA for fish intake or age of onset
meta_final <- meta_modified_filtered %>%
filter(!Fish_intake == "NA" & !Age_of_onset == "NA")
# Making a new column merging fish and onset status
meta_fishonset <- meta_final %>%
mutate(fish_age_status = case_when(Fish_intake == 'High' & Age_of_onset == "Later" ~ "HF_LO",
Fish_intake == 'High' & Age_of_onset == "Early" ~ "HF_EO",
Fish_intake == 'Low'& Age_of_onset == 'Later' ~ "LF_LO",
Fish_intake == "Low" & Age_of_onset == "Early" ~ "LF_EO",
TRUE ~ "NA")
)
meta_fishonset
```
```{r}
# Survival analysis (we discussed during our meeting).
surv_data <- meta_df_flagged %>% # For this analysis, we use the metadata
filter(Disease == "PD", Fish_intake != "NA")
fit <- surv_fit(formula = Surv(Age_onset) ~Fish_intake, surv_data) # Building the survival curve model.
# Note that the structure is: surv_fit(formula = Surv(response_variable) ~ explanatory_var1 + exploratory_var2 + etc..., data)
ggsurvplot(fit, conf.int = T, xlim = c(35, 90), legend.title = "High fish diet", ggtheme = theme_classic(base_size = 20))+
labs(x = "Age", y = "Parkinsons-free survival", color = "High fish diet")
fit_male <- surv_fit(formula = Surv(Age_onset) ~Fish_intake, surv_data %>% filter(Sex == "Male"))
fit_female <- surv_fit(formula = Surv(Age_onset) ~Fish_intake, surv_data %>% filter(Sex == "Female"))
## PVal computation
combinedSurv_pval <- surv_pvalue(fit)[['pval.txt']]
maleSurv_pval <- surv_pvalue(fit_male)[['pval.txt']]
femaleSurv_pval <- surv_pvalue(fit_female)[['pval.txt']]
ggsave("survplot_nonstrat.png", width = 8, height = 7)
ggsurvplot(fit, conf.int = T, xlim = c(35, 90), facet.by = c("Sex"), legend.title = "High fish diet") + # Plotting survival plot faceted by sex
theme_classic(base_size = 20)+
labs(x = "Age", y = "Parkinsons-free survival", color = "High fish diet")+
theme_classic(base_size = 20)
ggsave("survplot_strat.png", height = 7, width = 14)
```
```{r creating_phyloseq}
# Generate phyloseq sample data
SAMP <- sample_data(meta_fishonset)
### Format OTU table to matrix ###
# Convert everything into a matrix besides column 1
OTU_mat <- as.matrix(OTUs[,-1])
# Assign OTU ID as the rownames of new matrix
rownames(OTU_mat) <- OTUs$'#OTU ID'
# Generate OTU table
OTU <- otu_table(OTU_mat, taxa_are_rows = TRUE)
### Format taxonomy to table ###
# Convert taxon strings to table separating taxa ranks
tax_mat <- tax %>%
select(-Confidence)%>%
separate(col=Taxon, sep="; "
, into = c("Domain","Phylum","Class","Order","Family","Genus","Species")) %>%
as.matrix()
# Convert sampleids to rownames
tax_mat <- tax_mat[,-1]
rownames(tax_mat) <- tax$'Feature ID'
# Generate taxa table
TAX <- tax_table(tax_mat)
### Create phyloseq object ###
PD_phyloseq <- phyloseq(OTU, SAMP, TAX, tree)
```
```{r processing_phyloseq}
### Processing ###
# Remove ASVs with total counts (low-reads) below 5
PD_phyloseq_processed <- filter_taxa(PD_phyloseq, function(x) sum(x)>5, prune = TRUE)
# Remove samples with reads below 100
PD_phyloseq_processed <- prune_samples(sample_sums(PD_phyloseq_processed)>100, PD_phyloseq_processed)
```
```{r rarefication}
### Rarefy samples ###
# Generate rarefaction curve
#rarecurve(t(as.data.frame(otu_table(PD_phyloseq_processed))), cex = 0.1)
# Filter based on sequencing depth = 3500
PD_rare <- rarefy_even_depth(PD_phyloseq_processed, rngseed = 1, sample.size = 3500)
# Saving rarefied and non-rarefied phyloseq objects
save(PD_phyloseq_processed, file="PD_notrare.RData")
save(PD_rare, file="PD_rare.RData")
```
```{r alpha_diversity and KW test}
pd_temp <- PD_rare
pd_temp@sam_data$fish_age_status_full <- gsub("HF_LO", "High Fish Late Onset", pd_temp@sam_data$fish_age_status)
pd_temp@sam_data$fish_age_status_full <- gsub("LF_EO", "Low Fish Early Onset", pd_temp@sam_data$fish_age_status_full)
pd_temp@sam_data$fish_age_status_full <- gsub("HF_EO", "High Fish Early Onset", pd_temp@sam_data$fish_age_status_full)
pd_temp@sam_data$fish_age_status_full <- gsub("LF_LO", "Low Fish Late Onset", pd_temp@sam_data$fish_age_status_full)
gg_richness_fishonest <- pd_temp %>% plot_richness(x = "fish_age_status_full", measures = "Shannon") +
xlab("fish_age_status") +
geom_boxplot(col = c('grey', 'black', 'gold', 'grey'),
fill = c('darkgreen', 'yellow', 'darkred', 'darkblue')) +
geom_jitter() +
labs(x='the quad wizard tournament', y='Alpha Diversity Measure')+
theme_classic(base_size = 22)
gg_richness_fishonest
ggsave(filename = "plot_richness_fishonest.png"
, gg_richness_fishonest
, width = 9, height = 6)
alphadiv <- estimate_richness(PD_rare, split = TRUE, measures='Shannon')
samp_dat_wdiv <- data.frame(sample_data(PD_rare), alphadiv)
kruskal.test(Shannon ~ fish_age_status, data = samp_dat_wdiv)
```
```{r}
PD_rare@sam_data$shannon <- estimate_richness(PD_rare)[,6]
PD_rare@sam_data %>% as_tibble() %>%
mutate(age_binning = ceiling(Age/4) * 4) %>%
ggplot()+
xlab("Age") +
geom_boxplot(aes(x = factor(age_binning), y = shannon))
```
```{r beta_diversity and permanova}
### Beta diversity and PCoA visualization ###
wunifrac_dm <- phyloseq::distance(pd_temp, method="wunifrac")
pcoa_wunifrac <- ordinate(pd_temp, method="PCoA", distance=wunifrac_dm)
gg_pcoa <- plot_ordination(pd_temp, pcoa_wunifrac, color = "fish_age_status_full") +
labs(color = "Category") +
stat_ellipse(level = 0.95)+
scale_color_manual(values = c('darkgreen', 'yellow', 'darkred', 'darkblue'))
gg_pcoa
# PERMANOVA
dm_unifrac <- UniFrac(PD_rare, weighted=TRUE)
data_4_permanova <- data.frame(sample_data(PD_rare))
adonis2(dm_unifrac ~ Fish_intake*Age_of_onset + Energy_kj, data=data_4_permanova)
# Save plot
ggsave("pcoa_wunifrac.png"
, gg_pcoa
, height = 4, width = 7)
```
```{r new fish definitions}
# Filtering for male PD patients
metafish_new <- meta_df %>%
filter(Sex == "Male", Disease == "PD")
# Selecting top and lowest 40% of the dataset by fish consumption (based on male PD population)
metafish_new <- metafish_new %>%
# filter(Fish <= quantile(metafish_new$Fish, 0.4, na.rm = T) | Fish >= quantile(metafish_new$Fish, 0.6, na.rm = T)) %>%
mutate(Fish_intake = case_when(Fish > 27.3 ~ "high_fish",
Fish <= 27.3 ~ "low_fish",
TRUE ~ NA)) %>%
filter(!is.na(Fish_intake))
# Generate phyloseq sample data from filtered data
smp <- sample_data(metafish_new)
### Format OTU table to matrix ###
# Convert everything into a matrix besides column 1
OTU_mat <- as.matrix(OTUs[,-1])
# Assign OTU ID as the rownames of new matrix
rownames(OTU_mat) <- OTUs$'#OTU ID'
# Generate OTU table
OTU <- otu_table(OTU_mat, taxa_are_rows = TRUE)
### Format taxonomy to table ###
# Convert taxon strings to table separating taxa ranks
tax_mat <- tax %>%
select(-Confidence)%>%
separate(col=Taxon, sep="; "
, into = c("Domain","Phylum","Class","Order","Family","Genus","Species")) %>%
as.matrix()
# Convert sampleids to rownames
tax_mat <- tax_mat[,-1]
rownames(tax_mat) <- tax$'Feature ID'
# Generate taxa table
TAX <- tax_table(tax_mat)
### Create phyloseq object ###
PD_phyloseq_dev <- phyloseq(OTU, smp, TAX, tree)
# Set prevalence at 50% (half of samples must possess genus) and abundance at 10^-3
abundance_filter <- 0.0001
prev_filter <- 0
PD_phyloseq_glommed <- PD_phyloseq_dev %>% tax_glom("Genus", NArm = T)
# # Prevalence filter
# modified_sampledata_df <- PD_phyloseq_glommed@otu_table@.Data %>% as.data.frame()
# #
# PD_phyloseqG_prev_filtered = prune_taxa(apply(modified_sampledata_df, 1, function(x) sum(x != 0, na.rm = TRUE)/length(x[!is.na(x)]))>prev_filter,PD_phyloseq_glommed)
#
# # Abundance filter
PD_phyloseqGP_abundance_filtered<-prune_taxa((taxa_sums(transform(PD_phyloseq_glommed,'compositional'))/nsamples(PD_phyloseq_glommed))>abundance_filter,PD_phyloseq_glommed)
PD_phyloseqGPA_p1 <- transform_sample_counts(PD_phyloseqGP_abundance_filtered, function(x) x+1)
# Run deseq2
PD_phyloseq_GPAp1_deseq <- phyloseq_to_deseq2(PD_phyloseqGPA_p1, ~Fish_intake)
PD_phyloseq_GPAp1_deseq$Fish_intake <- factor(PD_phyloseq_GPAp1_deseq$Fish_intake,levels = c("low_fish", "high_fish"))
PD_deseq_output <- DESeq(PD_phyloseq_GPAp1_deseq, fitType="local")
deseq_results <- results(PD_deseq_output)
deseq_results <- deseq_results[order(deseq_results$padj),]
deseq_results_tidy = cbind(as(deseq_results, "data.frame"),
as(tax_table(PD_phyloseq_glommed)[rownames(deseq_results), ], "matrix"))
deseq_results_tidy %>% arrange(padj)
```
```{r creating_deseq_barplot}
# Create bar plot
# To get table of results
sigASVs <- deseq_results_tidy %>%
filter(padj<=0.05 & abs(log2FoldChange)>1) # %>%
# dplyr::rename(ASV=row)
sigASVs
# Get a vector of ASV names
sigASVs_vec <- row.names(sigASVs)
# Prune phyloseq file
sigASV_phyloseq <- prune_taxa(sigASVs_vec,PD_phyloseq_processed)
otu_table(sigASV_phyloseq) %>% as.data.frame()
# Add taxonomy onto DESeq results table
merged_results <- sigASVs %>%
arrange(log2FoldChange) %>%
mutate(Genus = make.unique(Genus)) %>%
mutate(Genus = factor(Genus, levels=unique(Genus)),
direction = case_when(log2FoldChange > 0 ~ "#f94449",
log2FoldChange < 0 ~ "lightblue"))
# Make DESeq plot
ggplot(merged_results %>% filter(!is.na(Genus), !grepl("^NA", Genus))) +
geom_bar(aes(x=Genus, y=log2FoldChange, fill = direction), stat="identity",width = 0.8, color = "black",show.legend = FALSE)+
geom_errorbar(aes(x=Genus, ymin=log2FoldChange-lfcSE, ymax=log2FoldChange+lfcSE), width = 0.2) +
theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5))+
labs(fill = "differential abundance") +
coord_flip()+
scale_fill_discrete(labels = c("High", "Low"))+
theme_classic(base_size = 24)
ggsave("deseq_plot.png", width = 12, height = 8)
```
```{r creating_deseq_volcano plot}
volcano_data <- deseq_results %>% as.data.frame() %>%
mutate(plot_color = case_when(log2FoldChange > 1 & padj <= 0.05 ~ "#f94449",
log2FoldChange < -1 & padj <= 0.05 ~ "#8cd3ff",
TRUE ~ "#6f6f6f"))
plot_color_vector <- volcano_data$plot_color
ggplot(volcano_data)+
geom_point(aes(x = log2FoldChange, y = -log10(padj)), color = plot_color_vector)+
geom_vline(xintercept = -1, alpha = 0.4)+
geom_vline(xintercept = 1, alpha = 0.4)+
geom_hline(yintercept = -log10(0.05), alpha = 0.4)+
geom_label_repel(data = deseq_results_tidy %>% filter(abs(log2FoldChange) > 1,padj < 0.05), aes(label = Genus, x = log2FoldChange, y= -log10(padj)), box.padding = unit(1, "lines"), alpha = 1, point.padding = 0, nudge_x = 1.2)
labelled_hits <- merged_results %>%
filter(padj < 0.05) %>%
filter(!grepl("uncultured", Genus, ignore.case = T)) %>%
mutate(Genus = str_match(Genus, "g__\\[?([A-Za-z]*)")[,2])
ggplot(volcano_data)+
geom_point(aes(x = log2FoldChange, y = -log10(padj)),
color = plot_color_vector,
size = 3,
alpha = 0.7)+
geom_point(aes(x = log2FoldChange, y = -log10(padj)),
size = 3,
shape = 1)+
geom_vline(xintercept = -1)+
geom_vline(xintercept = 1)+
geom_hline(yintercept = -log10(0.05))+
geom_label_repel(data =labelled_hits, aes(label = Genus, x = log2FoldChange, y= -log10(padj)), box.padding = unit(1, "lines"), alpha = 1, max.time = 10, point.padding = 0.5, force = 1, size = 6)+
theme_classic(base_size = 24)
#Filtered out some labels that were causing too many overlaps, couldn't properly read labels
ggsave("volcanoplot.png", width = 12, height = 7)
```
```{r spearman correlation}
spearcor_input <- deseq_results_tidy %>%
filter(padj < 0.05) %>%
select(2)
spearcor_meta <- sample_data(PD_phyloseq_processed)
taxadata <- otu_table(PD_phyloseq_processed) %>% as.data.frame()
taxadata$asv <- row.names(taxadata)
taxadata <- taxadata %>% pivot_longer(F1256:111, names_to = "sample", values_to = "abundance") %>%
group_by(sample) %>%
mutate(rel_abundance = abundance/sum(abundance)) %>%
select(-abundance) %>%
pivot_wider(names_from = "sample", values_from = "rel_abundance")
spearcor_taxa <- taxadata %>%
filter(asv %in% row.names(spearcor_input)) %>% t() %>% as.data.frame()
colnames(spearcor_taxa) <- spearcor_taxa[1,]
spearcor_taxa_clean <- spearcor_taxa[-1,] %>%
merge(spearcor_meta[,"Age_onset"], by.x = "row.names", by.y = "row.names")
cor_results <- spearcor_taxa_clean[,3:ncol(spearcor_taxa_clean)-1] %>% apply(2, function(x){
# print(as.numeric(x))
abundance_numeric <- as.numeric(x)
cor_result <- cor.test(x = abundance_numeric, y = spearcor_taxa_clean$Age_onset)
c(cor_result[3][[1]], cor_result[4][[1]])
})
cor_results <- as.data.frame(cor_results) %>% t() %>% as.data.frame()
colnames(cor_results) <- c("pval", "cor_coeff")
cor_results$padj <- cor_results$pval %>%
p.adjust("fdr")
cor_results %>% filter(padj < 0.05)
```
```{r}
rel_abund <- spearcor_taxa_clean[,"c6faae06a70c97500e4f329b617e70b7"] %>% as.numeric()
age_onset <- spearcor_taxa_clean$Age_onset
plot(spearcor_taxa_clean[,"c6faae06a70c97500e4f329b617e70b7"], spearcor_taxa_clean$Age_onset)
ggplot()+
geom_point(aes(x = age_onset, y = rel_abund))+
geom_smooth(aes(x = age_onset, y = rel_abund), method = "lm")
```
```{r}
otu_alldata <- OTU %>% as.data.frame()
otu_alldata$asv <- row.names(otu_alldata)
otu_alldata <- otu_alldata %>%
pivot_longer(F1052:300, names_to = "sample", values_to = "abundance") %>%
group_by(sample) %>%
mutate(rel_abundance = abundance/sum(abundance)) %>%
select(-abundance) %>%
pivot_wider(names_from = "sample", values_from = "rel_abundance") %>%
as.data.frame()
row.names(otu_alldata) <- otu_alldata$asv
otu_alldata <- otu_alldata %>% select(-asv) %>%
t() %>% as.data.frame()
temp <- all_data_phyloseq@otu_table %>% t() %>% as.data.frame()
deseq_results_tidy
corrplot_data <- meta_df %>% select(Disease, Age, Age_onset, Fish, Sex) %>%
merge(otu_alldata["b21cc0edd44ff27de967257d89a72e2d"], by = "row.names")
# ggplot(corrplot_data, aes(`c6faae06a70c97500e4f329b617e70b7`, Age, color = Fish > 27.3)) +
# geom_point()+
# geom_smooth(method = "lm")+
# # xlim(c(0, 0.04))+
# facet_grid(. ~Sex)
ggplot(corrplot_data, aes(`b21cc0edd44ff27de967257d89a72e2d`, Age, color = Disease)) +
geom_point()+
geom_smooth(method = "lm")+
# xlim(c(0.000000001, 0.04))+
facet_grid(Sex ~.)+
theme(text = element_text(size = 16))+
labs(x = "Intestinimonas relative abundance")
ggsave("corrplot_intestinimonas.png", width = 8, height = 6)
```
```{r}
all_data_phyloseq <- phyloseq(OTU, meta_df, TAX, tree)
all_data_phyloseq_glommed <- all_data_phyloseq %>%
tax_glom(taxrank = "Genus")
taxadata_alldata <- all_data_phyloseq_glommed@otu_table %>%
merge(all_data_phyloseq_glommed@tax_table %>% as.data.frame(), by.x = "row.names", by.y = "row.names")
taxadata_alldata <- taxadata_alldata %>% pivot_longer(F1052:F9959, names_to = "sample", values_to = "abundance") %>%
group_by(sample) %>%
mutate(rel_abundance = abundance) %>% #/sum(abundance)) %>%
select(-abundance) %>%
pivot_wider(names_from = "sample", values_from = "rel_abundance") %>%
select(Genus, F1052:F9959)
heatmap_data <- data.frame(asv = c(), male_pval = c(), male_estimate = c(), female_pval = c(), female_estimate = c(), direction = c())
for (species in deseq_results_tidy$Genus[deseq_results_tidy$padj <= 0.05 & abs(deseq_results_tidy$log2FoldChange) > 1 & !grepl("uncultured", deseq_results_tidy$Genus)]){
loop_data <- taxadata_alldata %>% filter(Genus == species) %>%
pivot_longer(cols = F1052:301, names_to = "sample", values_to = "rel_abundance") %>%
merge(meta_df, by.x = "sample", by.y = "row.names")
loop_direction <- ifelse(deseq_results_tidy$log2FoldChange[deseq_results_tidy$Genus == species] > 0, "Increase", "Decrease")
loop_male_lm <- lm(Age ~ Disease * rel_abundance + Energy_kj + BMI, data = loop_data[loop_data$Sex == "Male",], na.action = na.omit) %>%
tidy()
male_pval <- loop_male_lm$p.value[[6]]
male_estimate <- loop_male_lm$estimate[[6]]
loop_female_lm <- lm(Age ~ Disease * rel_abundance + Energy_kj + BMI, data = loop_data[loop_data$Sex == "Female",], na.action = na.omit) %>%
tidy()
female_pval <- loop_female_lm$p.value[[6]]
female_estimate <- loop_female_lm$estimate[[6]]
loop_results <- data.frame(asv = c(species), male_pval = c(male_pval), male_estimate = c(male_estimate), female_pval = c(female_pval), female_estimate = c(female_estimate), direction = loop_direction)
heatmap_data <- bind_rows(heatmap_data, loop_results)
}
heatmap_data$male_pval <- heatmap_data$male_pval %>% p.adjust(method = "fdr")
heatmap_data$female_pval <- heatmap_data$female_pval %>% p.adjust(method = "fdr")
heatmap_data_adj <- heatmap_data %>%
mutate(Genus = str_match(asv, "g__(.*)")[,2]) %>%
column_to_rownames("Genus") %>%
select(-asv) %>%
na.omit()
heatmap_input <- heatmap_data_adj[,c(2,4)] %>%
as.matrix()
log2fc_pheatmap <- pheatmap(heatmap_input, color = colorRampPalette(c("#2265FF", "#FF4949"))(100),
breaks = c(seq(-1,-0.2001, 0.8/10), seq(-0.2,-0.0001, 0.2/40), seq(0,0.2, 0.2/40), seq(0.2001,1, 0.8/10)),
cluster_rows = T,
cluster_cols = F,
cellwidth = 25,
treeheight_row = 0,
annotation_row = heatmap_data_adj %>% select("Log2FC(High-Low)" = direction))
ggsave("log2FC_pheatmap_lm.png", plot = log2fc_pheatmap, height = 5, width = 6)
deseq_results_tidy %>%
filter(padj <= 0.05) %>%
arrange(log2FoldChange)
```
```{r}
distribution_data <- meta_fishonset %>%
merge(otu_alldata["b21cc0edd44ff27de967257d89a72e2d"], by = "row.names")%>%
mutate(sample = factor(Row.names))
distribution_data %>%
ggplot(aes(x = as.character(Row.names), y = b21cc0edd44ff27de967257d89a72e2d))+
geom_bar(stat = "identity")+
facet_wrap(facets = "fish_age_status")+
theme(axis.text.x = element_text(angle = 90))
colorRamp_15 <- colorRampPalette(brewer.pal(8, "RdYlBu"))(15)
distribution_data %>%
group_by(b21cc0edd44ff27de967257d89a72e2d, fish_age_status) %>%
sample_n(1) %>%
filter(b21cc0edd44ff27de967257d89a72e2d != 0)%>%
ggplot(aes(x = factor(sample), y = b21cc0edd44ff27de967257d89a72e2d))+
geom_bar(stat = "identity", width = 0.7, fill = "#f88605", color = "black", size = 1)+
facet_grid(fish_age_status~., scale = "free")+
coord_flip()+
theme(axis.text = element_text(size = 14), text = element_text(size = 17))+
labs(x = "Fish and Age of Onset Status", y = "Relative abundance")
ggsave("intestimonas_dist.png", width = 6, height = 8)
distribution_data %>%
group_by(fish_age_status, b21cc0edd44ff27de967257d89a72e2d == 0)%>%
summarize(count = n())%>%
mutate(prop = count/sum(count), prop_neg = 1 - prop) %>%
filter(`b21cc0edd44ff27de967257d89a72e2d == 0`) %>%
ggplot()+
geom_bar(aes(x = reorder(fish_age_status, prop_neg, decreasing = TRUE), y = prop_neg), stat = "identity", width = 0.7, fill = "#f88605", color = "black", size = 1)+
labs(x = "Fish and Age of Onset Status", y = "Prevalence (Out of Male PD)")
ggsave("intestimonas_prevalence.png", width = 6, height = 5)
meta_fishonset %>%
ggplot(aes(x = Age, y = age_onset))+
geom_point()+
geom_smooth(method = "lm")+
geom_label(aes(x = 71, y = 45, label = paste0("Correlation coefficient = ",round(cor(meta_fishonset$Age, meta_fishonset$Age_onset), 2))),size = 6)+
labs(y = "Age of disease onset", x = "Current age at time of study")+
theme(axis.text = element_text(size = 13))
ggsave("cor_age_ageOnset.png", width = 7, height = 4.5)
```
```{r}
palette_Graph <- palette.colors(palette = "Set1")
meta_df %>%
ggplot()+
geom_density(aes(x = Age_onset), fill = "gray", alpha = 0.6)+
geom_vline(xintercept = median(meta_df$Age_onset, na.rm = TRUE))+
labs(x = "Age of onset", y = "Density")
ggsave("AgeOnset_density.png", width = 7, height = 4.5)
meta_df %>%
ggplot()+
geom_density(aes(x = Fish, fill = factor(Sex, levels = c("Male", "Female"))), alpha = 0.4)+
geom_vline(xintercept = median(meta_df$Fish[meta_df$Sex == "Male"], na.rm = TRUE), color = "#F8766D")+
geom_vline(xintercept = median(meta_df$Fish[meta_df$Sex == "Female"], na.rm = TRUE), color = "#00BFC4")+
labs(x = "Fish Intake", y = "Density", fill = "Sex")
ggsave("densityFish.png", width = 7, height = 4.5)
```
```{r MetaCyc}
SMP <- sample_data(metafish_new)
# Create new phyloseq object with MetaCyc terms
# metacyc <- read_delim("path_abun_unstrat.tsv", delim="\t") %>%
# merge(meta_df, by.x = "sequence", by.y = "row.names")
mc <- data.table::fread("path_abun_unstrat.tsv") %>%
column_to_rownames("pathway")
t = mc %>% mutate(mc_term = rownames(mc),Genus = rownames(mc)) %>% select(mc_term,Genus)
biom <- phyloseq(otu_table(as.matrix(mc),taxa_are_rows = T), SMP)
# Geometric mean function
# gm_mean = function(x, na.rm=TRUE){
# exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x))
# }
abundance_filter <- 0.000
pval <- 0.05
prev_filter <- 0
# Prevalence filter
prev_table <- biom@otu_table@.Data %>% as.data.frame()
phylo_prev_filtered = prune_taxa(apply(prev_table, 1, function(x) sum(x != 0, na.rm = TRUE)/length(x[!is.na(x)]))>prev_filter,biom)
# Abundance filter
phylo_abundance_filtered = prune_taxa((taxa_sums(transform(phylo_prev_filtered,'compositional'))/nsamples(phylo_prev_filtered))>abundance_filter,phylo_prev_filtered)
phyloseq_filtered_p1 <- transform_sample_counts(phylo_abundance_filtered, function(x) x+1)
# # Run deseq2
temp2 <- phyloseq_to_deseq2(phyloseq_filtered_p1, ~Fish_intake)
# #
# temp2 = estimateSizeFactors(temp2, geoMeans = geoMeans)
temp2 = DESeq(temp2, fitType="local")
temp2$Fish_intake <- factor(temp2$Fish_intake,levels = c("low_fish", "high_fish"))
res = results(temp2)
res = res[order(res$padj), ]
metaCyc_data <- res %>% as.data.frame() %>%
filter(padj <= 0.05) %>%
rownames_to_column("pathway")
pathway_names <- tibble(name = metaCyc_data$pathway, value = c("sulfoquinovose degradation I",
"superpathway of lipopolysaccharide biosynthesis",
"phenylacetate degradation I (aerobic)",
"superpathway of phenylethylamine degradation",
"superpathway of L-threonine metabolism",
"4-hydroxyphenylacetate degradation",
"formaldehyde assimilation I (serine pathway)",
"aerobactin biosynthesis",
"phosphopantothenate biosynthesis III",
"mevalonate pathway II (archaea)",
"peptidoglycan biosynthesis V (β-lactam resistance)"))
metaCycplot_data <- metaCyc_data %>%
merge(pathway_names, by.x = "pathway", by.y = "name") %>%
mutate(direction = case_when(log2FoldChange > 0 ~ "#f94449",
log2FoldChange < 0 ~ "lightblue"))
ggplot(metaCycplot_data)+
geom_col(aes(x = reorder(value, log2FoldChange), y = log2FoldChange, fill = direction), width = 0.8, color = "black")+
labs(x = "MetaCyc pathway", y = "Log2FC(high_fish-low_fish)", fill = "Direction")+
geom_errorbar(aes(x=reorder(value, log2FoldChange), ymin=log2FoldChange-lfcSE, ymax=log2FoldChange+lfcSE), width = 0.2)+
theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5))+
scale_fill_discrete(labels = c("High", "Low"))+
coord_flip()+
theme_classic(base_size = 22)
ggsave("metaCyc.png", height = 7, width = 14)
```