# Preliminary processing in QIIME2.

# !/bin/bash
# SETTING UP QIIME2 AND IMPORTING THE DATA

# Make a directory for the data and navigate into that directory
# download no_duplicates_metadata.tsv to server directory using 'scp' command from local desktop

# import seqs
qiime tools import \
--type "SampleData[SequencesWithQuality]" \
--input-format SingleEndFastqManifestPhred33V2 \
--input-path ./manifest_update.tsv \
--output-path demux_seqs.qza

qiime demux summarize \
  --i-data ./demux_seqs.qza \
  --o-visualization ./demux_seqs.qzv

# DADA2 denoising and QC, keeping all 150 bases as they all appeared to be fairly even and above the 30 Phred score
qiime dada2 denoise-single \
  --i-demultiplexed-seqs ./demux_seqs.qza \
  --p-trunc-len 150 \
  --o-table ./dada2_table.qza \
  --o-representative-sequences ./dada2_rep_set.qza \
  --o-denoising-stats ./dada2_stats.qza

# make feature table
qiime feature-table summarize \
  --i-table ./dada2_table.qza \
  --m-sample-metadata-file ./no_duplicates_metadata.tsv \
  --o-visualization ./dada2_table.qzv

# visualize DADA2 stats
qiime metadata tabulate \
  --m-input-file dada2_stats.qza \
  --o-visualization dada2_stats.qzv

qiime feature-table tabulate-seqs \
  --i-data dada2_rep_set.qza \
  --o-visualization dada2_rep_set.qzv

# TAXONOMY CLASSIFICATION

qiime feature-classifier classify-sklearn \
  --i-classifier /mnt/datasets/classifiers/silva-138-99-515-806-nb-classifier.qza \
  --i-reads rep-seqs.qza \
  --o-classification taxonomy.qza

qiime metadata tabulate \
  --m-input-file taxonomy.qza \
  --o-visualization taxonomy.qzv

#Generating phylogenetic tree
qiime phylogeny align-to-tree-mafft-fasttree \
  --i-sequences dada2_rep_set.qza \
  --o-alignment aligned-rep-set.qza \
  --o-masked-alignment masked-aligned-rep-seqs.qza \
  --o-tree unrooted-tree.qza \
  --o-rooted-tree rooted-tree.qza

# FILTERING

#Filtering out mitochondria and chloroplasts
qiime taxa filter-table \
  --i-table dada2_table.qza \
  --i-taxonomy taxonomy.qza \
  --p-exclude mitochondria,chloroplast \
  --o-filtered-table table-no-mitochondria-no-chloroplast.qza

# Filter out samples with “NA” listed in ‘probiotic_mode’ and ‘probiotic’ columns
qiime feature-table filter-samples \
  --i-table table-no-mitochondria-no-chloroplast.qza \
  --m-metadata-file no_duplicates_metadata.tsv \
  --p-where "NOT [probiotic_mode]='NA'AND NOT [probiotic]='NA'" \
  --o-filtered-table Q2-filtered-table.qza

# Visualize filtered table
qiime feature-table summarize \
  --i-table ./Q2-filtered-table.qza \
  --m-sample-metadata-file ./no_duplicates_metadata.tsv \
  --o-visualization ./Q2-filtered-table.qzv

# Visualize alpha rarefaction curves
qiime diversity alpha-rarefaction \
  --i-table ./Q2-filtered-table.qza \
  --m-metadata-file ./no_duplicates_metadata.tsv \
  --o-visualization ./Q2_alpha_rarefaction_curves.qzv \
  --p-min-depth 10 \
  --p-max-depth 40000

# Alpha and Beta analyses in QIIME2.

# Calculate alpha- and beta-diversity metrics Q1
qiime diversity core-metrics-phylogenetic \
  --i-phylogeny rooted-tree.qza \
  --i-table Q1-filtered-table.qza \
  --p-sampling-depth 25126 \
  --m-metadata-file no_duplicates_metadata.tsv \
  --output-dir Q1-new-core-metrics-results

# Calculate alpha- and beta-diversity metrics Q2
qiime diversity core-metrics-phylogenetic \
  --i-phylogeny rooted-tree.qza \
  --i-table Q2-filtered-table.qza \
  --p-sampling-depth 21146 \
  --m-metadata-file no_duplicates_metadata.tsv \
  --output-dir Q2-new-core-metrics-results

# Calculate alpha group significance Q2
qiime diversity alpha-group-significance \
  --i-alpha-diversity ./Q2-new-core-metrics-results/faith_pd_vector.qza \
  --m-metadata-file ./no_duplicates_metadata.tsv \
  --o-visualization ./Q2-new-core-metrics-results/Q2_faith_pd_statistics.qzv

qiime diversity alpha-group-significance \
  --i-alpha-diversity ./Q2-new-core-metrics-results/evenness_vector.qza \
  --m-metadata-file ./no_duplicates_metadata.tsv \
  --o-visualization ./Q2-new-core-metrics-results/Q2_evenness_statistics.qzv

qiime diversity alpha-group-significance \
  --i-alpha-diversity ./Q2-new-core-metrics-results/observed_features_vector.qza \
  --m-metadata-file ./no_duplicates_metadata.tsv \
  --o-visualization ./Q2-new-core-metrics-results/Q2_observed_features_statistics.qzv

# Bray_Curtis_distance_matrix

# ‘probiotic’ column
qiime diversity beta-group-significance \
  --i-distance-matrix ./Q2-new-core-metrics-results/bray_curtis_distance_matrix.qza \
  --m-metadata-file ./no_duplicates_metadata.tsv \
  --m-metadata-column probiotic \
  --p-pairwise \
  --o-visualization ./Q2-new-core-metrics-results/Q2_bray_curtis_probiotic.qzv

# ‘probiotic_mode’ column
qiime diversity beta-group-significance \
  --i-distance-matrix ./Q2-new-core-metrics-results/bray_curtis_distance_matrix.qza \
  --m-metadata-file ./no_duplicates_metadata.tsv \
  --m-metadata-column probiotic_mode \
  --p-pairwise \
  --o-visualization ./Q2-new-core-metrics-results/Q2_bray_curtis_probiotic_mode.qzv

# Jaccard_distance_matrix

# ‘probiotic’ column
qiime diversity beta-group-significance \
  --i-distance-matrix ./Q2-new-core-metrics-results/jaccard_distance_matrix.qza \
  --m-metadata-file ./no_duplicates_metadata.tsv \
  --m-metadata-column probiotic \
  --p-pairwise \
  --o-visualization ./Q2-new-core-metrics-results/Q2_jaccard_distance_probiotic.qzv

# ‘probiotic_mode’ column
qiime diversity beta-group-significance \
  --i-distance-matrix ./Q2-new-core-metrics-results/jaccard_distance_matrix.qza \
  --m-metadata-file ./no_duplicates_metadata.tsv \
  --m-metadata-column probiotic_mode \
  --p-pairwise \
  --o-visualization ./Q2-new-core-metrics-results/Q2_jaccard_distance_probiotic_mode.qzv

# unweighted_unifrac_distance_matrix

# ‘probiotic’ column
qiime diversity beta-group-significance \
  --i-distance-matrix ./Q2-new-core-metrics-results/unweighted_unifrac_distance_matrix.qza \
  --m-metadata-file ./no_duplicates_metadata.tsv \
  --m-metadata-column probiotic \
  --p-pairwise \
  --o-visualization ./Q2-new-core-metrics-results/Q2_unweighted_unifrac_probiotic_.qzv

# ‘probiotic_mode’ column
qiime diversity beta-group-significance \
  --i-distance-matrix ./Q2-new-core-metrics-results/unweighted_unifrac_distance_matrix.qza \
  --m-metadata-file ./no_duplicates_metadata.tsv \
  --m-metadata-column probiotic_mode \
  --p-pairwise \
  --o-visualization ./Q2-new-core-metrics-results/Q2_unweighted_unifrac_probiotic_mode.qzv

# Weighted_unifrac_distance_matrix

# ‘probiotic’ column
qiime diversity beta-group-significance \
  --i-distance-matrix ./Q2-new-core-metrics-results/weighted_unifrac_distance_matrix.qza \
  --m-metadata-file ./no_duplicates_metadata.tsv \
  --m-metadata-column probiotic \
  --p-pairwise \
  --o-visualization ./Q2-new-core-metrics-results/Q2_weighted_unifrac_probiotic_.qzv

# ‘probiotic_mode’ column
qiime diversity beta-group-significance \
  --i-distance-matrix ./Q2-new-core-metrics-results/weighted_unifrac_distance_matrix.qza \
  --m-metadata-file ./no_duplicates_metadata.tsv \
  --m-metadata-column probiotic_mode \
  --p-pairwise \
  --o-visualization ./Q2-new-core-metrics-results/Q2_weighted

# Generating differential and relative abundance plots in R.


# Working in QIIME2 

# !/bin/bash
#Export QIIME files to work in R
qiime tools export \
--input-path taxonomy.qza \
--output-path exported

qiime tools export \
--input-path rooted-tree.qza \
--output-path exported

qiime tools export \
--input-path Q2-filtered-table.qza \
--output-path exported

# Edit the column names exported/taxonomy.tsv
sed \
'1 s/Feature ID/#OTUID/;1 s/Taxon/taxonomy/;1 s/Confidence/confidence/' \
exported/taxonomy.tsv \
> exported/biom-taxonomy.tsv

biom add-metadata \
-i exported/feature-table.biom \
-o exported/Q2-table-with-taxonomy.biom \
--observation-metadata-fp exported/biom-taxonomy.tsv \
--sc-separated taxonomy