#!/bin/bash
#
# PGx pipeline driver.
#
# Starts the `pgx-api` Docker container, then (unless SKIP_STELLAR is set)
# downloads the listed VCFs and their BAM archives from S3, indexes the BAMs,
# runs Stellar PGx and posts each VCF to the PGx engine. Afterwards the
# container is stopped, results are collected in pgx_results/ (unless
# SKIP_OVERVIEW is set) and, when upload_s3 is true, pushed back to S3.
#
# Usage:
#   <script> [FILE]
#     FILE  text file with one VCF S3 URI per line; "-" (the default)
#           means standard input.
#
# Environment:
#   SKIP_STELLAR   non-empty: skip download, Stellar PGx and the engine run.
#   SKIP_OVERVIEW  non-empty: skip post-processing into pgx_results/.

# -x: trace every command; -e: abort on the first failing command.
set -xe

# Bucket that holds the BAM archives, addressed as <run>/<sample>.tar.gz.
bam_s3_bucket="serenomica-pipeline-archive"

# Set to true to upload the results to S3 at the end of the run.
upload_s3=false

# CYP2D6 reference BED that is copied into the Stellar resources.
# Alternatives:
#   stellar_references/cyp2d6_reference_pgx_copy.bed
#   stellar_references/cyp2d6_reference_pgx.bed
#   stellar_references/nicole_new_ref.bed
stellarpgx_ref="stellar_references/cyp2d6_reference_HerCan_v2.bed"

# Input list of VCF S3 URIs; falls back to "-" (stdin) when no argument is given.
file=${1--}

# sample name -> S3 prefix of its VCF / of its BAM archive.
declare -A vcf_s3_dict
declare -A bam_s3_dict

echo "----------------------------------------------------------------"

# ------------------------ Start PGx Engine --------------------------
# No trailing newline: the output of `docker start` continues the line.
printf '%s' "Start Docker container: "
docker start pgx-api

#######################################
# Derive and record the S3 locations that belong to one VCF URI.
# Globals:
#   bam_s3_bucket (read)
#   vcf_s3_dict, bam_s3_dict (written: sample -> prefix)
#   vcf_s3_bucket, flowcell, run, sample, vcf_s3_prefix, bam_s3_prefix
#     (written, so the caller can keep using them)
# Arguments:
#   $1 - VCF S3 URI; the fields are taken positionally, i.e. the URI is
#        expected to look like s3://<bucket>/<flowcell>/<run>/<sample>.vcf
# Returns:
#   0 on success, 1 when no sample name can be derived from the URI.
#######################################
register_sample() {
  local -a parts
  # Split on "/" (a run of slashes counts as one separator) without
  # exposing the URI to word splitting or globbing:
  #   parts = (s3: <bucket> <flowcell> <run> <sample>.vcf)
  mapfile -t parts < <(tr -s '/' '\n' <<< "$1")

  vcf_s3_bucket="${parts[1]}"
  flowcell="${parts[2]}"
  run="${parts[3]}"
  sample="${parts[4]/.vcf/}"

  # An empty key is not a valid associative-array subscript.
  if [[ -z "$sample" ]]; then
    printf 'Cannot derive a sample name from S3 URI: %s\n' "$1" >&2
    return 1
  fi

  vcf_s3_prefix="s3://${vcf_s3_bucket}/${flowcell}/${run}"
  bam_s3_prefix="s3://${bam_s3_bucket}/${run}"

  vcf_s3_dict[$sample]=$vcf_s3_prefix
  bam_s3_dict[$sample]=$bam_s3_prefix
}

if [[ -z "$SKIP_STELLAR" ]]; then
  # ------------------------ Empty data folder -------------------------
  mkdir -p data
  # Same set of entries the original `rm -r data/*` targeted (dotfiles are
  # left alone); an unmatched glob stays literal and fails the -e/-L test.
  stale=(data/*)
  if [[ -e "${stale[0]}" || -L "${stale[0]}" ]]; then
    echo "Remove existing files in data folder"
    rm -r -- "${stale[@]}"
  fi

  # --------------------- Download files from S3 -----------------------
  # `|| [[ -n ... ]]` keeps a final line that has no trailing newline.
  while IFS= read -r vcf_s3 || [[ -n "$vcf_s3" ]]; do
    [[ -n "$vcf_s3" ]] || continue # skip blank lines
    register_sample "$vcf_s3" || exit 1

    bam_s3="${bam_s3_prefix}/${sample}.tar.gz"
    s3cmd get "$vcf_s3" data/
    s3cmd get "$bam_s3" data/

    # Unpack the BAM archive inside data/ and drop the tarball afterwards.
    tar -xf "data/${sample}.tar.gz" -C data
    ls -l data
    rm -- "data/${sample}.tar.gz"
    echo "SAMPLE DOWNLOAD COMPlETE"
  done < <(cat -- "$file")
  echo "ALL DOWNLOADS COMPlETE"

  # ------------------------ Index BAM files ---------------------------
  for f in data/*.bam; do
    # An unmatched glob leaves the literal pattern, which is not a file.
    if [[ ! -f "$f" ]]; then
      echo "No BAM file downloaded or found."
      exit 1
    fi
    if [[ ! -f "$f.bai" ]]; then
      echo "Indexing: $f"
      # Disabled CRAM-to-BAM conversion, kept for reference:
      #   BAM=${f/.cram/.bam}
      #   samtools view -T hg38/Homo_sapiens_assembly38.fasta -b -o $BAM $f
      samtools index "$f" "$f.bai"
    fi
  done

  # ------------------------ Run Stellar PGx ---------------------------
  echo "Starting stellar PGx"
  cp -- "$stellarpgx_ref" resources/cyp2d6/res_hg38/test3.bed
  ./run_stellar.sh

  # ------------------- Generate final PGx Output ----------------------
  for f in data/*.vcf; do
    [[ -e "$f" ]] || continue # glob matched nothing

    # data/<name>_calls.vcf -> data/<name>
    s=${f%.vcf}
    s=${s/_calls/}
    printf '\nRun PGx Engine for: %s_calls.vcf\n' "$s"

    # Header lines: prefix the contig IDs with "chr".
    grep -E '^#' "$f" \
      | sed 's/##contig=<ID=/##contig=<ID=chr/' > "${s}_outside.vcf"
    # Records: prefix each line with "chr" and replace a "." in the column
    # before "AC=" with PASS. sed is used instead of `xargs -I {} echo`,
    # which would interpret quotes and backslashes inside the records.
    grep -vE '^#' "$f" \
      | sed -e 's/^/chr/' -e 's/\t[.]\tAC=/\tPASS\tAC=/' >> "${s}_outside.vcf"

    python3 combine_outside_calls.py "$s"

    # Send the rewritten VCF and the outside-call TSV to the engine; the
    # response body is stored as <sample>_pgx_result.zip.
    curl -X 'POST' \
      'http://localhost:5000/run' \
      -H 'accept: application/json' \
      -H 'Content-Type: multipart/form-data' \
      -F "files=@${s}_outside.vcf;type=text/x-vcard" \
      -F "files=@${s}_outside_call.tsv" \
      -o "${s}_pgx_result.zip"

    # Snapshot the container log; rewritten after every sample.
    docker logs pgx-api > data/engine.log 2>&1
  done
else
  # Nothing is downloaded or run, but the sample -> S3 prefix tables are
  # still needed for the upload step. The list is read from "$file", the
  # same source the download loop uses.
  while IFS= read -r vcf_s3 || [[ -n "$vcf_s3" ]]; do
    [[ -n "$vcf_s3" ]] || continue # skip blank lines
    register_sample "$vcf_s3" || exit 1
  done < <(cat -- "$file")
fi

# ----------------------- Stop PGx Engine ----------------------------
# Blank separator line, then a label without trailing newline so that the
# output of `docker stop` continues on the same line.
echo ""
printf '%s' "Stop Docker container: "
docker stop pgx-api

if [[ -z "$SKIP_OVERVIEW" ]]; then
  # ---------------------- Post-Processing -----------------------------
  mkdir -p pgx_results

  # Clear out results of a previous run. Only the entries matched by
  # pgx_results/* are removed (dotfiles are left alone); an unmatched glob
  # stays literal and fails the -e/-L test, so an empty or dotfile-only
  # folder no longer makes `rm` fail.
  old_results=(pgx_results/*)
  if [[ -e "${old_results[0]}" || -L "${old_results[0]}" ]]; then
    echo "Remove existing files in pgx_results folder"
    rm -r -- "${old_results[@]}"
  fi

  echo "Copy PGx Output to pgx_results"
  cp -- data/*.zip pgx_results/
  cp -- data/engine.log pgx_results/
  cp -- data/*.alleles pgx_results/

  echo "Unzip and create diplotype/rsid overview"
  python3 create_overview.py

  # Disabled step, kept for reference:
  #   echo "Copy over allele files"
  #   python3 copy_allele_file.py

  # Remove intermediate files now that the results have been copied.
  rm -r -- work/*
  rm -- data/*outside*
  rm -- data/*zip
  rm -- data/engine.log
  rm -- data/*.alleles
fi

# The engine log is dropped from pgx_results/ regardless of SKIP_OVERVIEW;
# -f keeps this quiet when the file does not exist.
rm -f -- pgx_results/engine.log

# --------------------- Upload files to S3 ---------------------------
if [[ "$upload_s3" == true ]]; then
  # Walk the per-sample result folders with a glob instead of parsing `ls`.
  # NOTE(review): assumes the folders are named <sample>_pgx_result, which is
  # what the original fixed-length strip of 11 trailing characters implies -
  # confirm against create_overview.py.
  for result in pgx_results/*_pgx_result; do
    [[ -d "$result" ]] || continue # unmatched glob or a plain file

    # pgx_results/<sample>_pgx_result -> <sample>
    sample=${result#pgx_results/}
    sample=${sample%_pgx_result}
    [[ -n "$sample" ]] || continue # empty key is not a valid subscript

    vcf_s3_prefix=${vcf_s3_dict[$sample]:-}
    bam_s3_prefix=${bam_s3_dict[$sample]:-}
    # Without a recorded prefix the uploads would target "/<sample>.json".
    if [[ -z "$vcf_s3_prefix" || -z "$bam_s3_prefix" ]]; then
      printf 'No S3 destination recorded for sample: %s\n' "$sample" >&2
      exit 1
    fi

    # The JSON report goes next to the VCF ...
    s3cmd put "${result}/output.json" "${vcf_s3_prefix}/${sample}.json"

    # ... and the whole result folder, compressed, next to the BAM archive.
    printf '%s' "Compressing: "
    tar -zcvf "${result}.tar.gz" "${result}"
    s3cmd put "${result}.tar.gz" "${bam_s3_prefix}/"
  done
fi

printf '\nDone\n'
echo "----------------------------------------------------------------"