#!/bin/bash bam_s3_bucket="quantgene-wes-archive" upload_s3='true' download='true' index_bam='true' dev_suffix="dev_sref_01" stellarpgx_ref="stellar_references/cyp2d6_reference_pgx.bed" declare -A vcf_s3_dict declare -A bam_s3_dict declare -A stellarpgx_ref_dict=( ["sref_00"]="test3.bed" ["sref_01"]="cyp2d6_reference_pgx.bed" ["sref_02"]="cyp2d6_reference_wes_jan6.bed" ["sref_03"]="cyp2d6_reference_wes_march21.bed" ) while getopts ':r:nx' 'OPTKEY'; do case ${OPTKEY} in 'r') stellarpgx_ref="stellar_references/"${stellarpgx_ref_dict[$OPTARG]} dev_suffix="dev_"$OPTARG ;; 'n') download='false' ;; 'x') index_bam='false' ;; '?') echo "INVALID OPTION -- ${OPTARG}" >&2 exit 1 ;; ':') echo "MISSING ARGUMENT for option -- ${OPTARG}" >&2 exit 1 ;; *) echo "UNIMPLEMENTED OPTION -- ${OPTKEY}" >&2 exit 1 ;; esac done echo "----------------------------------------------------------------" # ------------------------ Start PGx Engine -------------------------- echo -n "Start Docker container: " docker start pgx-api # ------------------------ Empty data folder ------------------------- if $download; then if ! [ -z "$(ls -A data)" ]; then echo "Remove existing files in data folder" rm -r data/* fi fi # --------------------- Download files from S3 ----------------------- while IFS= read vcf_s3; do temp=($(echo $vcf_s3 | tr "/" "\n")) vcf_s3_bucket="${temp[1]}" flowcell="${temp[2]}" run="${temp[3]}" sample="${temp[4]::-4}" vcf_s3_prefix="s3://${vcf_s3_bucket}/${flowcell}/${run}" bam_s3_prefix="s3://${bam_s3_bucket}/${run}" vcf_s3_dict[$sample]=$vcf_s3_prefix bam_s3_dict[$sample]=$bam_s3_prefix bam_s3="${bam_s3_prefix}/${sample}.deduped.bam" if $download; then aws s3 cp $vcf_s3 data/ aws s3 cp $bam_s3 data/ echo "SAMPLE DOWNLOAD COMPlETE" fi done echo "ALL DOWNLOADS COMPlETE" # ------------------------ Index BAM files --------------------------- if $index_bam; then for f in data/*.bam; do echo "Indexing: "$f samtools index $f $f".bai" done fi # ------------------------ Run Stellar PGx --------------------------- echo "Starting stellar PGx" cp $stellarpgx_ref resources/cyp2d6/cyp_hg38/test3.bed ./run_stellar.sh # ------------------- Generate final PGx Output ---------------------- for f in data/*.vcf; do s=${f::-4} printf "\nRun PGx Engine for: ${s:5}\n" python3 combine_outside_calls.py $s curl -X 'POST' \ 'http://localhost:5000/run' \ -H 'accept: application/json' \ -H 'Content-Type: multipart/form-data' \ -F "files=@${s}.vcf;type=text/x-vcard" \ -F "files=@${s}_outside_call.tsv" \ -o "${s}_pgx_result.zip" done # ----------------------- Stop PGx Engine ---------------------------- echo "" echo -n "Stop Docker container: " docker stop pgx-api # ---------------------- Post-Processing ----------------------------- if ! [ -z "$(ls -A pgx_results)" ]; then echo "Remove existing files in pgx_results folder" rm -r pgx_results/* fi echo "Copy PGx Output to pgx_results" cp data/*.zip pgx_results/ echo "Unzip and create diplotype/rsid overview" python3 create_overview.py echo "Copy over allele files" python3 copy_allele_file.py rm -r work/* rm data/*outside* rm data/*zip rm data/*.alleles # --------------------- Upload files to S3 --------------------------- if $upload_s3; then for result in $(ls -d pgx_results/*); do sample="${result:12:-11}" vcf_s3_prefix=${vcf_s3_dict[$sample]} bam_s3_prefix=${bam_s3_dict[$sample]} aws s3 cp "${result}/output.json" "${vcf_s3_prefix}/${sample}.${dev_suffix}.json" --storage-class INTELLIGENT_TIERING echo -n "Compressing: " tar -zcvf "${result}.${dev_suffix}.tar.gz" "${result}" aws s3 cp "${result}.${dev_suffix}.tar.gz" "${bam_s3_prefix}/" --storage-class INTELLIGENT_TIERING done fi printf "\nDone\n" echo "----------------------------------------------------------------"