#!/bin/bash
#
# Download per-sample VCF and BAM data from S3, run StellarPGx on the BAMs,
# send the combined calls to the local PGx Engine API, collect the results in
# pgx_results/, and optionally upload them back to S3.
#
# Usage: pass a file with one S3 VCF path per line
#        (s3://<bucket>/<flowcell>/<run>/<sample>.vcf); reads stdin if omitted.

set -xe

bam_s3_bucket="serenomica-pipeline-archive"
upload_s3=false
stellarpgx_ref="stellar_references/cyp2d6_reference_HerCan_v2.bed" #"stellar_references/cyp2d6_reference_pgx_copy.bed" #"stellar_references/cyp2d6_reference_pgx.bed" #"stellar_references/nicole_new_ref.bed"

file=${1--}

declare -A vcf_s3_dict
declare -A bam_s3_dict

echo "----------------------------------------------------------------"

# ------------------------ Start PGx Engine --------------------------
echo -n "Start Docker container: "
docker start pgx-api

if [ -z "$SKIP_STELLAR" ]; then
    # ------------------------ Empty data folder -------------------------
    if [ -n "$(ls -A data)" ]; then
        echo "Remove existing files in data folder"
        rm -r data/*
    fi

    # --------------------- Download files from S3 -----------------------
    while IFS= read -r vcf_s3; do
        # Split s3://<bucket>/<flowcell>/<run>/<sample>.vcf on "/"
        # (the empty field after "s3:" is dropped by word splitting).
        temp=($(echo $vcf_s3 | tr "/" "\n"))
        vcf_s3_bucket="${temp[1]}"
        flowcell="${temp[2]}"
        run="${temp[3]}"
        sample="${temp[4]/.vcf/}"
        vcf_s3_prefix="s3://${vcf_s3_bucket}/${flowcell}/${run}"
        bam_s3_prefix="s3://${bam_s3_bucket}/${run}"
        vcf_s3_dict[$sample]=$vcf_s3_prefix
        bam_s3_dict[$sample]=$bam_s3_prefix
        bam_s3="${bam_s3_prefix}/${sample}.tar.gz"

        s3cmd get $vcf_s3 data/
        s3cmd get $bam_s3 data/

        cd data/
        tar xf ${sample}.tar.gz
        ls -l
        rm ${sample}.tar.gz
        cd ..
        echo "SAMPLE DOWNLOAD COMPLETE"
    done < <(cat -- "$file")
    echo "ALL DOWNLOADS COMPLETE"

    # ------------------------ Index BAM files ---------------------------
    for f in data/*.bam; do
        if [ ! -f "$f" ]; then
            echo "No BAM file downloaded or found."
            exit 1
        fi
        if [ ! -f "$f.bai" ]; then
            echo "Indexing: $f"
            #BAM=${f/.cram/.bam}
            #samtools view -T hg38/Homo_sapiens_assembly38.fasta -b -o $BAM $f
            samtools index "$f" "$f.bai"
        fi
    done

    # ------------------------ Run StellarPGx ----------------------------
    echo "Starting StellarPGx"
    cp $stellarpgx_ref resources/cyp2d6/res_hg38/test3.bed
    ./run_stellar.sh

    # ------------------- Generate final PGx Output ----------------------
    for f in data/*.vcf; do
        s=${f::-4}
        #if [[ $s = *_calls ]]; then
        s=${s/_calls/}
        printf "\nRun PGx Engine for: ${s}_calls.vcf\n"

        # Rebuild the VCF with "chr"-prefixed contigs: header lines first
        # (contig IDs rewritten), then the variant lines marked as PASS.
        cat $f | grep -E '^#' | sed 's/##contig=<ID=/##contig=<ID=chr/' > ${s}_outside.vcf
        cat $f | grep -vE '^#' | xargs -I {} echo chr{} | sed 's/\t[.]\tAC=/\tPASS\tAC=/' >> ${s}_outside.vcf
        python3 combine_outside_calls.py $s

        curl -X 'POST' \
            'http://localhost:5000/run' \
            -H 'accept: application/json' \
            -H 'Content-Type: multipart/form-data' \
            -F "files=@${s}_outside.vcf;type=text/x-vcard" \
            -F "files=@${s}_outside_call.tsv" \
            -o "${s}_pgx_result.zip"
        docker logs pgx-api > data/engine.log 2>&1
        #fi
    done
else
    # StellarPGx skipped: still parse the input list so the S3 prefixes are
    # available for the upload step below.
    while IFS= read -r vcf_s3; do
        temp=($(echo $vcf_s3 | tr "/" "\n"))
        vcf_s3_bucket="${temp[1]}"
        flowcell="${temp[2]}"
        run="${temp[3]}"
        sample="${temp[4]/.vcf/}"
        vcf_s3_prefix="s3://${vcf_s3_bucket}/${flowcell}/${run}"
        bam_s3_prefix="s3://${bam_s3_bucket}/${run}"
        vcf_s3_dict[$sample]=$vcf_s3_prefix
        bam_s3_dict[$sample]=$bam_s3_prefix
    done < <(cat -- "$file")
fi

# ----------------------- Stop PGx Engine ----------------------------
echo ""
echo -n "Stop Docker container: "
docker stop pgx-api

if [ -z "$SKIP_OVERVIEW" ]; then
    # ---------------------- Post-Processing -----------------------------
    mkdir -p pgx_results
[ -z "$(ls -A pgx_results)" ]; then echo "Remove existing files in pgx_results folder" rm -r pgx_results/* fi echo "Copy PGx Output to pgx_results" cp data/*.zip pgx_results/ cp data/engine.log pgx_results/ cp data/*.alleles pgx_results/ echo "Unzip and create diplotype/rsid overview" python3 create_overview.py #echo "Copy over allele files" #python3 copy_allele_file.py rm -r work/* rm data/*outside* rm data/*zip rm data/engine.log rm data/*.alleles fi rm -f pgx_results/engine.log # --------------------- Upload files to S3 --------------------------- if $upload_s3; then for result in $(ls -d pgx_results/*); do sample="${result:12:-11}" vcf_s3_prefix=${vcf_s3_dict[$sample]} bam_s3_prefix=${bam_s3_dict[$sample]} s3cmd put "${result}/output.json" "${vcf_s3_prefix}/${sample}.json" echo -n "Compressing: " tar -zcvf "${result}.tar.gz" "${result}" s3cmd put "${result}.tar.gz" "${bam_s3_prefix}/" done fi printf "\nDone\n" echo "----------------------------------------------------------------"