reproduce_setup/pgx-main/get_pgx_result.sh

#!/bin/bash
# Fetch sample data from S3, run Stellar PGx and the PGx engine on it, and
# collect the per-sample results.
#
# Usage: get_pgx_result.sh [LIST]
#   LIST  file with one VCF S3 URI per line; "-" (the default when no
#         argument is given) makes `cat` read the list from stdin.
# Environment:
#   SKIP_STELLAR   non-empty: skip download, BAM indexing, Stellar PGx and the
#                  engine run; only the sample/S3 bookkeeping is done.
#   SKIP_OVERVIEW  non-empty: skip the post-processing step.

# Abort on the first failing command and trace every command to stderr.
set -e -x

# Input list: first positional argument if one was passed, otherwise stdin.
if (( $# > 0 )); then
  file=$1
else
  file=-
fi

# Reference BED that gets copied into the Stellar PGx resources before a run.
# Alternatives that were used before:
#   stellar_references/cyp2d6_reference_pgx_copy.bed
#   stellar_references/cyp2d6_reference_pgx.bed
#   stellar_references/nicole_new_ref.bed
stellarpgx_ref="stellar_references/cyp2d6_reference_HerCan_v2.bed"

# Set to "true" to push the results back to S3 at the end of the run.
upload_s3=false

# Bucket holding the per-run "<sample>.tar.gz" archives.
bam_s3_bucket="serenomica-pipeline-archive"

# sample name -> S3 prefix of its VCF / of its archive.
declare -A vcf_s3_dict bam_s3_dict

printf '%s\n' "----------------------------------------------------------------"

# ------------------------ Start PGx Engine --------------------------
# The label is printed without a trailing newline so that whatever
# `docker start` writes to the terminal ends up on the same line.
printf '%s' "Start Docker container: "
docker start pgx-api

#######################################
# Parse one VCF S3 URI and record where the sample's data lives.
# The field positions assume a URI of the form
#   s3://<bucket>/<flowcell>/<run>/<sample>.vcf
# (inferred from how the prefixes are rebuilt below).
# Globals:
#   bam_s3_bucket (read)
#   vcf_s3_bucket, flowcell, run, sample (written)
#   vcf_s3_prefix, bam_s3_prefix (written)
#   vcf_s3_dict, bam_s3_dict (written, keyed by sample name)
# Arguments:
#   $1 - S3 URI of the sample's VCF
# Returns:
#   0 on success, 1 if no sample name can be derived from the URI
#######################################
register_sample() {
  local -a parts
  # One path component per element. Squeezing repeated "/" keeps the indices
  # as before (parts[0] is the "s3:" scheme) while avoiding word-splitting
  # and glob expansion of the URI.
  mapfile -t parts < <(tr -s '/' '\n' <<< "$1")

  vcf_s3_bucket="${parts[1]}"
  flowcell="${parts[2]}"
  run="${parts[3]}"
  sample="${parts[4]/.vcf/}"

  # An empty key is not a valid associative-array subscript; fail with a
  # message that names the offending input instead.
  if [[ -z "$sample" ]]; then
    printf 'Cannot derive a sample name from: %s\n' "$1" >&2
    return 1
  fi

  vcf_s3_prefix="s3://${vcf_s3_bucket}/${flowcell}/${run}"
  bam_s3_prefix="s3://${bam_s3_bucket}/${run}"

  vcf_s3_dict["$sample"]="$vcf_s3_prefix"
  bam_s3_dict["$sample"]="$bam_s3_prefix"
}

if [[ -z "$SKIP_STELLAR" ]]; then
  # ------------------------ Empty data folder -------------------------
  mkdir -p data
  # Test the glob itself rather than `ls -A`: a folder that only contains
  # dotfiles would otherwise make `rm -r data/*` fail and abort the run.
  stale_files=(data/*)
  if [[ -e "${stale_files[0]}" || -L "${stale_files[0]}" ]]; then
    echo "Remove existing files in data folder"
    rm -r -- "${stale_files[@]}"
  fi

  # --------------------- Download files from S3 -----------------------
  # `read` with the default IFS trims surrounding whitespace; the extra
  # `|| [[ -n ... ]]` keeps a final line that has no trailing newline.
  while read -r vcf_s3 || [[ -n "$vcf_s3" ]]; do
    [[ -n "$vcf_s3" ]] || continue  # ignore blank lines
    register_sample "$vcf_s3"

    bam_s3="${bam_s3_prefix}/${sample}.tar.gz"
    s3cmd get "$vcf_s3" data/
    s3cmd get "$bam_s3" data/
    # Unpack the archive inside data/ and drop the tarball afterwards.
    tar -xf "data/${sample}.tar.gz" -C data/
    ls -l data/
    rm -- "data/${sample}.tar.gz"
    echo "SAMPLE DOWNLOAD COMPlETE"
  done < <(cat -- "$file")
  echo "ALL DOWNLOADS COMPlETE"

  # ------------------------ Index BAM files ---------------------------
  for f in data/*.bam; do
    # An unmatched glob stays literal, so a missing file means "no BAMs".
    if [[ ! -f "$f" ]]; then
      echo "No BAM file downloaded or found."
      exit 1
    fi
    if [[ ! -f "$f.bai" ]]; then
      echo "Indexing: $f"
      #BAM=${f/.cram/.bam}
      #samtools view -T hg38/Homo_sapiens_assembly38.fasta -b -o $BAM $f
      samtools index "$f" "$f.bai"
    fi
  done

  # ------------------------ Run Stellar PGx ---------------------------
  echo "Starting stellar PGx"
  cp -- "$stellarpgx_ref" resources/cyp2d6/res_hg38/test3.bed
  ./run_stellar.sh

  # ------------------- Generate final PGx Output ----------------------
  for f in data/*.vcf; do
    [[ -e "$f" ]] || continue  # glob matched nothing
    s=${f::-4}        # strip ".vcf"
    s=${s/_calls/}
    printf '\nRun PGx Engine for: %s_calls.vcf\n' "$s"

    # Build "<sample>_outside.vcf": header lines first (contig IDs get a
    # "chr" prefix), then every record with "chr" prepended and a "." FILTER
    # in front of "AC=" turned into PASS. sed replaces the former
    # `xargs -I {} echo chr{}`, which stripped quotes/backslashes from the
    # records and spawned one process per line.
    {
      grep -E '^#' "$f" | sed 's/##contig=<ID=/##contig=<ID=chr/'
      sed -e '/^#/d' \
          -e '/^[[:space:]]*$/d' \
          -e 's/^/chr/' \
          -e 's/\t[.]\tAC=/\tPASS\tAC=/' "$f"
    } > "${s}_outside.vcf"

    python3 combine_outside_calls.py "$s"
    curl -X 'POST' \
      'http://localhost:5000/run' \
      -H 'accept: application/json' \
      -H 'Content-Type: multipart/form-data' \
      -F "files=@${s}_outside.vcf;type=text/x-vcard" \
      -F "files=@${s}_outside_call.tsv" \
      -o "${s}_pgx_result.zip"
    # Snapshot the engine log after every sample (overwritten each time).
    docker logs pgx-api > data/engine.log 2>&1
  done
else
  # Stellar is skipped: only rebuild the sample -> S3 prefix maps. The list
  # is read from "$file" exactly like in the download loop above (the loop
  # previously had no input redirection and always consumed stdin).
  while read -r vcf_s3 || [[ -n "$vcf_s3" ]]; do
    [[ -n "$vcf_s3" ]] || continue  # ignore blank lines
    register_sample "$vcf_s3"
  done < <(cat -- "$file")
fi

# ----------------------- Stop PGx Engine ----------------------------
# Blank line first, then a label that `docker stop` completes with its
# own output.
printf '\n'
printf '%s' "Stop Docker container: "
docker stop pgx-api

if [[ -z "$SKIP_OVERVIEW" ]]; then
  # ---------------------- Post-Processing -----------------------------
  mkdir -p pgx_results
  # Test the glob itself rather than `ls -A`: a folder that only contains
  # dotfiles would otherwise make `rm -r pgx_results/*` fail and abort.
  old_results=(pgx_results/*)
  if [[ -e "${old_results[0]}" || -L "${old_results[0]}" ]]; then
    echo "Remove existing files in pgx_results folder"
    rm -r -- "${old_results[@]}"
  fi

  # Missing inputs are still fatal here: without them there is nothing to
  # build the overview from.
  echo "Copy PGx Output to pgx_results"
  cp -- data/*.zip pgx_results/
  cp -- data/engine.log pgx_results/
  cp -- data/*.alleles pgx_results/

  echo "Unzip and create diplotype/rsid overview"
  python3 create_overview.py

  #echo "Copy over allele files"
  #python3 copy_allele_file.py

  # Cleanup of intermediates. -f keeps an already-empty work/ folder (or an
  # intermediate that is already gone) from aborting the run after the
  # overview has been produced.
  rm -rf -- work/*
  rm -f -- data/*outside* data/*zip data/engine.log data/*.alleles
fi
# The engine log is not kept with the results.
# NOTE(review): presumably create_overview.py reads it before this point -
# confirm.
rm -f pgx_results/engine.log
# --------------------- Upload files to S3 ---------------------------
# Compare against the literal "true" instead of executing the variable's
# value as a command (an empty value would expand to an empty command, which
# counts as success).
if [[ "$upload_s3" == true ]]; then
  # Glob instead of parsing `ls`, so names are never word-split.
  for result in pgx_results/*; do
    [[ -e "$result" ]] || continue  # glob matched nothing

    # The sample name is the entry name minus its last 11 characters
    # (the length of "_pgx_result").
    # NOTE(review): assumes result folders are named "<sample>_pgx_result";
    # confirm against create_overview.py.
    entry="${result#pgx_results/}"
    if (( ${#entry} <= 11 )); then
      printf 'Skipping upload of %s: name too short to contain a sample\n' \
        "$result" >&2
      continue
    fi
    sample="${entry::-11}"

    vcf_s3_prefix="${vcf_s3_dict[$sample]:-}"
    bam_s3_prefix="${bam_s3_dict[$sample]:-}"
    # Entries without a recorded sample would otherwise be uploaded to a
    # malformed destination such as "/<name>.json".
    if [[ -z "$vcf_s3_prefix" || -z "$bam_s3_prefix" ]]; then
      printf 'Skipping upload of %s: no S3 location recorded for sample "%s"\n' \
        "$result" "$sample" >&2
      continue
    fi

    s3cmd put "${result}/output.json" "${vcf_s3_prefix}/${sample}.json"
    echo -n "Compressing: "
    tar -zcvf "${result}.tar.gz" "${result}"
    s3cmd put "${result}.tar.gz" "${bam_s3_prefix}/"
  done
fi

# Closing banner: blank line, "Done", then the same separator that opened the run.
echo ""
echo "Done"
printf '%s\n' "----------------------------------------------------------------"