pgx-main from prod added
This commit is contained in:
56
pgx-main/scripts/abcb1/hg38/bin/stellarpgx.py
Normal file
56
pgx-main/scripts/abcb1/hg38/bin/stellarpgx.py
Normal file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import math
|
||||
from sv_modules import *
|
||||
|
||||
|
||||
print("--------------------------------------------\n")
|
||||
|
||||
print("ABCB1 Variant Analysis with StellarPGx\n")
|
||||
|
||||
print("--------------------------------------------\n")
|
||||
|
||||
|
||||
infile = sys.argv[1]
|
||||
cov_file = sys.argv[2]
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ';'-joined string.

    Args:
        infile: path to a text file with one variant per line.
        cn: copy number (anything accepted by ``int()``).

    Returns:
        The stripped lines, sorted and joined with ';'. When ``int(cn) == 1``
        every "~0/1" is rewritten to "~1/1". When ``infile`` is a zero-byte
        file, a fixed "No core SNVs detected" message is returned instead.
    """
    # Context manager closes the handle; the original left it open.
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    # presumably a single-copy sample cannot be heterozygous -- TODO confirm
    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    if os.stat(infile).st_size == 0:
        core_vars = "No core SNVs detected; haplotypes equivalent to GRCh38 content"

    return core_vars
|
||||
|
||||
|
||||
|
||||
# Parse the coverage file once; the original called get_total_CN() three
# times and re-read the same file for each field.
_cn_result = get_total_CN(cov_file)
cn = _cn_result[0]
av_ctrl_cov = _cn_result[2]
exon_cov = _cn_result[3]

print("Initially computed Copy Number = {}".format(cn))

# Flag regions whose coverage is below 45% of the control coverage.
# NOTE(review): as in the original loop (range(1, len(exon_cov))), the last
# entry of exon_cov is never checked -- confirm whether that is intended.
for exon_no, cov in enumerate(exon_cov[:-1], start=1):
    if cov / av_ctrl_cov < 0.45:
        print('Check exon {} for potential deletion if using high coverage WGS'.format(str(exon_no)))

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)
|
||||
|
||||
|
||||
56
pgx-main/scripts/abcb1/hg38/bin/stellarpgx.py~
Normal file
56
pgx-main/scripts/abcb1/hg38/bin/stellarpgx.py~
Normal file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import math
|
||||
from sv_modules import *
|
||||
|
||||
|
||||
print("--------------------------------------------\n")
|
||||
|
||||
# The original printed the unexpanded shell template "${i^^}" literally;
# use the gene name, matching the sibling stellarpgx.py for this gene.
print("ABCB1 Variant Analysis with StellarPGx\n")
|
||||
|
||||
print("--------------------------------------------\n")
|
||||
|
||||
|
||||
infile = sys.argv[1]
|
||||
cov_file = sys.argv[2]
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ';'-joined string.

    Args:
        infile: path to a text file with one variant per line.
        cn: copy number (anything accepted by ``int()``).

    Returns:
        The stripped lines, sorted and joined with ';'. When ``int(cn) == 1``
        every "~0/1" is rewritten to "~1/1". When ``infile`` is a zero-byte
        file, a fixed "No core SNVs detected" message is returned instead.
    """
    # Context manager closes the handle; the original left it open.
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    # presumably a single-copy sample cannot be heterozygous -- TODO confirm
    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    if os.stat(infile).st_size == 0:
        core_vars = "No core SNVs detected; haplotypes equivalent to GRCh38 content"

    return core_vars
|
||||
|
||||
|
||||
|
||||
# Parse the coverage file once; the original called get_total_CN() three
# times and re-read the same file for each field.
_cn_result = get_total_CN(cov_file)
cn = _cn_result[0]
av_ctrl_cov = _cn_result[2]
exon_cov = _cn_result[3]

print("Initially computed Copy Number = {}".format(cn))

# Flag regions whose coverage is below 45% of the control coverage.
# NOTE(review): as in the original loop (range(1, len(exon_cov))), the last
# entry of exon_cov is never checked -- confirm whether that is intended.
for exon_no, cov in enumerate(exon_cov[:-1], start=1):
    if cov / av_ctrl_cov < 0.45:
        print('Check exon {} for potential deletion if using high coverage WGS'.format(str(exon_no)))

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)
|
||||
|
||||
|
||||
37
pgx-main/scripts/abcb1/hg38/bin/sv_modules.py
Normal file
37
pgx-main/scripts/abcb1/hg38/bin/sv_modules.py
Normal file
@@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
def get_total_CN(cov_file):
    """Estimate the gene copy number from a per-region coverage table.

    Each line of ``cov_file`` is split on whitespace; a region's average
    coverage is ``field[3] / (field[2] - field[1])``. Line 1 is the gene
    region (ABCB1 here), lines 2 and 3 are the VDR and EGFR control regions,
    and the remaining lines are per-exon regions.
    (presumably start / end / summed depth columns -- TODO confirm)

    Args:
        cov_file: path to the coverage table.

    Returns:
        ``[copy_number_as_str, rounded_gene_cov, rounded_control_cov,
        exon_cov_list]``, where the copy number is
        ``round(2 * gene_cov / mean(control_covs))``.
    """

    def _mean_cov(region):
        # Total coverage divided by region length.
        return float(region[3]) / (float(region[2]) - float(region[1]))

    # Context manager closes the handle; the original left it open.
    with open(cov_file, "r") as handle:
        all_reg = [line.strip().split() for line in handle]

    av_gene_cov = _mean_cov(all_reg[0])
    av_vdr_cov = _mean_cov(all_reg[1])
    av_egfr_cov = _mean_cov(all_reg[2])

    # NOTE(review): the original iterated range(1, len(regions)) with index
    # i-1, so the final region is never included. That behaviour is kept
    # here ([:-1]); confirm whether the last line should be skipped.
    exon_cov_list = [_mean_cov(region) for region in all_reg[3:][:-1]]

    av_ctrl_cov = (av_vdr_cov + av_egfr_cov) / 2

    # Diploid baseline: gene/control ratio of 1.0 corresponds to 2 copies.
    total_cn = round(2 * (av_gene_cov / av_ctrl_cov))

    return [str(int(total_cn)), round(av_gene_cov), round(av_ctrl_cov), exon_cov_list]
|
||||
|
||||
56
pgx-main/scripts/abcg2/hg38/bin/stellarpgx.py
Normal file
56
pgx-main/scripts/abcg2/hg38/bin/stellarpgx.py
Normal file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import math
|
||||
from sv_modules import *
|
||||
|
||||
|
||||
print("--------------------------------------------\n")
|
||||
|
||||
print("ABCG2 Variant Analysis with StellarPGx\n")
|
||||
|
||||
print("--------------------------------------------\n")
|
||||
|
||||
|
||||
infile = sys.argv[1]
|
||||
cov_file = sys.argv[2]
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ';'-joined string.

    Args:
        infile: path to a text file with one variant per line.
        cn: copy number (anything accepted by ``int()``).

    Returns:
        The stripped lines, sorted and joined with ';'. When ``int(cn) == 1``
        every "~0/1" is rewritten to "~1/1". When ``infile`` is a zero-byte
        file, a fixed "No core SNVs detected" message is returned instead.
    """
    # Context manager closes the handle; the original left it open.
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    # presumably a single-copy sample cannot be heterozygous -- TODO confirm
    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    if os.stat(infile).st_size == 0:
        core_vars = "No core SNVs detected; haplotypes equivalent to GRCh38 content"

    return core_vars
|
||||
|
||||
|
||||
|
||||
# Parse the coverage file once; the original called get_total_CN() three
# times and re-read the same file for each field.
_cn_result = get_total_CN(cov_file)
cn = _cn_result[0]
av_ctrl_cov = _cn_result[2]
exon_cov = _cn_result[3]

print("Initially computed Copy Number = {}".format(cn))

# Flag regions whose coverage is below 45% of the control coverage.
# NOTE(review): as in the original loop (range(1, len(exon_cov))), the last
# entry of exon_cov is never checked -- confirm whether that is intended.
for exon_no, cov in enumerate(exon_cov[:-1], start=1):
    if cov / av_ctrl_cov < 0.45:
        print('Check exon {} for potential deletion if using high coverage WGS'.format(str(exon_no)))

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)
|
||||
|
||||
|
||||
56
pgx-main/scripts/abcg2/hg38/bin/stellarpgx.py~
Normal file
56
pgx-main/scripts/abcg2/hg38/bin/stellarpgx.py~
Normal file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import math
|
||||
from sv_modules import *
|
||||
|
||||
|
||||
print("--------------------------------------------\n")
|
||||
|
||||
# The original printed the unexpanded shell template "${i^^}" literally;
# use the gene name, matching the sibling stellarpgx.py for this gene.
print("ABCG2 Variant Analysis with StellarPGx\n")
|
||||
|
||||
print("--------------------------------------------\n")
|
||||
|
||||
|
||||
infile = sys.argv[1]
|
||||
cov_file = sys.argv[2]
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ';'-joined string.

    Args:
        infile: path to a text file with one variant per line.
        cn: copy number (anything accepted by ``int()``).

    Returns:
        The stripped lines, sorted and joined with ';'. When ``int(cn) == 1``
        every "~0/1" is rewritten to "~1/1". When ``infile`` is a zero-byte
        file, a fixed "No core SNVs detected" message is returned instead.
    """
    # Context manager closes the handle; the original left it open.
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    # presumably a single-copy sample cannot be heterozygous -- TODO confirm
    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    if os.stat(infile).st_size == 0:
        core_vars = "No core SNVs detected; haplotypes equivalent to GRCh38 content"

    return core_vars
|
||||
|
||||
|
||||
|
||||
# Parse the coverage file once; the original called get_total_CN() three
# times and re-read the same file for each field.
_cn_result = get_total_CN(cov_file)
cn = _cn_result[0]
av_ctrl_cov = _cn_result[2]
exon_cov = _cn_result[3]

print("Initially computed Copy Number = {}".format(cn))

# Flag regions whose coverage is below 45% of the control coverage.
# NOTE(review): as in the original loop (range(1, len(exon_cov))), the last
# entry of exon_cov is never checked -- confirm whether that is intended.
for exon_no, cov in enumerate(exon_cov[:-1], start=1):
    if cov / av_ctrl_cov < 0.45:
        print('Check exon {} for potential deletion if using high coverage WGS'.format(str(exon_no)))

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)
|
||||
|
||||
|
||||
37
pgx-main/scripts/abcg2/hg38/bin/sv_modules.py
Normal file
37
pgx-main/scripts/abcg2/hg38/bin/sv_modules.py
Normal file
@@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
def get_total_CN(cov_file):
    """Estimate the gene copy number from a per-region coverage table.

    Each line of ``cov_file`` is split on whitespace; a region's average
    coverage is ``field[3] / (field[2] - field[1])``. Line 1 is the gene
    region (ABCG2 here), lines 2 and 3 are the VDR and EGFR control regions,
    and the remaining lines are per-exon regions.
    (presumably start / end / summed depth columns -- TODO confirm)

    Args:
        cov_file: path to the coverage table.

    Returns:
        ``[copy_number_as_str, rounded_gene_cov, rounded_control_cov,
        exon_cov_list]``, where the copy number is
        ``round(2 * gene_cov / mean(control_covs))``.
    """

    def _mean_cov(region):
        # Total coverage divided by region length.
        return float(region[3]) / (float(region[2]) - float(region[1]))

    # Context manager closes the handle; the original left it open.
    with open(cov_file, "r") as handle:
        all_reg = [line.strip().split() for line in handle]

    av_gene_cov = _mean_cov(all_reg[0])
    av_vdr_cov = _mean_cov(all_reg[1])
    av_egfr_cov = _mean_cov(all_reg[2])

    # NOTE(review): the original iterated range(1, len(regions)) with index
    # i-1, so the final region is never included. That behaviour is kept
    # here ([:-1]); confirm whether the last line should be skipped.
    exon_cov_list = [_mean_cov(region) for region in all_reg[3:][:-1]]

    av_ctrl_cov = (av_vdr_cov + av_egfr_cov) / 2

    # Diploid baseline: gene/control ratio of 1.0 corresponds to 2 copies.
    total_cn = round(2 * (av_gene_cov / av_ctrl_cov))

    return [str(int(total_cn)), round(av_gene_cov), round(av_ctrl_cov), exon_cov_list]
|
||||
|
||||
56
pgx-main/scripts/comt/hg38/bin/stellarpgx.py
Normal file
56
pgx-main/scripts/comt/hg38/bin/stellarpgx.py
Normal file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import math
|
||||
from sv_modules import *
|
||||
|
||||
|
||||
print("--------------------------------------------\n")
|
||||
|
||||
print("COMT Variant Analysis with StellarPGx\n")
|
||||
|
||||
print("--------------------------------------------\n")
|
||||
|
||||
|
||||
infile = sys.argv[1]
|
||||
cov_file = sys.argv[2]
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ';'-joined string.

    Args:
        infile: path to a text file with one variant per line.
        cn: copy number (anything accepted by ``int()``).

    Returns:
        The stripped lines, sorted and joined with ';'. When ``int(cn) == 1``
        every "~0/1" is rewritten to "~1/1". When ``infile`` is a zero-byte
        file, a fixed "No core SNVs detected" message is returned instead.
    """
    # Context manager closes the handle; the original left it open.
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    # presumably a single-copy sample cannot be heterozygous -- TODO confirm
    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    if os.stat(infile).st_size == 0:
        core_vars = "No core SNVs detected; haplotypes equivalent to GRCh38 content"

    return core_vars
|
||||
|
||||
|
||||
|
||||
# Parse the coverage file once; the original called get_total_CN() three
# times and re-read the same file for each field.
_cn_result = get_total_CN(cov_file)
cn = _cn_result[0]
av_ctrl_cov = _cn_result[2]
exon_cov = _cn_result[3]

print("Initially computed Copy Number = {}".format(cn))

# Flag regions whose coverage is below 45% of the control coverage.
# NOTE(review): as in the original loop (range(1, len(exon_cov))), the last
# entry of exon_cov is never checked -- confirm whether that is intended.
for exon_no, cov in enumerate(exon_cov[:-1], start=1):
    if cov / av_ctrl_cov < 0.45:
        print('Check exon {} for potential deletion if using high coverage WGS'.format(str(exon_no)))

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)
|
||||
|
||||
|
||||
56
pgx-main/scripts/comt/hg38/bin/stellarpgx.py~
Normal file
56
pgx-main/scripts/comt/hg38/bin/stellarpgx.py~
Normal file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import math
|
||||
from sv_modules import *
|
||||
|
||||
|
||||
print("--------------------------------------------\n")
|
||||
|
||||
# The original printed the unexpanded shell template "${i^^}" literally;
# use the gene name, matching the sibling stellarpgx.py for this gene.
print("COMT Variant Analysis with StellarPGx\n")
|
||||
|
||||
print("--------------------------------------------\n")
|
||||
|
||||
|
||||
infile = sys.argv[1]
|
||||
cov_file = sys.argv[2]
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ';'-joined string.

    Args:
        infile: path to a text file with one variant per line.
        cn: copy number (anything accepted by ``int()``).

    Returns:
        The stripped lines, sorted and joined with ';'. When ``int(cn) == 1``
        every "~0/1" is rewritten to "~1/1". When ``infile`` is a zero-byte
        file, a fixed "No core SNVs detected" message is returned instead.
    """
    # Context manager closes the handle; the original left it open.
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    # presumably a single-copy sample cannot be heterozygous -- TODO confirm
    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    if os.stat(infile).st_size == 0:
        core_vars = "No core SNVs detected; haplotypes equivalent to GRCh38 content"

    return core_vars
|
||||
|
||||
|
||||
|
||||
# Parse the coverage file once; the original called get_total_CN() three
# times and re-read the same file for each field.
_cn_result = get_total_CN(cov_file)
cn = _cn_result[0]
av_ctrl_cov = _cn_result[2]
exon_cov = _cn_result[3]

print("Initially computed Copy Number = {}".format(cn))

# Flag regions whose coverage is below 45% of the control coverage.
# NOTE(review): as in the original loop (range(1, len(exon_cov))), the last
# entry of exon_cov is never checked -- confirm whether that is intended.
for exon_no, cov in enumerate(exon_cov[:-1], start=1):
    if cov / av_ctrl_cov < 0.45:
        print('Check exon {} for potential deletion if using high coverage WGS'.format(str(exon_no)))

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)
|
||||
|
||||
|
||||
37
pgx-main/scripts/comt/hg38/bin/sv_modules.py
Normal file
37
pgx-main/scripts/comt/hg38/bin/sv_modules.py
Normal file
@@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
def get_total_CN(cov_file):
    """Estimate the gene copy number from a per-region coverage table.

    Each line of ``cov_file`` is split on whitespace; a region's average
    coverage is ``field[3] / (field[2] - field[1])``. Line 1 is the gene
    region (COMT here), lines 2 and 3 are the VDR and EGFR control regions,
    and the remaining lines are per-exon regions.
    (presumably start / end / summed depth columns -- TODO confirm)

    Args:
        cov_file: path to the coverage table.

    Returns:
        ``[copy_number_as_str, rounded_gene_cov, rounded_control_cov,
        exon_cov_list]``, where the copy number is
        ``round(2 * gene_cov / mean(control_covs))``.
    """

    def _mean_cov(region):
        # Total coverage divided by region length.
        return float(region[3]) / (float(region[2]) - float(region[1]))

    # Context manager closes the handle; the original left it open.
    with open(cov_file, "r") as handle:
        all_reg = [line.strip().split() for line in handle]

    av_gene_cov = _mean_cov(all_reg[0])
    av_vdr_cov = _mean_cov(all_reg[1])
    av_egfr_cov = _mean_cov(all_reg[2])

    # NOTE(review): the original iterated range(1, len(regions)) with index
    # i-1, so the final region is never included. That behaviour is kept
    # here ([:-1]); confirm whether the last line should be skipped.
    exon_cov_list = [_mean_cov(region) for region in all_reg[3:][:-1]]

    av_ctrl_cov = (av_vdr_cov + av_egfr_cov) / 2

    # Diploid baseline: gene/control ratio of 1.0 corresponds to 2 copies.
    total_cn = round(2 * (av_gene_cov / av_ctrl_cov))

    return [str(int(total_cn)), round(av_gene_cov), round(av_ctrl_cov), exon_cov_list]
|
||||
|
||||
37
pgx-main/scripts/comt/hg38/bin/sv_modules.py~
Normal file
37
pgx-main/scripts/comt/hg38/bin/sv_modules.py~
Normal file
@@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
def get_total_CN(cov_file):
    """Estimate the gene copy number from a per-region coverage table.

    Each line of ``cov_file`` is split on whitespace; a region's average
    coverage is ``field[3] / (field[2] - field[1])``. Line 1 is the gene
    region (COMT here), lines 2 and 3 are the VDR and EGFR control regions,
    and the remaining lines are per-exon regions.
    (presumably start / end / summed depth columns -- TODO confirm)

    Args:
        cov_file: path to the coverage table.

    Returns:
        ``[copy_number_as_str, rounded_gene_cov, rounded_control_cov,
        exon_cov_list]``, where the copy number is
        ``round(2 * gene_cov / mean(control_covs))``.
    """

    def _mean_cov(region):
        # Total coverage divided by region length.
        return float(region[3]) / (float(region[2]) - float(region[1]))

    # Context manager closes the handle; the original left it open.
    with open(cov_file, "r") as handle:
        all_reg = [line.strip().split() for line in handle]

    av_gene_cov = _mean_cov(all_reg[0])
    av_vdr_cov = _mean_cov(all_reg[1])
    av_egfr_cov = _mean_cov(all_reg[2])

    # NOTE(review): the original iterated range(1, len(regions)) with index
    # i-1, so the final region is never included. That behaviour is kept
    # here ([:-1]); confirm whether the last line should be skipped.
    exon_cov_list = [_mean_cov(region) for region in all_reg[3:][:-1]]

    av_ctrl_cov = (av_vdr_cov + av_egfr_cov) / 2

    # Diploid baseline: gene/control ratio of 1.0 corresponds to 2 copies.
    total_cn = round(2 * (av_gene_cov / av_ctrl_cov))

    return [str(int(total_cn)), round(av_gene_cov), round(av_ctrl_cov), exon_cov_list]
|
||||
|
||||
78
pgx-main/scripts/cyp1a1/b37/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/cyp1a1/b37/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Pick the best-scoring database diplotype for a sample's core variants.

    Args:
        database: path to a tab-separated file; column 0 is a diplotype name
            of the form "A.vX_B.vY" and column 1 is its ';'-joined core
            variants.
        core_vars: the sample's ';'-joined core variants.

    Returns:
        ``[allele_res, diplo1]``: the star-allele call ("*A/*B") and the
        chosen database diplotype name. When no database record passes the
        pre-filter, ``['*1/*1', '1.v1_1.v1']`` is returned.
    """
    core_vars_list = core_vars.split(";")
    # Drop the last 4 characters of the first/last variant
    # (presumably the "~0/1"-style genotype suffix -- TODO confirm).
    last_var_key = core_vars_list[-1][:-4]
    first_var_key = core_vars_list[0][:-4]

    # Context manager closes the handle; the original left it open.
    with open(database, "r") as handle:
        dbs = [line.strip().split("\t") for line in handle]

    # NOTE(review): the original condition `core_temp1 and core_temp2 in
    # temp_rec` only tests that the last key is non-empty and that the FIRST
    # key occurs in the record. Kept as-is (with explicit parentheses):
    # requiring both keys would discard partial matches and fall back to
    # *1/*1. Confirm the intended filter.
    dbs_temp = []
    for record in dbs:
        record_vars = record[1]
        if last_var_key and (first_var_key in record_vars):
            dbs_temp.append(record)

    scores = []
    candidates = []
    cand_vars = []

    for record in dbs_temp:
        candidates.append(record[0])
        record_core_var = record[1].split(";")
        cand_vars.append(record_core_var)

        # +3 exact variant match, +1 same variant with a different suffix
        # (substring test against the raw core_vars string), -2 otherwise.
        counter = 0
        for var in record_core_var:
            if var in core_vars_list:
                counter += 3
            elif var[:-4] in core_vars:
                counter += 1
            else:
                counter += -2
        scores.append(counter)

    if not scores:
        diplo1 = '1.v1_1.v1'
        allele_res = '*1/*1'
    else:
        max_score = max(scores)
        # Consider the top score and anything one point below it, then take
        # the candidate with the fewest variants (first one on ties).
        indices = [i for i, x in enumerate(scores) if x == max_score or x == max_score - 1]
        best = min(indices, key=lambda i: len(cand_vars[i]))
        diplo1 = candidates[best]

        # "A.vX_B.vY" -> "*A/*B"
        underscores = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
        dots = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
        hap1 = "*" + str(diplo1[:dots[0]])
        hap2 = "*" + str(diplo1[underscores[0] + 1:dots[1]])
        allele_res = hap1 + "/" + hap2

    return [allele_res, diplo1]
|
||||
432
pgx-main/scripts/cyp1a1/b37/bin/snv_def_modules.py
Normal file
432
pgx-main/scripts/cyp1a1/b37/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,432 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ';'-joined string.

    Args:
        infile: path to a text file with one variant per line.
        cn: copy number (anything accepted by ``int()``).

    Returns:
        The stripped lines, sorted and joined with ';'. When ``int(cn) == 1``
        every "~0/1" is rewritten to "~1/1". An empty file yields "".
    """
    # Context manager closes the handle; the original left it open.
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    # presumably a single-copy sample cannot be heterozygous -- TODO confirm
    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars
|
||||
|
||||
def get_all_vars_gt(infile_full_gt):
    """Return every line of ``infile_full_gt``, stripped, sorted and ';'-joined.

    Args:
        infile_full_gt: path to a text file with one entry per line.

    Returns:
        A single ';'-joined string of the sorted, stripped lines ("" for an
        empty file).
    """
    # Context manager closes the handle; the original left it open.
    with open(infile_full_gt, "r") as handle:
        return ";".join(sorted(line.strip() for line in handle))
|
||||
|
||||
def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
    """Call a diplotype by matching the sample's core variants to the database.

    Args:
        database: tab-separated file; column 0 diplotype name ("A.vX_B.vY"),
            column 1 ';'-joined core variants, column 2 ';'-joined full
            (supporting) variants.
        infile: sample core-variant file (one variant per line).
        infile_full: sample full-variant file.
        infile_full_gt: sample full-variant-with-genotype file (one per line).
        infile_spec: whitespace-separated tie-break table; field 0 candidate
            haplotypes, field 1 diplotype name, field 2 core variants,
            field 3 ';'-joined full genotyped variants.
        cn: copy number, forwarded to ``get_core_variants``.

    Returns:
        * ``["1.v1_1.v1", "*1/*1"]`` when ``infile`` is a zero-byte file.
        * ``[candidates, chosen, allele_call]`` when a single best candidate
          is found (``chosen`` is a diplotype name, or the tie-break table's
          candidate-haplotype field, or the list of tied names).
        * ``[candidates, allele_call]`` for the two-candidate ambiguous
          outcomes ("X or Y").
        * ``None`` when zero or more than three database records match, or
          for three-candidate orderings the rules below do not cover.
        In every list the last element is the star-allele call.

    Raises:
        ValueError: from ``min()`` if a tie-break is needed but no line of
            ``infile_spec`` matches the sample's core variants.
    """

    def _alleles(diplo):
        # "A.vX_B.vY" -> "*A/*B"
        underscores = [i for i in range(len(diplo)) if diplo.startswith("_", i)]
        dots = [i for i in range(len(diplo)) if diplo.startswith(".", i)]
        hap1 = "*" + str(diplo[:dots[0]])
        hap2 = "*" + str(diplo[underscores[0] + 1:dots[1]])
        return hap1 + "/" + hap2

    def _all_equal(values):
        # True for an empty list as well (same as the original chkList).
        return all(v == values[0] for v in values)

    def _count_unexplained(observed, supporting):
        # Number of observed entries absent from a candidate's variant list.
        return sum(1 for v in observed if v not in supporting)

    def _tiebreak():
        # Score every tie-break row whose core variants equal the sample's:
        # score = number of sample genotyped variants the row does not list.
        names, cand_haps, scores = [], [], []
        for raw in spec_lines:
            fields = raw.strip().split()
            if fields[2] == core_variants:
                names.append(fields[1])
                cand_haps.append(fields[0])
                scores.append(_count_unexplained(all_var_gt, fields[3].split(";")))
        return names, cand_haps, scores, min(scores)

    # All inputs are read under context managers; the original leaked every
    # handle, including one on infile_spec that was held for the whole call.
    with open(infile_spec, "r") as handle:
        spec_lines = handle.readlines()

    # NOTE(review): the original called line.strip() here and discarded the
    # result, so entries keep their trailing newline and are compared
    # unstripped below. Kept as-is because stripping would change which
    # branch is taken -- confirm the intended behaviour.
    with open(infile_full, "r") as handle:
        all_variants = list(handle)

    # No core variants at all: reference call. (The original had an
    # unreachable sys.exit() after this return.)
    if os.stat(infile).st_size == 0:
        return ["1.v1_1.v1", "*1/*1"]

    core_variants = get_core_variants(infile, cn)

    with open(infile_full_gt, "r") as handle:
        all_var_gt = [line.strip() for line in handle]

    with open(database, "r") as handle:
        dbs = [line.strip().split("\t") for line in handle]

    # Candidates: database records whose sorted core variants match exactly.
    soln_list1 = []
    soln_list2 = []
    for record in dbs:
        if ";".join(sorted(record[1].split(";"))) == core_variants:
            soln_list1.append(record[0])
            soln_list2.append(record[2])

    if len(soln_list1) == 1:
        diplo = "".join(soln_list1)
        return [soln_list1, diplo, _alleles(diplo)]

    if len(soln_list1) == 2:
        print(soln_list1)
        diplo1, diplo2 = soln_list1
        n1 = _count_unexplained(all_variants, soln_list2[0].split(";"))
        n2 = _count_unexplained(all_variants, soln_list2[1].split(";"))

        # Prefer the candidate leaving fewer sample variants unexplained.
        if n1 < n2:
            return [soln_list1, diplo1, _alleles(diplo1)]
        if n1 > n2:
            return [soln_list1, diplo2, _alleles(diplo2)]

        names, cand_haps, scores, min_score = _tiebreak()

        if _all_equal(scores):
            return [soln_list1, " or ".join(_alleles(d) for d in soln_list1)]

        if scores.count(min_score) > 1:
            # Bug fix: the original used score.index(i), which always returns
            # the FIRST minimum, so tied candidates collapsed into one and
            # the ambiguity report below could never be produced.
            alt_solns = [_alleles(names[j]) for j, s in enumerate(scores) if s == min_score]
            if _all_equal(alt_solns):
                return [soln_list1, alt_solns[0]]
            alt_solns = sorted(alt_solns)
            return [soln_list1, " or ".join([alt_solns[0], alt_solns[-1]])]

        best = scores.index(min_score)
        return [soln_list1, cand_haps[best], _alleles(names[best])]

    if len(soln_list1) == 3:
        diplo1, diplo2, diplo3 = soln_list1
        n1 = _count_unexplained(all_variants, soln_list2[0].split(";"))
        n2 = _count_unexplained(all_variants, soln_list2[1].split(";"))
        n3 = _count_unexplained(all_variants, soln_list2[2].split(";"))

        if n1 < n2 and n1 < n3:
            return [soln_list1, diplo1, _alleles(diplo1)]
        if n1 > n2 and n2 < n3:
            return [soln_list1, diplo2, _alleles(diplo2)]
        if n1 > n2 and n2 > n3:
            return [soln_list1, diplo3, _alleles(diplo3)]

        # NOTE(review): orderings not covered by the conditions above or
        # below (e.g. n3 < n1 < n2) fall through and return None, exactly as
        # in the original.
        if n1 == n2 == n3 or (n1 != n2 == n3) or (n1 == n2 != n3):
            names, cand_haps, scores, min_score = _tiebreak()

            if _all_equal(scores):
                return [soln_list1, names, " or ".join(_alleles(d) for d in names)]

            best = scores.index(min_score)
            return [soln_list1, cand_haps[best], _alleles(names[best])]

    return None
|
||||
69
pgx-main/scripts/cyp1a1/b37/bin/stellarpgx.py
Normal file
69
pgx-main/scripts/cyp1a1/b37/bin/stellarpgx.py
Normal file
@@ -0,0 +1,69 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
# Command-line driver: prints the CYP1A1 star-allele call for one sample.
print("--------------------------------------------\n")
print("CYP1A1 Star Allele Calling with StellarPGx\n")
print("--------------------------------------------\n")

# Five positional arguments are required; a missing one raises IndexError.
database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]

# Copy number is hard-coded to 2 in this script.
cn = 2

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)

snv_def_calls = cand_snv_allele_calling(
    database, infile, infile_full, infile_full_gt, infile_spec, cn
)

if snv_def_calls is None:
    # No diplotype call was produced: report the closest background alleles
    # instead, then stop.
    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if bac_alleles is None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")
    else:
        # NOTE(review): presumably bac_alleles is [star-allele string,
        # diplotype ID] -- confirm against bkg_modules.
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    sys.exit()

# First element holds the candidate diplotype(s); last element is the
# formatted star-allele result.
snv_cand_alleles = snv_def_calls[0]

print("\nCandidate alleles:")
print(snv_cand_alleles)

snv_def_alleles = snv_def_calls[-1]

# Not printed below; kept so the genotype file is still read as before.
dip_variants = get_all_vars_gt(infile_full_gt)

print("\nResult:")
print(snv_def_alleles)
|
||||
0
pgx-main/scripts/cyp1a1/b37/bin/sv_modules.py
Normal file
0
pgx-main/scripts/cyp1a1/b37/bin/sv_modules.py
Normal file
78
pgx-main/scripts/cyp1a1/hg38/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/cyp1a1/hg38/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Score database diplotypes against the sample's core variants.

    Parameters:
        database: path to a tab-separated file whose first column is a
            diplotype ID (``<hap>.<ver>_<hap>.<ver>``) and whose second
            column is a ';'-joined list of core variants.
        core_vars: ';'-joined core variants of the sample.

    Returns:
        ``[allele_res, diplo1]`` -- the star-allele string (``*A/*B``) and
        the diplotype ID it came from. Falls back to ``*1/*1`` /
        ``1.v1_1.v1`` when no database record qualifies.
    """
    core_vars_list = core_vars.split(";")
    # Drop the trailing 4 characters of the last and first variant
    # (presumably the genotype suffix such as "~0/1" -- confirm).
    core_temp1 = core_vars_list[-1][:-4]
    core_temp2 = core_vars_list[0][:-4]

    with open(database, "r") as handle:
        dbs = [line.strip().split("\t") for line in handle]

    dbs_temp = []
    for record in dbs:
        temp_rec = record[1]
        # The original `core_temp1 and core_temp2 in temp_rec` only tested
        # core_temp2 for membership; both variants must be present. The
        # truthiness guard on core_temp1 is kept so that an empty sample
        # still selects no record.
        if core_temp1 and core_temp1 in temp_rec and core_temp2 in temp_rec:
            dbs_temp.append(record)

    scores = []
    candidates = []
    cand_vars = []

    for elem in dbs_temp:
        candidates.append(elem[0])
        record_core_var = elem[1].split(";")
        cand_vars.append(record_core_var)

        counter = 0
        for var in record_core_var:
            if var in core_vars_list:
                # Exact variant (including its last 4 characters) matches.
                counter += 3
            elif var[:-4] in core_vars:
                # Same variant text, different trailing 4 characters.
                counter += 1
            else:
                counter -= 2
        scores.append(counter)

    if not scores:
        diplo1 = '1.v1_1.v1'
        allele_res = '*1/*1'
    else:
        max_score = max(scores)
        # Keep every record within one point of the best score ...
        indices = [i for i, x in enumerate(scores)
                   if x == max_score or x == max_score - 1]
        cand_diplos = [candidates[i] for i in indices]
        diplo_vars2 = [len(cand_vars[i]) for i in indices]
        # ... and prefer the one defined by the fewest core variants
        # (first such record on ties).
        diplo1 = cand_diplos[diplo_vars2.index(min(diplo_vars2))]

        res1 = [i for i, ch in enumerate(diplo1) if ch == "_"]
        res2 = [i for i, ch in enumerate(diplo1) if ch == "."]
        hap1 = "*" + diplo1[:res2[0]]
        hap2 = "*" + diplo1[res1[0] + 1:res2[1]]
        allele_res = hap1 + "/" + hap2

    return [allele_res, diplo1]
|
||||
432
pgx-main/scripts/cyp1a1/hg38/bin/snv_def_modules.py
Normal file
432
pgx-main/scripts/cyp1a1/hg38/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,432 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ';'-joined string.

    Parameters:
        infile: path to a text file with one variant per line.
        cn: copy number; anything accepted by ``int()``.

    Returns:
        The stripped lines of ``infile`` sorted and joined with ';'. When
        ``cn`` is 1, every ``~0/1`` is rewritten to ``~1/1``. An empty file
        yields an empty string.
    """
    # `with` closes the handle; the original left it open.
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars
|
||||
|
||||
def get_all_vars_gt(infile_full_gt):
    """Return every line of ``infile_full_gt`` stripped, sorted and ';'-joined.

    Parameters:
        infile_full_gt: path to a text file with one entry per line.

    Returns:
        A single string; empty when the file is empty.
    """
    # `with` closes the handle; the original left it open.
    with open(infile_full_gt, "r") as handle:
        return ";".join(sorted(line.strip() for line in handle))
|
||||
|
||||
def _star_diplotype(diplo):
    """Convert a diplotype ID such as ``4.v1_6.v2`` into ``*4/*6``."""
    underscores = [i for i, ch in enumerate(diplo) if ch == "_"]
    dots = [i for i, ch in enumerate(diplo) if ch == "."]
    hap1 = "*" + diplo[:dots[0]]
    hap2 = "*" + diplo[underscores[0] + 1:dots[1]]
    return hap1 + "/" + hap2


def _all_equal(items):
    """Return True when every element equals the first (True for an empty list)."""
    return all(item == items[0] for item in items)


def _count_unsupported(observed, supporting):
    """Count the entries of ``observed`` that are absent from ``supporting``."""
    return sum(1 for item in observed if item not in supporting)


def _load_tiebreak_rows(infile_spec, core_variants, all_var_gt):
    """Read the tie-break rows of ``infile_spec`` that match ``core_variants``.

    Rows are whitespace-separated; column 2 is compared to ``core_variants``.
    Returns three parallel lists: column 0, column 1 (diplotype ID) and the
    number of sample genotypes not listed in column 3.
    """
    cand_haps = []
    diplos = []
    scores = []
    with open(infile_spec, "r") as handle:
        for line in handle:
            fields = line.strip().split()
            if fields[2] == core_variants:
                cand_haps.append(fields[0])
                diplos.append(fields[1])
                scores.append(_count_unsupported(all_var_gt, fields[3].split(";")))
    if not scores:
        # The original reached min() on an empty list here; same exception
        # type, clearer message.
        raise ValueError(
            "no entry in {} matches core variants {!r}".format(infile_spec, core_variants)
        )
    return cand_haps, diplos, scores


def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
    """Call a diplotype by matching the sample's core variants to the database.

    Parameters:
        database: tab-separated file; column 0 is a diplotype ID, column 1
            its ';'-joined core variants, column 2 its ';'-joined full
            variant list.
        infile: file with the sample's core variants, one per line.
        infile_full: file with the sample's full variant list.
        infile_full_gt: file with the sample's variants with genotypes, one
            per line.
        infile_spec: whitespace-separated tie-break table, read only when
            candidates tie.
        cn: copy number, forwarded to ``get_core_variants``.

    Returns:
        A list whose first element is the candidate diplotype(s) and whose
        last element is the star-allele result string; some paths include
        a middle element. Returns ``None`` when zero or more than three
        database records match, or for three-candidate orderings that no
        branch covers.

    Raises:
        ValueError: a tie-break is needed but ``infile_spec`` has no row
            for these core variants.
    """
    with open(infile_full, "r") as handle:
        # NOTE(review): the original called line.strip() and discarded the
        # result, so entries keep their trailing newline and will rarely
        # equal a database variant. Kept as-is to avoid changing calls;
        # confirm whether stripping or splitting on ';' was intended.
        all_variants = list(handle)

    if os.stat(infile).st_size == 0:
        # No core variants at all: reference diplotype.
        return ["1.v1_1.v1", "*1/*1"]

    core_variants = get_core_variants(infile, cn)

    with open(infile_full_gt, "r") as handle:
        all_var_gt = [line.strip() for line in handle]

    soln_list1 = []  # matching diplotype IDs
    soln_list2 = []  # their ';'-joined full variant lists
    with open(database, "r") as handle:
        for line in handle:
            record = line.strip().split("\t")
            if ";".join(sorted(record[1].split(";"))) == core_variants:
                soln_list1.append(record[0])
                soln_list2.append(record[2])

    n_solutions = len(soln_list1)

    if n_solutions == 1:
        diplo = soln_list1[0]
        return [soln_list1, diplo, _star_diplotype(diplo)]

    if n_solutions == 2:
        print(soln_list1)
        diplo1, diplo2 = soln_list1
        miss1 = _count_unsupported(all_variants, soln_list2[0].split(";"))
        miss2 = _count_unsupported(all_variants, soln_list2[1].split(";"))

        # Prefer the candidate that leaves fewer sample variants unexplained.
        if miss1 < miss2:
            return [soln_list1, diplo1, _star_diplotype(diplo1)]
        if miss1 > miss2:
            return [soln_list1, diplo2, _star_diplotype(diplo2)]

        cand_haps, diplos, scores = _load_tiebreak_rows(infile_spec, core_variants, all_var_gt)
        min_score = min(scores)

        if _all_equal(scores):
            # Nothing separates the candidates: report both.
            allele_res = " or ".join(_star_diplotype(d) for d in soln_list1)
            return [soln_list1, allele_res]

        if scores.count(min_score) > 1:
            # The original used score.index(i), which always returned the
            # first tied position and so hid the ambiguity.
            alt_solns = [_star_diplotype(diplos[idx])
                         for idx, value in enumerate(scores) if value == min_score]
            if _all_equal(alt_solns):
                return [soln_list1, alt_solns[0]]
            alt_solns.sort()
            return [soln_list1, " or ".join([alt_solns[0], alt_solns[-1]])]

        best = scores.index(min_score)
        return [soln_list1, cand_haps[best], _star_diplotype(diplos[best])]

    if n_solutions == 3:
        diplo1, diplo2, diplo3 = soln_list1
        miss1, miss2, miss3 = (
            _count_unsupported(all_variants, supp.split(";")) for supp in soln_list2
        )

        if miss1 < miss2 and miss1 < miss3:
            return [soln_list1, diplo1, _star_diplotype(diplo1)]
        if miss1 > miss2 and miss2 < miss3:
            return [soln_list1, diplo2, _star_diplotype(diplo2)]
        if miss1 > miss2 > miss3:
            return [soln_list1, diplo3, _star_diplotype(diplo3)]

        # Same test as the original three-way condition (all equal, or
        # exactly one adjacent pair equal).
        if miss1 == miss2 or miss2 == miss3:
            cand_haps, diplos, scores = _load_tiebreak_rows(infile_spec, core_variants, all_var_gt)
            if _all_equal(scores):
                allele_res = " or ".join(_star_diplotype(d) for d in diplos)
                return [soln_list1, diplos, allele_res]
            best = scores.index(min(scores))
            return [soln_list1, cand_haps[best], _star_diplotype(diplos[best])]

        # NOTE(review): orderings miss3 < miss1 < miss2 and
        # miss1 == miss3 < miss2 reach this point and yield None, as in the
        # original; confirm whether they should select a candidate.

    return None
|
||||
69
pgx-main/scripts/cyp1a1/hg38/bin/stellarpgx.py
Normal file
69
pgx-main/scripts/cyp1a1/hg38/bin/stellarpgx.py
Normal file
@@ -0,0 +1,69 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
# Command-line driver: prints the CYP1A1 star-allele call for one sample.
print("--------------------------------------------\n")
print("CYP1A1 Star Allele Calling with StellarPGx\n")
print("--------------------------------------------\n")

# Five positional arguments are required; a missing one raises IndexError.
database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]

# Copy number is hard-coded to 2 in this script.
cn = 2

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)

snv_def_calls = cand_snv_allele_calling(
    database, infile, infile_full, infile_full_gt, infile_spec, cn
)

if snv_def_calls is None:
    # No diplotype call was produced: report the closest background alleles
    # instead, then stop.
    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if bac_alleles is None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")
    else:
        # NOTE(review): presumably bac_alleles is [star-allele string,
        # diplotype ID] -- confirm against bkg_modules.
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    sys.exit()

# First element holds the candidate diplotype(s); last element is the
# formatted star-allele result.
snv_cand_alleles = snv_def_calls[0]

print("\nCandidate alleles:")
print(snv_cand_alleles)

snv_def_alleles = snv_def_calls[-1]

# Not printed below; kept so the genotype file is still read as before.
dip_variants = get_all_vars_gt(infile_full_gt)

print("\nResult:")
print(snv_def_alleles)
|
||||
0
pgx-main/scripts/cyp1a1/hg38/bin/sv_modules.py
Normal file
0
pgx-main/scripts/cyp1a1/hg38/bin/sv_modules.py
Normal file
78
pgx-main/scripts/cyp1a2/b37/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/cyp1a2/b37/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Score database diplotypes against the sample's core variants.

    Parameters:
        database: path to a tab-separated file whose first column is a
            diplotype ID (``<hap>.<ver>_<hap>.<ver>``) and whose second
            column is a ';'-joined list of core variants.
        core_vars: ';'-joined core variants of the sample.

    Returns:
        ``[allele_res, diplo1]`` -- the star-allele string (``*A/*B``) and
        the diplotype ID it came from. Falls back to ``*1B/*1B`` /
        ``1B.v1_1B.v1`` when no database record qualifies.
    """
    core_vars_list = core_vars.split(";")
    # Drop the trailing 4 characters of the last and first variant
    # (presumably the genotype suffix such as "~0/1" -- confirm).
    core_temp1 = core_vars_list[-1][:-4]
    core_temp2 = core_vars_list[0][:-4]

    with open(database, "r") as handle:
        dbs = [line.strip().split("\t") for line in handle]

    dbs_temp = []
    for record in dbs:
        temp_rec = record[1]
        # The original `core_temp1 and core_temp2 in temp_rec` only tested
        # core_temp2 for membership; both variants must be present. The
        # truthiness guard on core_temp1 is kept so that an empty sample
        # still selects no record.
        if core_temp1 and core_temp1 in temp_rec and core_temp2 in temp_rec:
            dbs_temp.append(record)

    scores = []
    candidates = []
    cand_vars = []

    for elem in dbs_temp:
        candidates.append(elem[0])
        record_core_var = elem[1].split(";")
        cand_vars.append(record_core_var)

        counter = 0
        for var in record_core_var:
            if var in core_vars_list:
                # Exact variant (including its last 4 characters) matches.
                counter += 3
            elif var[:-4] in core_vars:
                # Same variant text, different trailing 4 characters.
                counter += 1
            else:
                counter -= 2
        scores.append(counter)

    if not scores:
        diplo1 = '1B.v1_1B.v1'
        allele_res = '*1B/*1B'
    else:
        max_score = max(scores)
        # Keep every record within one point of the best score ...
        indices = [i for i, x in enumerate(scores)
                   if x == max_score or x == max_score - 1]
        cand_diplos = [candidates[i] for i in indices]
        diplo_vars2 = [len(cand_vars[i]) for i in indices]
        # ... and prefer the one defined by the fewest core variants
        # (first such record on ties).
        diplo1 = cand_diplos[diplo_vars2.index(min(diplo_vars2))]

        res1 = [i for i, ch in enumerate(diplo1) if ch == "_"]
        res2 = [i for i, ch in enumerate(diplo1) if ch == "."]
        hap1 = "*" + diplo1[:res2[0]]
        hap2 = "*" + diplo1[res1[0] + 1:res2[1]]
        allele_res = hap1 + "/" + hap2

    return [allele_res, diplo1]
|
||||
370
pgx-main/scripts/cyp1a2/b37/bin/snv_def_modules.py
Normal file
370
pgx-main/scripts/cyp1a2/b37/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,370 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ';'-joined string.

    Parameters:
        infile: path to a text file with one variant per line.
        cn: copy number; anything accepted by ``int()``.

    Returns:
        The stripped lines of ``infile`` sorted and joined with ';'. When
        ``cn`` is 1, every ``~0/1`` is rewritten to ``~1/1``. An empty file
        yields an empty string.
    """
    # `with` closes the handle; the original left it open.
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars
|
||||
|
||||
def get_all_vars_gt(infile_full_gt):
    """Return every line of ``infile_full_gt`` stripped, sorted and ';'-joined.

    Parameters:
        infile_full_gt: path to a text file with one entry per line.

    Returns:
        A single string; empty when the file is empty.
    """
    # `with` closes the handle; the original left it open.
    with open(infile_full_gt, "r") as handle:
        return ";".join(sorted(line.strip() for line in handle))
|
||||
|
||||
def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
|
||||
|
||||
|
||||
f = open(infile_spec, "r")
|
||||
|
||||
all_variants = []
|
||||
|
||||
for line in open(infile_full, "r"):
|
||||
line.strip()
|
||||
all_variants.append(line)
|
||||
|
||||
if os.stat(infile).st_size == 0:
|
||||
cand_res = ['1B.v1_1B.v1']
|
||||
allele_res = "*1B/*1B"
|
||||
return ["".join(cand_res), allele_res];
|
||||
sys.exit()
|
||||
|
||||
|
||||
core_variants = get_core_variants(infile, cn)
|
||||
|
||||
|
||||
all_var_gt = []
|
||||
for line in open(infile_full_gt, "r"):
|
||||
line = line.strip()
|
||||
all_var_gt.append(line)
|
||||
|
||||
|
||||
dbs = []
|
||||
|
||||
for line in open(database, "r"):
|
||||
line = line.strip().split("\t")
|
||||
dbs.append(line)
|
||||
|
||||
soln_list1 = []
|
||||
soln_list2 = []
|
||||
|
||||
for record in dbs:
|
||||
record_core_var = record[1].split(";")
|
||||
record_core_var = ";".join(sorted(record_core_var))
|
||||
if record_core_var == core_variants:
|
||||
diplo = record[0]
|
||||
full_dip = record[2]
|
||||
soln_list1.append(record[0])
|
||||
soln_list2.append(record[2])
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
diff_alleles_check = False
|
||||
|
||||
def chkList(lst):
|
||||
if len(lst) < 0 :
|
||||
diff_alleles_check = True
|
||||
diff_alleles_check = all(ele == lst[0] for ele in lst)
|
||||
|
||||
if(diff_alleles_check):
|
||||
return("Equal")
|
||||
else:
|
||||
return("Not equal")
|
||||
|
||||
|
||||
if len(soln_list1) == 1:
|
||||
diplo = "".join(soln_list1)
|
||||
res1 = [i for i in range(len(diplo)) if diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo)) if diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo[:res2[0]])
|
||||
hap2 = "*" + str (diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo, allele_res];
|
||||
|
||||
|
||||
elif len(soln_list1) == 2:
|
||||
print(soln_list1)
|
||||
diplo1 = soln_list1[0]
|
||||
diplo2 = soln_list1[1]
|
||||
diplo1_supp_var = soln_list2[0].split(";")
|
||||
diplo2_supp_var = soln_list2[1].split(";")
|
||||
uniq_diplo1 = []
|
||||
uniq_diplo2 = []
|
||||
for i in all_variants:
|
||||
if i not in diplo1_supp_var:
|
||||
uniq_diplo1.append(i)
|
||||
|
||||
if i not in diplo2_supp_var:
|
||||
uniq_diplo2.append(i)
|
||||
|
||||
|
||||
if len(uniq_diplo1) < len(uniq_diplo2):
|
||||
res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo1[:res2[0]])
|
||||
hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo1, allele_res];
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2):
|
||||
res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo2[:res2[0]])
|
||||
hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo2, allele_res];
|
||||
|
||||
|
||||
else:
|
||||
tiebreak1 = []
|
||||
tiebreak2 = []
|
||||
tiebreak3 = []
|
||||
score = []
|
||||
for line in f:
|
||||
line = line.strip().split()
|
||||
|
||||
if line[2] == core_variants:
|
||||
tiebreak1.append(line[1])
|
||||
tiebreak2.append(line[3])
|
||||
tiebreak3.append(line[0])
|
||||
for full_dip in tiebreak2:
|
||||
diplo_supp_gt = full_dip.split(";")
|
||||
uniq_gt = []
|
||||
for i in all_var_gt:
|
||||
if i not in diplo_supp_gt:
|
||||
uniq_gt.append(i)
|
||||
score_dip = len(uniq_gt)
|
||||
score.append(score_dip)
|
||||
|
||||
min_score = min(score)
|
||||
|
||||
if chkList(score) == "Equal" and soln_list1[0] == "1A.v1_1G.v1":
|
||||
elem = "1.v1_1.v1"
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
return [soln_list1, elem, result_dip];
|
||||
|
||||
if chkList(score) == "Equal" and soln_list1[0] == "1A.v1_1J.v1":
|
||||
elem = "1.v1_1.v1"
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
return [soln_list1, elem, result_dip];
|
||||
|
||||
if chkList(score) == "Equal" and soln_list1[0] == "1A.v1_1V.v1":
|
||||
elem = "1.v1_1.v1"
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
return [soln_list1, elem, result_dip];
|
||||
|
||||
if chkList(score) == "Equal" and soln_list1[0] == "1B.v1_1J.v1":
|
||||
elem = "1.v1_1.v1"
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
return [soln_list1, elem, result_dip];
|
||||
|
||||
if chkList(score) == "Equal" and soln_list1[0] == "1B.v1_1W.v1":
|
||||
elem = "1.v1_1.v1"
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
return [soln_list1, elem, result_dip];
|
||||
|
||||
if chkList(score) == "Equal" and soln_list1[0] == "1F.v1_1W.v1":
|
||||
elem = "1.v1_1.v1"
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
return [soln_list1, elem, result_dip];
|
||||
|
||||
|
||||
if chkList(score) == "Equal":
|
||||
amb_soln_set = []
|
||||
for elem in soln_list1:
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
amb_soln_set.append(result_dip)
|
||||
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, allele_res];
|
||||
|
||||
|
||||
elif score.count(min_score) > 1:
|
||||
index_scores = []
|
||||
amb_soln_set = []
|
||||
|
||||
for i in score:
|
||||
if i == min_score:
|
||||
index_scores.append(score.index(i))
|
||||
|
||||
alt_solns = []
|
||||
for j in index_scores:
|
||||
elem = tiebreak1[j]
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
alt_solns.append(result_dip)
|
||||
|
||||
if chkList(alt_solns) == "Equal":
|
||||
return[soln_list1, alt_solns[0]];
|
||||
else:
|
||||
alt_solns = sorted(alt_solns)
|
||||
amb_soln_set.append(alt_solns[0])
|
||||
amb_soln_set.append(alt_solns[-1])
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, allele_res];
|
||||
|
||||
|
||||
else:
|
||||
minpos = score.index(min_score)
|
||||
best_diplo = tiebreak1[minpos]
|
||||
best_cand_haps = tiebreak3[minpos]
|
||||
res1 = [i for i in range(len(best_diplo)) if best_diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(best_diplo)) if best_diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (best_diplo[:res2[0]])
|
||||
hap2 = "*" + str (best_diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, best_cand_haps, allele_res];
|
||||
|
||||
|
||||
elif len(soln_list1) == 3:
|
||||
diplo1 = soln_list1[0]
|
||||
diplo2 = soln_list1[1]
|
||||
diplo3 = soln_list1[2]
|
||||
diplo1_supp_var = soln_list2[0].split(";")
|
||||
diplo2_supp_var = soln_list2[1].split(";")
|
||||
diplo3_supp_var = soln_list2[2].split(";")
|
||||
uniq_diplo1 = []
|
||||
uniq_diplo2 = []
|
||||
uniq_diplo3 = []
|
||||
|
||||
for i in all_variants:
|
||||
if i not in diplo1_supp_var:
|
||||
uniq_diplo1.append(i)
|
||||
|
||||
if i not in diplo2_supp_var:
|
||||
uniq_diplo2.append(i)
|
||||
|
||||
if i not in diplo3_supp_var:
|
||||
uniq_diplo3.append(i)
|
||||
|
||||
|
||||
if len(uniq_diplo1) < len(uniq_diplo2) and len(uniq_diplo1) < len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo1[:res2[0]])
|
||||
hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo1, allele_res];
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2) and len(uniq_diplo2) < len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo2[:res2[0]])
|
||||
hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo2, allele_res]
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2) and len(uniq_diplo2) > len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo3)) if diplo3.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo3)) if diplo3.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo3[:res2[0]])
|
||||
hap2 = "*" + str (diplo3[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo3, allele_res]
|
||||
|
||||
|
||||
elif len(uniq_diplo1) == len(uniq_diplo2) == len(uniq_diplo3) or (len(uniq_diplo1) != len(uniq_diplo2) == len(uniq_diplo3)) or (len(uniq_diplo1) == len(uniq_diplo2) != len(uniq_diplo3)):
|
||||
|
||||
tiebreak1 = []
|
||||
tiebreak2 = []
|
||||
tiebreak3 = []
|
||||
score = []
|
||||
for line in f:
|
||||
line = line.strip().split()
|
||||
|
||||
if line[2] == core_variants:
|
||||
tiebreak1.append(line[1])
|
||||
tiebreak2.append(line[3])
|
||||
tiebreak3.append(line[0])
|
||||
for full_dip in tiebreak2:
|
||||
diplo_supp_gt = full_dip.split(";")
|
||||
uniq_gt = []
|
||||
for i in all_var_gt:
|
||||
if i not in diplo_supp_gt:
|
||||
uniq_gt.append(i)
|
||||
score_dip = len(uniq_gt)
|
||||
score.append(score_dip)
|
||||
|
||||
min_score = min(score)
|
||||
|
||||
if chkList(score) == "Equal" and soln_list1[0] == "1A.v1_1W.v1":
|
||||
elem = "1.v1_1.v1"
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
return [soln_list1, elem, result_dip];
|
||||
|
||||
|
||||
|
||||
elif chkList(score) == "Equal":
|
||||
amb_soln_set = []
|
||||
for elem in tiebreak1:
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
amb_soln_set.append(result_dip)
|
||||
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, tiebreak1, allele_res];
|
||||
|
||||
|
||||
else:
|
||||
minpos = score.index(min_score)
|
||||
best_diplo = tiebreak1[minpos]
|
||||
best_cand_haps = tiebreak3[minpos]
|
||||
res1 = [i for i in range(len(best_diplo)) if best_diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(best_diplo)) if best_diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (best_diplo[:res2[0]])
|
||||
hap2 = "*" + str (best_diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, best_cand_haps, allele_res];
|
||||
71
pgx-main/scripts/cyp1a2/b37/bin/stellarpgx.py
Normal file
71
pgx-main/scripts/cyp1a2/b37/bin/stellarpgx.py
Normal file
@@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
|
||||
# Command-line driver: prints the sample's core variants, the candidate
# diplotypes and the final CYP1A2 star-allele call.
#
# Usage: stellarpgx.py <database> <core_vars> <all_vars> <all_vars_gt> <spec>

print("--------------------------------------------\n")

print("CYP1A2 Star Allele Calling with StellarPGx\n")

print("--------------------------------------------\n")


database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]

# Copy number is fixed at 2 for this gene's script.
cn = 2

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)


snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)

# None means no database diplotype could be called; fall back to the
# closest "background" alleles and stop.  (Identity test instead of
# `== None`, per PEP 8.)
if snv_def_calls is None:

    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if bac_alleles is None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")

    else:
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    sys.exit()


# First element: candidate diplotype key(s); last element: formatted call.
snv_cand_alleles = snv_def_calls[0]

print("\nCandidate alleles:")
print(snv_cand_alleles)


snv_def_alleles = snv_def_calls[-1]

# NOTE(review): the value is not used afterwards; the call is kept so the
# script still reads (and fails on) this file exactly as before.
dip_variants = get_all_vars_gt(infile_full_gt)


print("\nResult:")

print(snv_def_alleles)
|
||||
0
pgx-main/scripts/cyp1a2/b37/bin/sv_modules.py
Normal file
0
pgx-main/scripts/cyp1a2/b37/bin/sv_modules.py
Normal file
78
pgx-main/scripts/cyp1a2/hg38/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/cyp1a2/hg38/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Pick the database diplotype that most resembles an unmatched sample.

    Args:
        database: Path to a tab-separated file; column 0 is a diplotype key
            (``<hap1>.<ver>_<hap2>.<ver>``) and column 1 a ``;``-separated
            list of core variants.
        core_vars: The sample's core variants as one ``;``-joined string.

    Returns:
        ``[allele_res, diplo]`` - the formatted call (e.g. ``*1A/*1F``) and
        the database key it came from.  When no record qualifies the
        default ``['*1A/*1A', '1A.v1_1A.v1']`` is returned.

    Raises:
        IndexError: a database row has no second column, or the chosen key
            lacks the ``.``/``_`` separators.
    """
    core_vars_list = core_vars.split(";")
    # Variant names with their last four characters dropped (presumably
    # the "~0/1"-style genotype suffix - confirm against the input format).
    last_name = core_vars_list[-1][:-4]
    first_name = core_vars_list[0][:-4]

    # Context manager so the database handle is closed (it used to leak).
    with open(database, "r") as handle:
        dbs = [line.strip().split("\t") for line in handle]

    candidates = []
    cand_vars = []
    scores = []

    for record in dbs:
        record_vars_text = record[1]

        # NOTE(review): the original condition was
        # `core_temp1 and core_temp2 in temp_rec`, which Python reads as
        # "last name is non-empty AND first name occurs in the record".
        # Requiring BOTH names to occur was probably intended; the
        # effective behaviour is kept here because tightening the filter
        # changes which candidates are scored.
        if not (last_name and first_name in record_vars_text):
            continue

        record_vars = record_vars_text.split(";")
        candidates.append(record[0])
        cand_vars.append(record_vars)

        # +3 exact variant+genotype match, +1 same variant name present in
        # the sample string, -2 variant absent from the sample.
        score = 0
        for variant in record_vars:
            if variant in core_vars_list:
                score += 3
            elif variant[:-4] in core_vars:
                score += 1
            else:
                score -= 2
        scores.append(score)

    if not scores:
        diplo1 = '1A.v1_1A.v1'
    else:
        max_score = max(scores)
        near_best = [
            pos for pos, value in enumerate(scores)
            if value == max_score or value == max_score - 1
        ]
        # Among the (near-)best records prefer the one with the fewest core
        # variants; min() keeps the earliest on ties, like list.index did.
        diplo1 = candidates[min(near_best, key=lambda pos: len(cand_vars[pos]))]

    underscores = [pos for pos, char in enumerate(diplo1) if char == "_"]
    dots = [pos for pos, char in enumerate(diplo1) if char == "."]
    hap1 = "*" + diplo1[:dots[0]]
    hap2 = "*" + diplo1[underscores[0] + 1:dots[1]]
    allele_res = hap1 + "/" + hap2

    return [allele_res, diplo1]
|
||||
305
pgx-main/scripts/cyp1a2/hg38/bin/snv_def_modules.py
Normal file
305
pgx-main/scripts/cyp1a2/hg38/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,305 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ``;``-joined string.

    Args:
        infile: Path to a text file holding one core variant per line.
        cn: Copy number (anything ``int()`` accepts).  When it equals 1,
            every ``~0/1`` in the result is rewritten to ``~1/1``.

    Returns:
        The stripped lines of ``infile``, sorted and joined with ``;``
        (an empty string for an empty file).
    """
    # The context manager closes the handle; the previous bare open() in
    # the for-loop header left that to the garbage collector.
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars
|
||||
|
||||
def get_all_vars_gt(infile_full_gt):
    """Return every line of ``infile_full_gt`` as one sorted, ``;``-joined string.

    Args:
        infile_full_gt: Path to a text file with one entry per line.

    Returns:
        The stripped lines, sorted and joined with ``;`` (an empty string
        for an empty file).
    """
    # Use a context manager so the handle is closed deterministically.
    with open(infile_full_gt, "r") as handle:
        return ";".join(sorted(line.strip() for line in handle))
|
||||
|
||||
def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
|
||||
|
||||
|
||||
f = open(infile_spec, "r")
|
||||
|
||||
all_variants = []
|
||||
|
||||
for line in open(infile_full, "r"):
|
||||
line.strip()
|
||||
all_variants.append(line)
|
||||
|
||||
if os.stat(infile).st_size == 0:
|
||||
cand_res = ['1.v1_1.v1']
|
||||
allele_res = "*1/*1"
|
||||
return ["".join(cand_res), allele_res];
|
||||
sys.exit()
|
||||
|
||||
|
||||
core_variants = get_core_variants(infile, cn)
|
||||
|
||||
|
||||
all_var_gt = []
|
||||
for line in open(infile_full_gt, "r"):
|
||||
line = line.strip()
|
||||
all_var_gt.append(line)
|
||||
|
||||
|
||||
dbs = []
|
||||
|
||||
for line in open(database, "r"):
|
||||
line = line.strip().split("\t")
|
||||
dbs.append(line)
|
||||
|
||||
soln_list1 = []
|
||||
soln_list2 = []
|
||||
|
||||
for record in dbs:
|
||||
record_core_var = record[1].split(";")
|
||||
record_core_var = ";".join(sorted(record_core_var))
|
||||
if record_core_var == core_variants:
|
||||
diplo = record[0]
|
||||
full_dip = record[2]
|
||||
soln_list1.append(record[0])
|
||||
soln_list2.append(record[2])
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
diff_alleles_check = False
|
||||
|
||||
def chkList(lst):
|
||||
if len(lst) < 0 :
|
||||
diff_alleles_check = True
|
||||
diff_alleles_check = all(ele == lst[0] for ele in lst)
|
||||
|
||||
if(diff_alleles_check):
|
||||
return("Equal")
|
||||
else:
|
||||
return("Not equal")
|
||||
|
||||
|
||||
if len(soln_list1) == 1:
|
||||
diplo = "".join(soln_list1)
|
||||
res1 = [i for i in range(len(diplo)) if diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo)) if diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo[:res2[0]])
|
||||
hap2 = "*" + str (diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo, allele_res];
|
||||
|
||||
|
||||
elif len(soln_list1) == 2:
|
||||
print(soln_list1)
|
||||
diplo1 = soln_list1[0]
|
||||
diplo2 = soln_list1[1]
|
||||
diplo1_supp_var = soln_list2[0].split(";")
|
||||
diplo2_supp_var = soln_list2[1].split(";")
|
||||
uniq_diplo1 = []
|
||||
uniq_diplo2 = []
|
||||
for i in all_variants:
|
||||
if i not in diplo1_supp_var:
|
||||
uniq_diplo1.append(i)
|
||||
|
||||
if i not in diplo2_supp_var:
|
||||
uniq_diplo2.append(i)
|
||||
|
||||
|
||||
if len(uniq_diplo1) < len(uniq_diplo2):
|
||||
res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo1[:res2[0]])
|
||||
hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo1, allele_res];
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2):
|
||||
res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo2[:res2[0]])
|
||||
hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo2, allele_res];
|
||||
|
||||
else:
|
||||
tiebreak1 = []
|
||||
tiebreak2 = []
|
||||
tiebreak3 = []
|
||||
score = []
|
||||
for line in f:
|
||||
line = line.strip().split()
|
||||
|
||||
if line[2] == core_variants:
|
||||
tiebreak1.append(line[1])
|
||||
tiebreak2.append(line[3])
|
||||
tiebreak3.append(line[0])
|
||||
for full_dip in tiebreak2:
|
||||
diplo_supp_gt = full_dip.split(";")
|
||||
uniq_gt = []
|
||||
for i in all_var_gt:
|
||||
if i not in diplo_supp_gt:
|
||||
uniq_gt.append(i)
|
||||
score_dip = len(uniq_gt)
|
||||
score.append(score_dip)
|
||||
|
||||
min_score = min(score)
|
||||
|
||||
|
||||
if chkList(score) == "Equal":
|
||||
amb_soln_set = []
|
||||
for elem in soln_list1:
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
amb_soln_set.append(result_dip)
|
||||
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, allele_res];
|
||||
|
||||
|
||||
elif score.count(min_score) > 1:
|
||||
index_scores = []
|
||||
amb_soln_set = []
|
||||
|
||||
for i in score:
|
||||
if i == min_score:
|
||||
index_scores.append(score.index(i))
|
||||
|
||||
alt_solns = []
|
||||
for j in index_scores:
|
||||
elem = tiebreak1[j]
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
alt_solns.append(result_dip)
|
||||
|
||||
if chkList(alt_solns) == "Equal":
|
||||
return[soln_list1, alt_solns[0]];
|
||||
else:
|
||||
alt_solns = sorted(alt_solns)
|
||||
amb_soln_set.append(alt_solns[0])
|
||||
amb_soln_set.append(alt_solns[-1])
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, allele_res];
|
||||
|
||||
|
||||
else:
|
||||
minpos = score.index(min_score)
|
||||
best_diplo = tiebreak1[minpos]
|
||||
best_cand_haps = tiebreak3[minpos]
|
||||
res1 = [i for i in range(len(best_diplo)) if best_diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(best_diplo)) if best_diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (best_diplo[:res2[0]])
|
||||
hap2 = "*" + str (best_diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, best_cand_haps, allele_res];
|
||||
|
||||
|
||||
elif len(soln_list1) == 3:
|
||||
diplo1 = soln_list1[0]
|
||||
diplo2 = soln_list1[1]
|
||||
diplo3 = soln_list1[2]
|
||||
diplo1_supp_var = soln_list2[0].split(";")
|
||||
diplo2_supp_var = soln_list2[1].split(";")
|
||||
diplo3_supp_var = soln_list2[2].split(";")
|
||||
uniq_diplo1 = []
|
||||
uniq_diplo2 = []
|
||||
uniq_diplo3 = []
|
||||
|
||||
for i in all_variants:
|
||||
if i not in diplo1_supp_var:
|
||||
uniq_diplo1.append(i)
|
||||
|
||||
if i not in diplo2_supp_var:
|
||||
uniq_diplo2.append(i)
|
||||
|
||||
if i not in diplo3_supp_var:
|
||||
uniq_diplo3.append(i)
|
||||
|
||||
|
||||
if len(uniq_diplo1) < len(uniq_diplo2) and len(uniq_diplo1) < len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo1[:res2[0]])
|
||||
hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo1, allele_res];
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2) and len(uniq_diplo2) < len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo2[:res2[0]])
|
||||
hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo2, allele_res]
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2) and len(uniq_diplo2) > len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo3)) if diplo3.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo3)) if diplo3.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo3[:res2[0]])
|
||||
hap2 = "*" + str (diplo3[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo3, allele_res]
|
||||
|
||||
|
||||
elif len(uniq_diplo1) == len(uniq_diplo2) == len(uniq_diplo3) or (len(uniq_diplo1) != len(uniq_diplo2) == len(uniq_diplo3)) or (len(uniq_diplo1) == len(uniq_diplo2) != len(uniq_diplo3)):
|
||||
|
||||
tiebreak1 = []
|
||||
tiebreak2 = []
|
||||
tiebreak3 = []
|
||||
score = []
|
||||
for line in f:
|
||||
line = line.strip().split()
|
||||
|
||||
if line[2] == core_variants:
|
||||
tiebreak1.append(line[1])
|
||||
tiebreak2.append(line[3])
|
||||
tiebreak3.append(line[0])
|
||||
for full_dip in tiebreak2:
|
||||
diplo_supp_gt = full_dip.split(";")
|
||||
uniq_gt = []
|
||||
for i in all_var_gt:
|
||||
if i not in diplo_supp_gt:
|
||||
uniq_gt.append(i)
|
||||
score_dip = len(uniq_gt)
|
||||
score.append(score_dip)
|
||||
|
||||
min_score = min(score)
|
||||
|
||||
|
||||
elif chkList(score) == "Equal":
|
||||
amb_soln_set = []
|
||||
for elem in tiebreak1:
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
amb_soln_set.append(result_dip)
|
||||
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, tiebreak1, allele_res];
|
||||
|
||||
|
||||
else:
|
||||
minpos = score.index(min_score)
|
||||
best_diplo = tiebreak1[minpos]
|
||||
best_cand_haps = tiebreak3[minpos]
|
||||
res1 = [i for i in range(len(best_diplo)) if best_diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(best_diplo)) if best_diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (best_diplo[:res2[0]])
|
||||
hap2 = "*" + str (best_diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, best_cand_haps, allele_res];
|
||||
70
pgx-main/scripts/cyp1a2/hg38/bin/stellarpgx.py
Normal file
70
pgx-main/scripts/cyp1a2/hg38/bin/stellarpgx.py
Normal file
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
# Command-line driver: prints the sample's core variants, the candidate
# diplotypes and the final CYP1A2 star-allele call.
#
# Usage: stellarpgx.py <database> <core_vars> <all_vars> <all_vars_gt> <spec>

print("--------------------------------------------\n")

print("CYP1A2 Star Allele Calling with StellarPGx\n")

print("--------------------------------------------\n")


database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]

# Copy number is fixed at 2 for this gene's script.
cn = 2

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)


snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)

# None means no database diplotype could be called; fall back to the
# closest "background" alleles and stop.  (Identity test instead of
# `== None`, per PEP 8.)
if snv_def_calls is None:

    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if bac_alleles is None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")

    else:
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    sys.exit()


# First element: candidate diplotype key(s); last element: formatted call.
snv_cand_alleles = snv_def_calls[0]

print("\nCandidate alleles:")
print(snv_cand_alleles)


snv_def_alleles = snv_def_calls[-1]

# NOTE(review): the value is not used afterwards; the call is kept so the
# script still reads (and fails on) this file exactly as before.
dip_variants = get_all_vars_gt(infile_full_gt)


print("\nResult:")

print(snv_def_alleles)
|
||||
0
pgx-main/scripts/cyp1a2/hg38/bin/sv_modules.py
Normal file
0
pgx-main/scripts/cyp1a2/hg38/bin/sv_modules.py
Normal file
77
pgx-main/scripts/cyp2a6/b37/bin/bkg_modules.py
Normal file
77
pgx-main/scripts/cyp2a6/b37/bin/bkg_modules.py
Normal file
@@ -0,0 +1,77 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Pick the database diplotype that most resembles an unmatched sample.

    Args:
        database: Path to a tab-separated file; column 0 is a diplotype key
            (``<hap1>.<ver>_<hap2>.<ver>``) and column 1 a ``;``-separated
            list of core variants.
        core_vars: The sample's core variants as one ``;``-joined string.

    Returns:
        ``[allele_res, diplo]`` - the formatted call (e.g. ``*1/*4``) and
        the database key it came from.  When no record qualifies the
        default ``['*18/*18', '18.v1_18.v1']`` is returned.

    Raises:
        IndexError: a database row has no second column, or the chosen key
            lacks the ``.``/``_`` separators.
    """
    core_vars_list = core_vars.split(";")
    # Variant names with their last four characters dropped (presumably
    # the "~0/1"-style genotype suffix - confirm against the input format).
    last_name = core_vars_list[-1][:-4]
    first_name = core_vars_list[0][:-4]

    # Context manager so the database handle is closed (it used to leak).
    with open(database, "r") as handle:
        dbs = [line.strip().split("\t") for line in handle]

    candidates = []
    cand_vars = []
    scores = []

    for record in dbs:
        record_vars_text = record[1]

        # NOTE(review): the original condition was
        # `core_temp1 and core_temp2 in temp_rec`, which Python reads as
        # "last name is non-empty AND first name occurs in the record".
        # Requiring BOTH names to occur was probably intended; the
        # effective behaviour is kept here because tightening the filter
        # changes which candidates are scored.
        if not (last_name and first_name in record_vars_text):
            continue

        record_vars = record_vars_text.split(";")
        candidates.append(record[0])
        cand_vars.append(record_vars)

        # +3 exact variant+genotype match, +1 same variant name present in
        # the sample string, -2 variant absent from the sample.
        score = 0
        for variant in record_vars:
            if variant in core_vars_list:
                score += 3
            elif variant[:-4] in core_vars:
                score += 1
            else:
                score -= 2
        scores.append(score)

    if not scores:
        diplo1 = '18.v1_18.v1'
    else:
        max_score = max(scores)
        near_best = [
            pos for pos, value in enumerate(scores)
            if value == max_score or value == max_score - 1
        ]
        # Among the (near-)best records prefer the one with the fewest core
        # variants; min() keeps the earliest on ties, like list.index did.
        diplo1 = candidates[min(near_best, key=lambda pos: len(cand_vars[pos]))]

    underscores = [pos for pos, char in enumerate(diplo1) if char == "_"]
    dots = [pos for pos, char in enumerate(diplo1) if char == "."]
    hap1 = "*" + diplo1[:dots[0]]
    hap2 = "*" + diplo1[underscores[0] + 1:dots[1]]
    allele_res = hap1 + "/" + hap2

    return [allele_res, diplo1]
|
||||
442
pgx-main/scripts/cyp2a6/b37/bin/snv_def_modules.py
Normal file
442
pgx-main/scripts/cyp2a6/b37/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,442 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ``;``-joined string.

    Args:
        infile: Path to a text file holding one core variant per line.
        cn: Copy number (anything ``int()`` accepts).  When it equals 1,
            every ``~0/1`` in the result is rewritten to ``~1/1``.

    Returns:
        The stripped lines of ``infile``, sorted and joined with ``;``
        (an empty string for an empty file).
    """
    # The context manager closes the handle; the previous bare open() in
    # the for-loop header left that to the garbage collector.
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars
|
||||
|
||||
def get_all_vars_gt(infile_full_gt):
    """Return every line of ``infile_full_gt`` as one sorted, ``;``-joined string.

    Args:
        infile_full_gt: Path to a text file with one entry per line.

    Returns:
        The stripped lines, sorted and joined with ``;`` (an empty string
        for an empty file).
    """
    # Use a context manager so the handle is closed deterministically.
    with open(infile_full_gt, "r") as handle:
        return ";".join(sorted(line.strip() for line in handle))
|
||||
|
||||
def _star_diplotype(diplo):
    """Convert a database diplotype such as '1.v1_46.v1' into '*1/*46'.

    The first haplotype is the text before the first '.', the second is the
    text between the first '_' and the second '.'.
    """
    first_dot = diplo.index(".")
    second_dot = diplo.index(".", first_dot + 1)
    separator = diplo.index("_")
    return "*" + diplo[:first_dot] + "/" + "*" + diplo[separator + 1:second_dot]


def _all_equal(items):
    """Return True when every element equals the first (True for an empty list)."""
    return all(item == items[0] for item in items)


def _score_spec_candidates(infile_spec, core_variants, all_var_gt):
    """Score every ``infile_spec`` row whose third column equals ``core_variants``.

    Returns four parallel lists:
        diplotypes (column 2), candidate haplotypes (column 1),
        score  - number of sample genotypes absent from the row's 4th column,
        score2 - number of the row's 4th-column entries absent from the sample.
    """
    diplotypes = []
    cand_haps = []
    score = []
    score2 = []
    sample_gt = set(all_var_gt)

    with open(infile_spec, "r") as handle:
        for line in handle:
            fields = line.strip().split()
            if fields[2] != core_variants:
                continue
            diplotypes.append(fields[1])
            cand_haps.append(fields[0])
            supporting = fields[3].split(";")
            supporting_set = set(supporting)
            score.append(sum(1 for gt in all_var_gt if gt not in supporting_set))
            score2.append(sum(1 for gt in supporting if gt not in sample_gt))

    return diplotypes, cand_haps, score, score2


def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
    """Pick the diplotype(s) whose core variants match the sample.

    Args:
        database: tab-separated file; column 0 = diplotype id
            (e.g. '1.v1_46.v1'), column 1 = ';'-joined core variants,
            column 2 = ';'-joined full variant list.
        infile: the sample's core-variant file (one entry per line).
        infile_full: the sample's full variant file.
        infile_full_gt: the sample's full variant + genotype file.
        infile_spec: whitespace-separated tie-break table; column 0 =
            candidate haplotypes, 1 = diplotype id, 2 = core variants,
            3 = ';'-joined variant/genotype list.
        cn: copy number, forwarded to ``get_core_variants``.

    Returns:
        * ``['18.v1_18.v1', '18.v1_18.v1', '*18/*18']`` when ``infile`` is empty;
        * ``[matches, chosen, star_call]`` when a single call is made;
        * ``[matches, star_call]`` when the call is ambiguous (``star_call``
          may be several calls joined with ' or ');
        * ``None`` when 0 or more than 3 database rows match, or when three
          candidates fall into an ordering no rule covers.
    """
    # NOTE(review): the original called ``line.strip()`` here without keeping
    # the result, so every entry still carries its newline and practically
    # never matches a database variant.  That makes the "unexplained variant"
    # comparison below almost always a tie.  The behaviour is kept on purpose:
    # correcting it would change genotype calls and needs re-validation.
    with open(infile_full, "r") as handle:
        all_variants = handle.readlines()

    if os.stat(infile).st_size == 0:
        # No core variants at all: report the default *18/*18 call.
        return ['18.v1_18.v1', '18.v1_18.v1', "*18/*18"]

    core_variants = get_core_variants(infile, cn)

    with open(infile_full_gt, "r") as handle:
        all_var_gt = [line.strip() for line in handle]

    # Database rows whose (sorted) core variants equal the sample's.
    soln_list1 = []
    soln_list2 = []
    with open(database, "r") as handle:
        for line in handle:
            record = line.strip().split("\t")
            if ";".join(sorted(record[1].split(";"))) == core_variants:
                soln_list1.append(record[0])
                soln_list2.append(record[2])

    n_candidates = len(soln_list1)

    if n_candidates == 1:
        diplo = soln_list1[0]
        return [soln_list1, diplo, _star_diplotype(diplo)]

    if n_candidates not in (2, 3):
        return None

    if n_candidates == 2:
        print(soln_list1)

    # Number of sample variants each candidate leaves unexplained.
    unexplained = []
    for supporting in soln_list2:
        supporting_set = set(supporting.split(";"))
        unexplained.append(
            sum(1 for variant in all_variants if variant not in supporting_set))

    best = None
    if n_candidates == 2:
        if unexplained[0] < unexplained[1]:
            best = soln_list1[0]
        elif unexplained[0] > unexplained[1]:
            best = soln_list1[1]
    else:
        u1, u2, u3 = unexplained
        if u1 < u2 and u1 < u3:
            best = soln_list1[0]
        elif u1 > u2 and u2 < u3:
            best = soln_list1[1]
        elif u1 > u2 and u2 > u3:
            best = soln_list1[2]
        elif not (u1 == u2 == u3
                  or (u1 != u2 and u2 == u3)
                  or (u1 == u2 and u2 != u3)):
            # Ordering not covered by any rule of the original code.
            return None

    if best is not None:
        return [soln_list1, best, _star_diplotype(best)]

    # ---- tie-break using the genotype-aware table -------------------------
    tiebreak1, tiebreak3, score, score2 = _score_spec_candidates(
        infile_spec, core_variants, all_var_gt)

    min_score = min(score)
    min_score2 = min(score2)
    res_list = [i for i, value in enumerate(score2) if value == min_score2]

    if _all_equal(score):
        # NOTE(review): the original handled "> 3" and "< 3" ties only and
        # crashed with an unbound name for exactly 3.  Treating 3 like "> 3"
        # is an assumption - confirm with the method authors.
        if len(res_list) >= 3:
            shortlist = soln_list1
        else:
            shortlist = [tiebreak1[res_list[0]], tiebreak1[res_list[-1]]]

        amb_soln_set = [_star_diplotype(elem) for elem in shortlist]
        if amb_soln_set[0] != amb_soln_set[1]:
            allele_res = " or ".join(amb_soln_set)
        else:
            allele_res = amb_soln_set[0]
        return [soln_list1, allele_res]

    if score.count(min_score) > 1:
        # Hard-coded resolutions carried over from the original code.
        if n_candidates == 2:
            if soln_list1[1] == "35.v1_7.v1":
                return [soln_list1, ['35.v1_7.v1'], '*35/*7']
            if soln_list1[1] == "18.v1_7.v1":
                return [soln_list1, ['18.v1_7.v1'], '*18/*7']
        elif soln_list1[0] == "1.v1_37.v1":
            return [soln_list1, ['1.v1_37.v1'], '*1/*37']

        alt_solns = [_star_diplotype(tiebreak1[j])
                     for j, value in enumerate(score) if value == min_score]
        if n_candidates == 3:
            alt_solns1 = [_star_diplotype(tiebreak1[n]) for n in res_list]

        if _all_equal(alt_solns):
            # All tied rows give the same star call: report the last database
            # candidate that formats to it (first candidate if none does).
            diplo1 = soln_list1[0]
            for candidate in soln_list1:
                if _star_diplotype(candidate) == alt_solns[0]:
                    diplo1 = candidate
            return [soln_list1, diplo1, alt_solns[0]]

        if n_candidates == 2:
            if alt_solns[0] == '*1/*36':
                return [soln_list1, ['1.v1_36.v1'], '*1/*36']
            ordered = sorted(alt_solns)
            if ordered[0] != ordered[-1]:
                allele_res = " or ".join([ordered[0], ordered[-1]])
            else:
                allele_res = ordered[0]
            return [soln_list1, allele_res]

        # Three candidates: keep the tied calls that also minimise score2.
        amb_soln_set = [call for call in sorted(alt_solns) if call in alt_solns1]
        return [soln_list1, " or ".join(amb_soln_set)]

    # A unique best-scoring row.
    minpos = score.index(min_score)
    return [soln_list1, tiebreak3[minpos], _star_diplotype(tiebreak1[minpos])]
|
||||
355
pgx-main/scripts/cyp2a6/b37/bin/stellarpgx.py
Normal file
355
pgx-main/scripts/cyp2a6/b37/bin/stellarpgx.py
Normal file
@@ -0,0 +1,355 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from sv_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
|
||||
print("--------------------------------------------\n")

print("CYP2A6 Star Allele Calling with StellarPGx\n")

print("--------------------------------------------\n")


# Positional command-line arguments, in the order the pipeline passes them.
database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]
sv_del = sys.argv[6]
sv_dup = sys.argv[7]
cov_file = sys.argv[8]
hap_dbs = sys.argv[9]
act_score = sys.argv[10]


# Parse the coverage file once; the original re-read it for every field.
cov_summary = get_total_CN(cov_file)
cn = cov_summary[0]

print("Initially computed CN = {}".format(cn))

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)


snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)


if snv_def_calls is None:
    # No usable SNV-based call: report background alleles and stop.
    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if int(cn) == 0:
        print("\nResult:")
        print("*4/*4")

    elif bac_alleles is None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")

    elif int(cn) < 2:
        bac_alleles = bac_alleles[0].split("/")
        bac_alleles1 = bac_alleles[0] + "/" + "*4"
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles1 + "]")

    else:
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    sys.exit()


best_diplos = snv_def_calls[0]

print("\nCandidate alleles:")
print(best_diplos)


# Last element is always the star-allele call (possibly "A or B").
snv_def_alleles = snv_def_calls[-1]

if "or" not in snv_def_alleles:
    snv_cand_alleles = snv_def_calls[1]


dip_variants = get_all_vars_gt(infile_full_gt)


print("\nResult:")


av_cov = cov_summary[1]
cov_ctrl = cov_summary[2]
cov_e1_e2 = cov_summary[3]
cov_e3_e9 = cov_summary[4]
cov_3p_utr = cov_summary[5]
cov_e1_e4 = cov_summary[6]
cov_e5_e9 = cov_summary[7]
cov_e3_e4 = cov_summary[8]
cov_e9_3pr = cov_summary[9]
cov_e7_e8 = cov_summary[10]


gene_alleles = ""

# Alleles the script treats specially in the 3'-UTR (*1B / *46) checks.
conv_3p_utr = ['*5','*7','*8','*10','*19','*24','*28','*35','*36','*37']

if snv_def_alleles != '*18/*18' and cn != '0':
    in_list = dup_test_init(sv_dup, av_cov)


if cn == '2':

    if 'or' in snv_def_alleles:
        print (snv_def_alleles)

    else:
        snv_def_alleles = snv_def_alleles.split("/")

        if snv_def_alleles[0] == '*46' or snv_def_alleles[1] == '*46':
            ind_star2 = snv_def_alleles.index('*46')
            ind_other = 1 - ind_star2

            test_1b = star_1b_test(cov_3p_utr, cov_ctrl)

            if test_1b == 'no_1B' and (snv_def_alleles.count('*46') == 2):
                gene_alleles = "*1" + "/" + "*1"
                print(gene_alleles)

            elif test_1b == 'het_1B' and (snv_def_alleles[ind_other] not in conv_3p_utr):
                gene_alleles = snv_def_alleles[ind_other] + "/" + "*46"
                print(gene_alleles)

            elif test_1b =='hom_1B' and (snv_def_alleles[ind_other] in conv_3p_utr):
                gene_alleles = snv_def_alleles[ind_other] + "/" + "*46"
                print(gene_alleles)

        elif snv_def_alleles[0] == '*1' or snv_def_alleles[1] == '*1':
            ind_star2 = snv_def_alleles.index('*1')
            ind_other = 1 - ind_star2

            test_12_34 = hybrid_12_34(cov_e1_e2, cov_e3_e9, cov_e1_e4, cov_e5_e9, cov_e3_e4)
            test_1b = star_1b_test(cov_3p_utr, cov_ctrl)

            # snv_def_alleles is a list here; the original compared it with the
            # string "*1/*1", which could never match, so the homozygous hybrid
            # calls below were unreachable.
            both_star1 = snv_def_alleles.count('*1') == 2

            if test_12_34 == 'norm_var':

                if test_1b == 'no_1B':
                    gene_alleles = "/".join(snv_def_alleles)
                    print(gene_alleles)

                elif test_1b == 'het_1B' and (snv_def_alleles[ind_other] not in conv_3p_utr):
                    gene_alleles = snv_def_alleles[ind_other] + "/" + "*46"
                    print(gene_alleles)

                elif test_1b == 'hom_1B' and both_star1:
                    gene_alleles = "*46/*46"
                    print(gene_alleles)

                elif test_1b =='hom_1B':
                    # Covers both the "other allele in conv_3p_utr" case and
                    # the remaining hom_1B case; the original emitted the same
                    # call for each.
                    gene_alleles = snv_def_alleles[ind_other] + "/" + "*46"
                    print(gene_alleles)

                else:
                    gene_alleles = "/".join(snv_def_alleles)
                    print(gene_alleles)

            elif test_12_34 == 'hyb_12':
                gene_alleles = snv_def_alleles[ind_other] + "/" + "*12"
                print(gene_alleles)

            elif test_12_34 == 'hyb_12_2' and both_star1:
                gene_alleles = "*12/*12"
                print(gene_alleles)

            elif test_12_34 == 'hyb_34':
                gene_alleles = snv_def_alleles[ind_other] + "/" + "*34"
                print(gene_alleles)

            elif test_12_34 == 'hyb_34_2' and both_star1:
                gene_alleles = "*34/*34"
                print(gene_alleles)

        else:
            # Neither allele is *46 or *1: report the SNV call unchanged.
            gene_alleles = "/".join(snv_def_alleles)
            print(gene_alleles)


elif cn == '0':
    del_confirm = del_test(sv_del)
    test_47_1 = hybrid_47_test1(cov_e9_3pr, cov_e7_e8)
    test_47_2 = hybrid_47_test2(cov_e9_3pr, cov_e7_e8, cov_ctrl)

    if del_confirm == '*4/*4' and test_47_2 == 'no_hyb_47':
        gene_alleles = '*4/*4'

    elif del_confirm == '*4/*4' and test_47_2 == 'het_47':
        gene_alleles = '*4/*47'

    elif del_confirm == '*4/*4' and test_47_2 == 'hom_47':
        gene_alleles = '*47/*47'

    elif del_confirm == '*4' and test_47_1 == 'no_hyb_47':
        gene_alleles = '*4' + "/" + "*other"

    elif del_confirm == '*4' and test_47_1 == 'hyb_47':
        gene_alleles = '*47' + "/" + "*other"

    else:
        gene_alleles = "*4/*4"

    print(gene_alleles)


elif cn == '1':
    del_confirm = del_test(sv_del)
    test_47_1 = hybrid_47_test1(cov_e9_3pr, cov_e7_e8)

    if "or" in snv_def_alleles and del_confirm == 'None':
        print (snv_def_alleles + "\t" + "Possible CYP2A6 gene deletion (*4) present")

    elif "or" not in snv_def_alleles and del_confirm == 'None':
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] == snv_def_alleles[1] and test_47_1 == 'no_hyb_47':
            gene_alleles = snv_def_alleles[0] + "/" + "*4"
            print(gene_alleles)

        elif snv_def_alleles[0] == snv_def_alleles[1] and test_47_1 == 'hyb_47':
            gene_alleles = snv_def_alleles[0] + "/" + "*47"
            print(gene_alleles)

        elif snv_def_alleles[0] != snv_def_alleles[1]:
            samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)

            if test_47_1 == 'hyb_47':
                gene_alleles = samp_allele1 + "/" + "*47"
            else:
                gene_alleles = samp_allele1 + "/" + "*4"

            print(gene_alleles)

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        test_1b = star_1b_test(cov_3p_utr, cov_ctrl)

        # A deletion record exists: the deleted copy is always reported as *4.
        # The original wrote `del_confirm == "*4"` (a no-op comparison) in the
        # heterozygous branch, leaving whatever del_test() returned in place.
        del_confirm = "*4"

        if snv_def_alleles[0] == snv_def_alleles[1]:

            if snv_def_alleles[0] == '*1' and test_1b == 'hom_1B':
                snv_def_alleles[0] = '*46'
                gene_alleles = del_confirm + "/" + snv_def_alleles[0]
                print(gene_alleles)

            elif snv_def_alleles[0] in conv_3p_utr and test_47_1 == 'hyb_47':
                gene_alleles = snv_def_alleles[0] + "/" + '*47'
                print(gene_alleles)

            else:
                # Includes the conv_3p_utr / no_hyb_47 case, which produced
                # the same call in the original.
                gene_alleles = del_confirm + "/" + snv_def_alleles[0]
                print(gene_alleles)

        elif snv_def_alleles[0] != snv_def_alleles[1]:
            samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)

            gene_alleles = del_confirm + "/" + samp_allele1
            print(gene_alleles)


elif (int(cn) == 3 or int(cn) == 4) and snv_def_alleles is not None:

    if "or" in snv_def_alleles:
        print (snv_def_alleles + "\t" + "Duplication present")

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] != snv_def_alleles[1]:
            phased_dup = dup_test_cn_3_4(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)

        else:
            # Homozygous call: all extra copies are assigned to the second allele.
            rt_2 = int(cn) - 1
            phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))

        gene_alleles = phased_dup
        print(gene_alleles)


elif int(cn) > 4 and snv_def_alleles is not None:

    if "or" in snv_def_alleles:
        print (snv_def_alleles + "\t" + "Duplication present")

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] != snv_def_alleles[1]:
            phased_dup = dup_test_cn_n(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)

        else:
            rt_2 = int(cn) - 1
            phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))

        gene_alleles = phased_dup
        print(phased_dup)


elif int(cn) > 2 and snv_def_alleles is None:

    print("Possible rare CYP2A6/2A7 hybrid present")
|
||||
381
pgx-main/scripts/cyp2a6/b37/bin/sv_modules.py
Normal file
381
pgx-main/scripts/cyp2a6/b37/bin/sv_modules.py
Normal file
@@ -0,0 +1,381 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
def get_total_CN(cov_file):
    """Estimate gene copy number and per-region mean depth from a coverage file.

    ``cov_file`` must hold at least 11 whitespace-separated rows; columns 1,
    2 and 3 (0-based) of each row are read as start, end and summed depth,
    so a row's mean depth is ``depth / (end - start)``.

    Row 0 is the gene region; rows 1 and 2 are averaged into the control
    depth (the original code names them VDR and EGFR); rows 3-10 are the
    sub-regions e1-e2, e3-e9, 3'UTR, e1-e4, e5-e9, e3-e4, e9-3'region and
    e7-e8, going by the original variable names.

    Returns:
        An 11-element list:
        ``[copy number as str, rounded gene depth, rounded control depth,
        then the eight sub-region mean depths as str]``.
        Copy number is ``round(2 * gene_depth / control_depth)``.
    """
    # The context manager closes the handle; the original leaked it.
    with open(cov_file, "r") as handle:
        all_reg = [line.strip().split() for line in handle]

    def mean_depth(region):
        return float(region[3]) / (float(region[2]) - float(region[1]))

    depths = [mean_depth(all_reg[row]) for row in range(11)]

    av_2a6_cov = depths[0]
    av_ctrl_cov = (depths[1] + depths[2]) / 2

    comp_av = av_2a6_cov / av_ctrl_cov
    total_cn = round(2 * comp_av)

    summary = [str(int(total_cn)), round(av_2a6_cov), round(av_ctrl_cov)]
    summary.extend(str(depth) for depth in depths[3:])
    return summary
|
||||
|
||||
|
||||
def del_test(sv_del):
    """Classify a deletion from the structural-variant file ``sv_del``.

    Only lines containing the text "COVERAGE" are inspected; the last
    whitespace-separated field of such a line is read as a float.

    Returns:
        * the string ``"None"`` when the file is empty;
        * ``"*4/*4"`` for the first COVERAGE line whose last field is 1.0;
        * ``"*4"`` for the first COVERAGE line whose last field is -1.0;
        * Python ``None`` when no COVERAGE line carries either value.
    """
    if os.stat(sv_del).st_size == 0:
        return "None"

    # The original also parsed three fields it never used (and leaked the
    # file handle); only the last field drives the decision.
    with open(sv_del, "r") as handle:
        for line in handle:
            if "COVERAGE" not in line:
                continue

            ab_hom = float(line.strip().split()[-1])

            if ab_hom == 1.0:
                return "*4/*4"
            if ab_hom == -1.0:
                return "*4"

    return None
|
||||
|
||||
|
||||
# Kept only so that ``from sv_modules import *`` consumers still find these
# names; del_adv_test() no longer stores state in them.
hap_adv_list = []
hap_t1 = []


def del_adv_test(hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, core_vars):
    """Decide which of two called alleles is carried on the non-deleted copy.

    The first heterozygous core variant (genotype suffix ``0/1``) is looked
    up in the haplotype definitions of both candidates.

    Args:
        hap_dbs: whitespace-separated haplotype table; column 1 is the
            haplotype id, column 2 its ';'-joined variants.
        cand_allele1, cand_allele2: haplotype ids to look up in ``hap_dbs``.
        test_allele1, test_allele2: the star-allele names to return.
        core_vars: ';'-joined sample core variants, each ending in a
            4-character genotype suffix such as ``~0/1``.

    Returns:
        ``test_allele1`` if the variant belongs to ``cand_allele1``,
        ``test_allele2`` if it belongs to ``cand_allele2``, otherwise
        ``None`` (also when there is no heterozygous core variant).
    """
    # Local state: the original appended to module-level lists, so a second
    # call re-used the het variant found by the first call.
    with open(hap_dbs, "r") as handle:
        hap_defs = [line.strip().split() for line in handle]

    het_variants = [entry[:-4] for entry in core_vars.split(";")
                    if entry[-3:] == "0/1"]
    if not het_variants:
        return None

    list_t1 = []
    list_t2 = []
    for elem in hap_defs:
        if elem[1] == cand_allele1:
            list_t1 = elem[2].split(';')
        if elem[1] == cand_allele2:
            list_t2 = elem[2].split(';')

    if het_variants[0] in list_t1:
        return test_allele1
    if het_variants[0] in list_t2:
        return test_allele2
    return None
|
||||
|
||||
|
||||
# Kept only so that ``from sv_modules import *`` consumers still find these
# names; dup_test_init() no longer stores state in them.
het_hom_list = []
het_hom_list_new = []


def dup_test_init(sv_dup, av_cov):
    """Load variant rows from ``sv_dup`` and annotate them with read support.

    Lines containing "COVERAGE" or "AGGREGATED" (and blank lines) are
    skipped.  For the remaining whitespace-split rows, column 2 is read as
    an integer depth and the second-to-last column as a fraction.

    Two values are appended to every row:
        * ``round(supp_reads / norm_cov, 3)``
        * ``supp_reads = round(fraction * depth)``
    where ``norm_cov`` is the mean of ``av_cov`` and the average depth over
    all rows.

    Args:
        sv_dup: path to the variant file.
        av_cov: average gene coverage (number).

    Returns:
        The list of annotated rows; empty when the file has no variant rows
        (the original divided by zero in that case).
    """
    # Local state: the original accumulated rows in module-level lists, so a
    # second call duplicated rows and re-annotated the earlier ones.
    with open(sv_dup, "r") as handle:
        rows = [line.strip().split() for line in handle
                if "COVERAGE" not in line and "AGGREGATED" not in line]
    rows = [fields for fields in rows if fields]

    if not rows:
        return []

    depths = [int(fields[2]) for fields in rows]
    av_read_cov = sum(depths) / len(depths)
    norm_cov = (av_cov + av_read_cov) / 2

    for fields in rows:
        # fields[-2] must be read before the two values below are appended.
        supp_reads = round(float(fields[-2]) * int(fields[2]))
        fields.append(round(supp_reads / norm_cov, 3))
        fields.append(supp_reads)

    return rows
|
||||
|
||||
|
||||
# Kept so that code still referring to these module-level names keeps
# working; dup_test_cn_3_4() itself no longer stores state in them.
hap_def_list = []
allele_cn_list = []


def dup_test_cn_3_4(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
    """Assign duplicated copies to one of two alleles (total copy number 3-4).

    The heterozygous row of ``in_list`` with the largest second-to-last
    value is taken as the informative variant.  The allele whose haplotype
    definition lists that variant receives ``round(value * c_num)`` copies;
    the other allele receives the remainder.

    Args:
        sv_dup, av_cov: unused; kept for interface compatibility.
        hap_dbs: whitespace-separated haplotype table; column 1 is the
            haplotype id, column 2 its ';'-joined variants.
        cand_allele1, cand_allele2: haplotype ids to look up in ``hap_dbs``.
        test_allele1, test_allele2: star-allele names used in the result.
        c_num: total copy number (anything ``int()`` accepts).
        in_list: rows as returned by ``dup_test_init``.

    Returns:
        A diplotype string such as ``'*1/*2x2'`` or ``'*1x2/*2x2'``, or
        ``'check'`` when neither allele ends up with 0, 1 or 2 copies.

    Raises:
        ValueError: no heterozygous row, or the variant is in neither
            haplotype definition.
    """
    # Local state: the original appended to module-level lists, so a second
    # call read the copy numbers computed by the first one.
    with open(hap_dbs, "r") as handle:
        hap_defs = [line.strip().split() for line in handle]

    het_rows = [row for row in in_list if row[1] == "0/1"]
    het_support = [row[-2] for row in het_rows]

    max_het = max(het_support)
    var = het_rows[het_support.index(max_het)][0]

    for elem in hap_defs:
        if elem[1] == cand_allele1:
            vars_allele1 = elem[2].split(';')
        if elem[1] == cand_allele2:
            vars_allele2 = elem[2].split(';')

    # Position in the concatenated definitions tells which allele owns it.
    index_var = (vars_allele1 + vars_allele2).index(var)

    if index_var < len(vars_allele1):
        first, other = test_allele1, test_allele2
    else:
        first, other = test_allele2, test_allele1

    first_cn = int(round(max_het * int(c_num)))
    other_cn = int(c_num) - first_cn

    if first_cn == 0:
        return first + "/" + other + "x" + str(other_cn - 1)
    if other_cn == 0:
        return other + "/" + first + "x" + str(first_cn - 1)

    # Same precedence as the original if/elif ladder: 1 copy, then 2 copies,
    # checking the informative allele before the other one each time.
    for copies in (1, 2):
        suffix = "" if copies == 1 else "x" + str(copies)
        if first_cn == copies:
            return first + suffix + "/" + other + "x" + str(other_cn)
        if other_cn == copies:
            return other + suffix + "/" + first + "x" + str(first_cn)

    return 'check'
|
||||
|
||||
|
||||
|
||||
def dup_test_cn_n(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
    """Assign duplicated copies to one of two alleles (total copy number > 4).

    Works like ``dup_test_cn_3_4`` with two differences visible in the code:
    the informative allele receives ``round(value * c_num - 0.15)`` copies,
    and per-allele copy numbers up to 4 are formatted.

    Args:
        sv_dup, av_cov: unused; kept for interface compatibility.
        hap_dbs: whitespace-separated haplotype table; column 1 is the
            haplotype id, column 2 its ';'-joined variants.
        cand_allele1, cand_allele2: haplotype ids to look up in ``hap_dbs``.
        test_allele1, test_allele2: star-allele names used in the result.
        c_num: total copy number (anything ``int()`` accepts).
        in_list: rows as returned by ``dup_test_init``.

    Returns:
        A diplotype string such as ``'*1x2/*2x3'``, or ``'check'`` when
        neither allele ends up with 0-4 copies.

    Raises:
        ValueError: no heterozygous row, or the variant is in neither
            haplotype definition.
    """
    # Local state: the original appended to module-level lists, so a second
    # call read the copy numbers computed by an earlier one.
    with open(hap_dbs, "r") as handle:
        hap_defs = [line.strip().split() for line in handle]

    het_rows = [row for row in in_list if row[1] == "0/1"]
    het_support = [row[-2] for row in het_rows]

    max_het = max(het_support)
    var = het_rows[het_support.index(max_het)][0]

    for elem in hap_defs:
        if elem[1] == cand_allele1:
            vars_allele1 = elem[2].split(';')
        if elem[1] == cand_allele2:
            vars_allele2 = elem[2].split(';')

    # Position in the concatenated definitions tells which allele owns it.
    index_var = (vars_allele1 + vars_allele2).index(var)

    if index_var < len(vars_allele1):
        first, other = test_allele1, test_allele2
    else:
        first, other = test_allele2, test_allele1

    first_cn = int(round(max_het * int(c_num) - 0.15))
    other_cn = int(c_num) - first_cn

    if first_cn == 0:
        return first + "/" + other + "x" + str(other_cn - 1)
    if other_cn == 0:
        return other + "/" + first + "x" + str(first_cn - 1)

    # Same precedence as the original if/elif ladder: 1, 2, 3, then 4 copies,
    # checking the informative allele before the other one each time.
    for copies in (1, 2, 3, 4):
        suffix = "" if copies == 1 else "x" + str(copies)
        if first_cn == copies:
            return first + suffix + "/" + other + "x" + str(other_cn)
        if other_cn == copies:
            return other + suffix + "/" + first + "x" + str(first_cn)

    return 'check'
|
||||
|
||||
|
||||
def hybrid_12_test1(cov_e1_e2, cov_e3_e9):
    """Classify the ratio ``cov_e1_e2 / cov_e3_e9``.

    Both arguments are converted with ``float()`` before dividing, so numeric
    strings are accepted; a zero ``cov_e3_e9`` raises ``ZeroDivisionError``.

    Returns:
        'hyb_12'   when the ratio lies strictly between 0.15 and 0.65,
        'hyb_12_2' when the ratio is below 0.15,
        'norm_var' in every other case (including the exact boundary values).
    """
    ratio = float(cov_e1_e2) / float(cov_e3_e9)

    if 0.15 < ratio < 0.65:
        return 'hyb_12'

    if ratio < 0.15:
        return 'hyb_12_2'

    # Everything else: the 0.65-1.25 band, ratios >= 1.25, exact 0.15 / 0.65.
    return 'norm_var'
|
||||
|
||||
|
||||
def hybrid_12_34(cov_e1_e2, cov_e3_e9, cov_e1_e4, cov_e5_e9, cov_e3_e4):
|
||||
|
||||
if 0.65 < float(cov_e1_e4)/float(cov_e5_e9) < 1.25:
|
||||
return 'norm_var'
|
||||
|
||||
elif 0.65 < float(cov_e1_e2)/float(cov_e3_e9) < 1.25:
|
||||
return 'norm_var'
|
||||
|
||||
elif 0.15 < float(cov_e1_e4)/float(cov_e5_e9) < 0.65 and (0.65 < float(cov_e1_e2)/float(cov_e3_e4) < 1.25):
|
||||
return 'hyb_34'
|
||||
|
||||
elif 0.15 < float(cov_e1_e4)/float(cov_e5_e9) < 0.65 and (0.15 < float(cov_e1_e2)/float(cov_e3_e4) < 0.65):
|
||||
return 'hyb_12'
|
||||
|
||||
elif float(cov_e1_e4)/float(cov_e5_e9) < 0.15:
|
||||
return 'hyb_34_2'
|
||||
|
||||
elif 0.15 < float(cov_e1_e2)/float(cov_e3_e9) < 0.65:
|
||||
return 'hyb_12'
|
||||
|
||||
elif float(cov_e1_e2)/float(cov_e3_e9) < 0.15:
|
||||
return 'hyb_12_2'
|
||||
|
||||
else:
|
||||
return 'norm_var'
|
||||
|
||||
|
||||
def hybrid_47_test1(cov_e9_3pr, cov_e7_e8):
    """Return 'hyb_47' when ``cov_e7_e8 / cov_e9_3pr`` is strictly between
    0.25 and 0.65, otherwise 'no_hyb_47'.

    Both arguments are converted with ``float()``; a zero ``cov_e9_3pr``
    raises ``ZeroDivisionError``.
    """
    ratio = float(cov_e7_e8) / float(cov_e9_3pr)
    return 'hyb_47' if 0.25 < ratio < 0.65 else 'no_hyb_47'
|
||||
|
||||
|
||||
def hybrid_47_test2(cov_e9_3pr, cov_e7_e8, cov_ctrl):
    """Classify ``cov_e9_3pr / cov_ctrl`` into a *47 hybrid zygosity call.

    Args:
        cov_e9_3pr: coverage value used as the numerator (converted with float()).
        cov_e7_e8: accepted for signature compatibility; not used here.
        cov_ctrl: coverage value used as the denominator (converted with
            float()); zero raises ZeroDivisionError.

    Returns:
        'hom_47'    when the ratio is above 0.75,
        'het_47'    when the ratio is above 0.25 and at most 0.75,
        'no_hyb_47' otherwise.

    The upper bound of the 'het_47' band is inclusive: previously a ratio of
    exactly 0.75 matched neither band and was reported as 'no_hyb_47', even
    though it lies between the het and hom ranges.
    """
    ratio = float(cov_e9_3pr) / float(cov_ctrl)

    if ratio > 0.75:
        return 'hom_47'

    if 0.25 < ratio <= 0.75:
        return 'het_47'

    return 'no_hyb_47'
|
||||
|
||||
|
||||
def star_1b_test(cov_3p_utr, cov_ctrl):
    """Classify ``cov_3p_utr / cov_ctrl`` (both converted with ``float()``).

    Returns:
        'hom_1B' when the ratio is below 0.25,
        'no_1B'  when the ratio is above 0.65,
        'het_1B' for everything in between (bounds included).

    A zero ``cov_ctrl`` raises ``ZeroDivisionError``.
    """
    ratio = float(cov_3p_utr) / float(cov_ctrl)

    if ratio < 0.25:
        return 'hom_1B'

    return 'no_1B' if ratio > 0.65 else 'het_1B'
|
||||
77
pgx-main/scripts/cyp2a6/hg38/bin/bkg_modules.py
Normal file
77
pgx-main/scripts/cyp2a6/hg38/bin/bkg_modules.py
Normal file
@@ -0,0 +1,77 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Pick the database diplotype that best matches the sample's core variants.

    Args:
        database: path to a tab-separated file; column 0 is a diplotype name of
            the form ``<hap1>.<ver>_<hap2>.<ver>`` (e.g. ``1.v1_36.v1``) and
            column 1 is a ``;``-separated list of core variants.
        core_vars: ``;``-separated core variants of the sample.

    Returns:
        ``[allele_res, diplo1]`` where ``diplo1`` is the chosen diplotype name
        and ``allele_res`` its ``*hap1/*hap2`` rendering.  When no database
        record qualifies, ``['*1/*1', '1.v1_1.v1']`` is returned.
    """
    core_vars_list = core_vars.split(";")
    # Drop the last four characters (the genotype suffix, e.g. "~0/1") of the
    # last and first sample variants; these are the pre-filter keys.
    core_temp1 = core_vars_list[-1][:-4]
    core_temp2 = core_vars_list[0][:-4]

    with open(database, "r") as handle:
        dbs = [line.strip().split("\t") for line in handle]

    # Keep records whose variant string contains BOTH keys.  The original
    # condition `core_temp1 and core_temp2 in temp_rec` only tested core_temp2
    # for membership (operator precedence).  The truthiness test on core_temp1
    # is kept so an empty key still selects nothing, as before.  Rows without
    # a variant column (e.g. blank lines) are skipped instead of crashing.
    dbs_temp = [
        record for record in dbs
        if len(record) > 1
        and core_temp1
        and core_temp1 in record[1]
        and core_temp2 in record[1]
    ]

    scores = []
    candidates = []
    cand_vars = []

    for elem in dbs_temp:
        candidates.append(elem[0])
        record_core_var = elem[1].split(";")
        cand_vars.append(record_core_var)

        counter = 0
        for var in record_core_var:
            if var in core_vars_list:
                counter += 3        # exact match, genotype included
            elif var[:-4] in core_vars:
                counter += 1        # same variant, different genotype suffix
            else:
                counter -= 2        # variant absent from the sample
        scores.append(counter)

    if not scores:
        return ['*1/*1', '1.v1_1.v1']

    max_score = max(scores)
    # Candidates scoring the maximum or one below it stay in contention.
    indices = [i for i, x in enumerate(scores) if x == max_score or x == max_score - 1]

    cand_diplos = [candidates[i] for i in indices]
    diplo_vars2 = [len(cand_vars[i]) for i in indices]

    # Among those, prefer the record with the fewest core variants
    # (the first such record on ties).
    diplo1 = cand_diplos[diplo_vars2.index(min(diplo_vars2))]

    underscores = [i for i, ch in enumerate(diplo1) if ch == "_"]
    dots = [i for i, ch in enumerate(diplo1) if ch == "."]
    hap1 = "*" + diplo1[:dots[0]]
    hap2 = "*" + diplo1[underscores[0] + 1:dots[1]]
    allele_res = hap1 + "/" + hap2

    return [allele_res, diplo1]
|
||||
469
pgx-main/scripts/cyp2a6/hg38/bin/snv_def_modules.py
Normal file
469
pgx-main/scripts/cyp2a6/hg38/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,469 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ``;``-joined string.

    Args:
        infile: path to a text file with one variant per line.
        cn: copy number (anything ``int()`` accepts).

    Returns:
        The stripped lines of ``infile``, sorted and joined with ``;``.  When
        ``cn`` equals 1 every ``~0/1`` genotype suffix is rewritten to ``~1/1``.
        An empty file yields an empty string.
    """
    # The context manager closes the file; the original left the handle open.
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars
|
||||
|
||||
def get_all_vars_gt(infile_full_gt):
    """Return every line of ``infile_full_gt`` stripped, sorted and joined by ``;``.

    An empty file yields an empty string.
    """
    # The context manager closes the file; the original left the handle open.
    with open(infile_full_gt, "r") as handle:
        return ";".join(sorted(line.strip() for line in handle))
|
||||
|
||||
|
||||
def format_allele(diplo_n):
    """Render a diplotype name such as ``1.v1_36.v1`` as ``*1/*36``.

    The first haplotype is the text before the first ``.``; the second is the
    text between the first ``_`` and the second ``.``.  Raises ``IndexError``
    when the name has fewer than two dots or no underscore.
    """
    dot_positions = [pos for pos, ch in enumerate(diplo_n) if ch == "."]
    underscore_positions = [pos for pos, ch in enumerate(diplo_n) if ch == "_"]

    first_hap = diplo_n[:dot_positions[0]]
    second_hap = diplo_n[underscore_positions[0] + 1:dot_positions[1]]

    return "*" + str(first_hap) + "/" + "*" + str(second_hap)
|
||||
|
||||
|
||||
def _chk_list(lst):
    """Return "Equal" when all elements of *lst* are identical (or it is empty), else "Not equal"."""
    if all(ele == lst[0] for ele in lst):
        return "Equal"
    return "Not equal"


def _read_tiebreak_scores(infile_spec, core_variants, all_var_gt):
    """Collect tie-break candidates from *infile_spec* and score them.

    Rows are whitespace-separated; a row is a candidate when its third column
    equals *core_variants*.  Returns ``(tiebreak1, tiebreak3, score, score2)``:
    column 1 and column 0 of each candidate row, the number of sample genotype
    variants missing from the candidate (column 3), and the number of candidate
    variants missing from the sample.
    """
    tiebreak1, tiebreak2, tiebreak3 = [], [], []
    with open(infile_spec, "r") as handle:
        for line in handle:
            fields = line.strip().split()
            # Rows with fewer than four columns (e.g. blank lines) are skipped.
            if len(fields) > 3 and fields[2] == core_variants:
                tiebreak1.append(fields[1])
                tiebreak2.append(fields[3])
                tiebreak3.append(fields[0])

    if not tiebreak1:
        # The original failed here with ValueError from min() on an empty list.
        raise ValueError("no tie-break record matches the sample core variants")

    score, score2 = [], []
    for full_dip in tiebreak2:
        diplo_supp_gt = full_dip.split(";")
        score.append(sum(1 for var in all_var_gt if var not in diplo_supp_gt))
        score2.append(sum(1 for var in diplo_supp_gt if var not in all_var_gt))

    return tiebreak1, tiebreak3, score, score2


def _resolve_equal_scores(soln_list1, tiebreak1, res_list):
    """Build the result when every tie-break candidate has the same score."""
    # NOTE(review): the original handled len(res_list) > 3 and < 3 and crashed
    # (unbound local) on exactly 3.  Exactly 3 is routed to the conservative
    # "report the database solutions" branch here - confirm the intent.
    if len(res_list) >= 3:
        chosen = soln_list1
    else:
        chosen = [tiebreak1[res_list[0]], tiebreak1[res_list[-1]]]

    amb_soln_set = [format_allele(elem) for elem in chosen]
    if amb_soln_set[0] != amb_soln_set[1]:
        return [soln_list1, " or ".join(amb_soln_set)]
    # Unambiguous: also return the diplotype name so the result has the same
    # three-element shape as every other unambiguous result (the element the
    # caller reads for the allele call, [-1], is unchanged).
    return [soln_list1, chosen[0], amb_soln_set[0]]


def _best_unique(soln_list1, tiebreak1, tiebreak3, score):
    """Build the result for a single best-scoring tie-break candidate."""
    minpos = score.index(min(score))
    return [soln_list1, tiebreak3[minpos], format_allele(tiebreak1[minpos])]


def _matching_solution(soln_list1, allele):
    """Return the last entry of *soln_list1* that formats to *allele* (default: the first entry)."""
    diplo = soln_list1[0]
    for cand in soln_list1:
        if format_allele(cand) == allele:
            diplo = cand
    return diplo


def _tiebreak_two(soln_list1, infile_spec, core_variants, all_var_gt):
    """Resolve two database solutions that explain the sample equally well."""
    tiebreak1, tiebreak3, score, score2 = _read_tiebreak_scores(infile_spec, core_variants, all_var_gt)
    min_score = min(score)
    min_score2 = min(score2)
    res_list = [i for i, val in enumerate(score2) if val == min_score2]
    tied = score.count(min_score) > 1

    if _chk_list(score) == "Equal":
        return _resolve_equal_scores(soln_list1, tiebreak1, res_list)

    # Hard-coded special case carried over unchanged (len() guard added so a
    # short score list falls through instead of raising IndexError).
    if tied and soln_list1[1] == "18.v1_7.v1" and len(score) >= 3 and score[-3] == min_score:
        return [soln_list1, ['18.v1_7.v1'], '*18/*7']

    if tied:
        alt_solns = [format_allele(tiebreak1[j]) for j, val in enumerate(score) if val == min_score]

        if _chk_list(alt_solns) == "Equal":
            return [soln_list1, _matching_solution(soln_list1, alt_solns[0]), alt_solns[0]]

        if alt_solns[0] == '*1/*36':
            return [soln_list1, ['1.v1_36.v1'], '*1/*36']

        alt_solns = sorted(alt_solns)
        # Not all equal, so the first and last sorted entries differ.
        return [soln_list1, " or ".join([alt_solns[0], alt_solns[-1]])]

    return _best_unique(soln_list1, tiebreak1, tiebreak3, score)


def _tiebreak_three(soln_list1, infile_spec, core_variants, all_var_gt):
    """Resolve three database solutions that cannot be separated by variant counts."""
    tiebreak1, tiebreak3, score, score2 = _read_tiebreak_scores(infile_spec, core_variants, all_var_gt)
    min_score = min(score)
    min_score2 = min(score2)
    res_list = [i for i, val in enumerate(score2) if val == min_score2]
    tied = score.count(min_score) > 1

    if _chk_list(score) == "Equal":
        return _resolve_equal_scores(soln_list1, tiebreak1, res_list)

    # Hard-coded special cases carried over unchanged.
    if tied and soln_list1[1] == "10.v1_35.v1":
        if len(score) >= 3 and score[-3] == min_score:
            return [soln_list1, ['10.v1_35.v1'], '*10/*35']

        if score.count(min_score) >= 4 and sum(score[-2:]) == min_score:
            amb_soln_set = [format_allele(elem) for elem in soln_list1[1:]]
            if amb_soln_set[0] != amb_soln_set[1]:
                return [soln_list1, " or ".join(amb_soln_set)]
            return [soln_list1, soln_list1[1], amb_soln_set[0]]

    if tied and soln_list1[0] == "1.v1_37.v1":
        return [soln_list1, ['1.v1_37.v1'], '*1/*37']

    if tied:
        alt_solns = [format_allele(tiebreak1[j]) for j, val in enumerate(score) if val == min_score]
        alt_solns1 = [format_allele(tiebreak1[n]) for n in res_list]

        if _chk_list(alt_solns) == "Equal":
            return [soln_list1, _matching_solution(soln_list1, alt_solns[0]), alt_solns[0]]

        # Keep only best-score candidates that are also best on the reverse score.
        amb_soln_set = [sol for sol in sorted(alt_solns) if sol in alt_solns1]
        return [soln_list1, " or ".join(amb_soln_set)]

    return _best_unique(soln_list1, tiebreak1, tiebreak3, score)


def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
    """Call the star-allele diplotype from the sample's SNV files.

    Args:
        database: tab-separated diplotype database; column 0 is the diplotype
            name, column 1 its ``;``-separated core variants, column 2 its
            ``;``-separated supporting variants.
        infile: sample core variants, one per line (empty file means *1/*1).
        infile_full: all sample variants, one per line.
        infile_full_gt: all sample variants with genotypes, one per line.
        infile_spec: whitespace-separated tie-break table; it is now opened
            only when a tie has to be broken.
        cn: copy number, forwarded to ``get_core_variants``.

    Returns:
        ``None`` when zero or more than three database records match the
        sample's core variants (and for a few three-way orderings, see below);
        otherwise a list whose first element is the candidate list and whose
        last element is the allele call (``"*a/*b"`` or ``"*a/*b or *c/*d"``).
        Unambiguous results have a middle element identifying the diplotype;
        ambiguous ("or") results have only two elements.

    Raises:
        ValueError: a tie-break is needed but no row of ``infile_spec`` matches.
    """
    # NOTE(review): the original called line.strip() and discarded the result,
    # so these entries keep their trailing newline and practically never equal
    # a database supporting variant; the "unexplained variant" counts below are
    # therefore almost always tied and the tie-break path decides.  Left as-is
    # because the hard-coded special cases were presumably tuned against this
    # behaviour - confirm before stripping here.
    with open(infile_full, "r") as handle:
        all_variants = list(handle)

    if os.stat(infile).st_size == 0:
        return ['1.v1_1.v1', '1.v1_1.v1', "*1/*1"]

    core_variants = get_core_variants(infile, cn)

    with open(infile_full_gt, "r") as handle:
        all_var_gt = [line.strip() for line in handle]

    soln_list1 = []   # diplotype names whose core variants equal the sample's
    soln_list2 = []   # their supporting-variant strings
    with open(database, "r") as handle:
        for line in handle:
            record = line.strip().split("\t")
            if len(record) < 3:
                continue
            if ";".join(sorted(record[1].split(";"))) == core_variants:
                soln_list1.append(record[0])
                soln_list2.append(record[2])

    if len(soln_list1) == 1:
        diplo = soln_list1[0]
        return [soln_list1, diplo, format_allele(diplo)]

    if len(soln_list1) not in (2, 3):
        # No match (or more than three): the caller treats None as "no call".
        return None

    # For each solution, count the sample variants it does not explain.
    unexplained = []
    for supp in soln_list2:
        supp_vars = supp.split(";")
        unexplained.append(sum(1 for var in all_variants if var not in supp_vars))

    if len(soln_list1) == 2:
        print(soln_list1)   # emitted by the original; kept so stdout is unchanged
        n1, n2 = unexplained
        if n1 < n2:
            return [soln_list1, soln_list1[0], format_allele(soln_list1[0])]
        if n1 > n2:
            return [soln_list1, soln_list1[1], format_allele(soln_list1[1])]
        return _tiebreak_two(soln_list1, infile_spec, core_variants, all_var_gt)

    n1, n2, n3 = unexplained
    if n1 < n2 and n1 < n3:
        return [soln_list1, soln_list1[0], format_allele(soln_list1[0])]
    if n1 > n2 and n2 < n3:
        return [soln_list1, soln_list1[1], format_allele(soln_list1[1])]
    if n1 > n2 and n2 > n3:
        return [soln_list1, soln_list1[2], format_allele(soln_list1[2])]
    if n1 == n2 == n3 or (n1 != n2 == n3) or (n1 == n2 != n3):
        return _tiebreak_three(soln_list1, infile_spec, core_variants, all_var_gt)

    # NOTE(review): orderings such as n3 < n1 < n2 or n1 == n3 < n2 match none
    # of the branches above; as in the original, they yield None.
    return None
|
||||
359
pgx-main/scripts/cyp2a6/hg38/bin/stellarpgx.py
Normal file
359
pgx-main/scripts/cyp2a6/hg38/bin/stellarpgx.py
Normal file
@@ -0,0 +1,359 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from sv_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
|
||||
# CYP2A6 star-allele calling driver.
#
# Positional arguments: database, infile, infile_full, infile_full_gt,
# infile_spec, sv_del, sv_dup, cov_file, hap_dbs, act_score.
#
# NOTE(review): this section was reconstructed from a copy whose indentation
# was lost.  The nesting below follows the only reading under which every
# variable is defined before use; confirm against the canonical file.

print("--------------------------------------------\n")
print("CYP2A6 Star Allele Calling with StellarPGx\n")
print("--------------------------------------------\n")

database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]
sv_del = sys.argv[6]
sv_dup = sys.argv[7]
cov_file = sys.argv[8]
hap_dbs = sys.argv[9]
act_score = sys.argv[10]

# Parse the coverage file once (the original re-parsed it for every value).
cov_stats = get_total_CN(cov_file)
cn = cov_stats[0]
av_cov = cov_stats[1]
cov_ctrl = cov_stats[2]
cov_e1_e2 = cov_stats[3]
cov_e3_e9 = cov_stats[4]
cov_3p_utr = cov_stats[5]
cov_e1_e4 = cov_stats[6]
cov_e5_e9 = cov_stats[7]
cov_e3_e4 = cov_stats[8]
cov_e9_3pr = cov_stats[9]
cov_e7_e8 = cov_stats[10]

print("Initially computed CN = {}".format(cn))

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)

snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)

if snv_def_calls is None:
    # No database diplotype matches the core variants: report the closest
    # background alleles and stop.
    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if int(cn) == 0:
        print("\nResult:")
        print("*4/*4")

    elif bac_alleles is None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")

    elif int(cn) < 2:
        bac_alleles = bac_alleles[0].split("/")
        bac_alleles1 = bac_alleles[0] + "/" + "*4"
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles1 + "]")

    else:
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    sys.exit()


best_diplos = snv_def_calls[0]

print("\nCandidate alleles:")
print(best_diplos)

snv_def_alleles = snv_def_calls[-1]

# Ambiguous calls ("A or B") carry no single candidate diplotype.
if "or" not in snv_def_alleles:
    snv_cand_alleles = snv_def_calls[1]

dip_variants = get_all_vars_gt(infile_full_gt)

print("\nResult:")

gene_alleles = ""

conv_3p_utr = ['*5','*7','*8','*10','*19','*24','*28','*35','*36','*37']

if snv_def_alleles != '*1/*1' and cn != '0':
    in_list = dup_test_init(sv_dup, av_cov)


if cn == '2':

    if 'or' in snv_def_alleles:
        print (snv_def_alleles)

    else:
        snv_def_alleles = snv_def_alleles.split("/")

        if snv_def_alleles[0] == '*46' or snv_def_alleles[1] == '*46':
            ind_star2 = snv_def_alleles.index('*46')
            ind_other = 1 - ind_star2

            test_1b = star_1b_test(cov_3p_utr, cov_ctrl)

            # NOTE(review): combinations matching none of these three branches
            # print nothing, as in the original.
            if test_1b == 'no_1B' and (snv_def_alleles.count('*46') == 2):
                gene_alleles = "*1" + "/" + "*1"
                print(gene_alleles)

            elif test_1b == 'het_1B' and (snv_def_alleles[ind_other] not in conv_3p_utr):
                gene_alleles = snv_def_alleles[ind_other] + "/" + "*46"
                print(gene_alleles)

            elif test_1b == 'hom_1B' and (snv_def_alleles[ind_other] in conv_3p_utr):
                gene_alleles = snv_def_alleles[ind_other] + "/" + "*46"
                print(gene_alleles)

        elif snv_def_alleles[0] == '*1' or snv_def_alleles[1] == '*1':
            ind_star2 = snv_def_alleles.index('*1')
            ind_other = 1 - ind_star2

            test_12_34 = hybrid_12_34(cov_e1_e2, cov_e3_e9, cov_e1_e4, cov_e5_e9, cov_e3_e4)
            test_1b = star_1b_test(cov_3p_utr, cov_ctrl)

            if test_12_34 == 'norm_var':

                if test_1b == 'no_1B':
                    gene_alleles = "/".join(snv_def_alleles)
                    print(gene_alleles)

                elif test_1b == 'het_1B' and (snv_def_alleles[ind_other] not in conv_3p_utr):
                    gene_alleles = snv_def_alleles[ind_other] + "/" + "*46"
                    print(gene_alleles)

                elif test_1b == 'hom_1B' and (snv_def_alleles.count('*1') == 2):
                    gene_alleles = "*46/*46"
                    print(gene_alleles)

                elif test_1b == 'hom_1B' and (snv_def_alleles[ind_other] in conv_3p_utr):
                    gene_alleles = snv_def_alleles[ind_other] + "/" + "*46"
                    print(gene_alleles)

                elif test_1b == 'hom_1B':
                    gene_alleles = snv_def_alleles[ind_other] + "/" + "*46"
                    print(gene_alleles)

                else:
                    gene_alleles = "/".join(snv_def_alleles)
                    print(gene_alleles)

            elif test_12_34 == 'hyb_12':
                gene_alleles = snv_def_alleles[ind_other] + "/" + "*12"
                print(gene_alleles)

            # snv_def_alleles is a list here, so the original comparison with
            # the string "*1/*1" could never be true; test both entries instead.
            elif test_12_34 == 'hyb_12_2' and snv_def_alleles.count('*1') == 2:
                gene_alleles = "*12/*12"
                print(gene_alleles)

            elif test_12_34 == 'hyb_34':
                gene_alleles = snv_def_alleles[ind_other] + "/" + "*34"
                print(gene_alleles)

            elif test_12_34 == 'hyb_34_2' and snv_def_alleles.count('*1') == 2:
                gene_alleles = "*34/*34"
                print(gene_alleles)

        # NOTE(review): this fallback is attached to the allele-type chain
        # (diplotypes with neither *46 nor *1); the flattened source would also
        # allow attaching it to the test_12_34 chain - confirm.
        else:
            gene_alleles = "/".join(snv_def_alleles)
            print(gene_alleles)


elif cn == '0':
    del_confirm = del_test(sv_del)
    test_47_1 = hybrid_47_test1(cov_e9_3pr, cov_e7_e8)
    test_47_2 = hybrid_47_test2(cov_e9_3pr, cov_e7_e8, cov_ctrl)

    if del_confirm == '*4/*4' and test_47_2 == 'no_hyb_47':
        gene_alleles = '*4/*4'

    elif del_confirm == '*4/*4' and test_47_2 == 'het_47':
        gene_alleles = '*4/*47'

    elif del_confirm == '*4/*4' and test_47_2 == 'hom_47':
        gene_alleles = '*47/*47'

    elif del_confirm == '*4' and test_47_1 == 'no_hyb_47':
        gene_alleles = '*4' + "/" + "*other"

    elif del_confirm == '*4' and test_47_1 == 'hyb_47':
        gene_alleles = '*47' + "/" + "*other"

    else:
        gene_alleles = "*4/*4"

    print(gene_alleles)


elif cn == '1':
    del_confirm = del_test(sv_del)
    test_47_1 = hybrid_47_test1(cov_e9_3pr, cov_e7_e8)

    if "or" in snv_def_alleles:
        # Ambiguous SNV call: report it with the deletion hint.  The original
        # did this only when del_test returned 'None'; with a confirmed
        # deletion it crashed (NameError on snv_cand_alleles).
        print (snv_def_alleles + "\t" + "Possible CYP2A6 gene deletion (*4) present")

    elif del_confirm == 'None':
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] == snv_def_alleles[1] and test_47_1 == 'no_hyb_47':
            gene_alleles = snv_def_alleles[0] + "/" + "*4"
            print(gene_alleles)

        elif snv_def_alleles[0] == snv_def_alleles[1] and test_47_1 == 'hyb_47':
            gene_alleles = snv_def_alleles[0] + "/" + "*47"
            print(gene_alleles)

        elif snv_def_alleles[0] != snv_def_alleles[1]:
            samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)

            gene_alleles = samp_allele1 + "/" + "*4"

            if test_47_1 == 'hyb_47':
                gene_alleles = samp_allele1 + "/" + "*47"

            print(gene_alleles)

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        test_1b = star_1b_test(cov_3p_utr, cov_ctrl)
        test_47_1 = hybrid_47_test1(cov_e9_3pr, cov_e7_e8)

        if snv_def_alleles[0] == snv_def_alleles[1]:
            del_confirm = "*4"

            if snv_def_alleles[0] == '*1' and test_1b == 'hom_1B':
                snv_def_alleles[0] = '*46'
                gene_alleles = del_confirm + "/" + snv_def_alleles[0]
                print(gene_alleles)

            elif snv_def_alleles[0] in conv_3p_utr and test_47_1 == 'no_hyb_47':
                gene_alleles = del_confirm + "/" + snv_def_alleles[0]
                print(gene_alleles)

            elif snv_def_alleles[0] in conv_3p_utr and test_47_1 == 'hyb_47':
                gene_alleles = snv_def_alleles[0] + "/" + '*47'
                print(gene_alleles)

            else:
                gene_alleles = del_confirm + "/" + snv_def_alleles[0]
                print(gene_alleles)

        elif snv_def_alleles[0] != snv_def_alleles[1]:
            samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)

            # Was `del_confirm == "*4"`, a comparison with no effect; the
            # assignment mirrors the equal-allele branch above and avoids
            # printing e.g. "*4/*4/<allele>".
            del_confirm = "*4"

            gene_alleles = del_confirm + "/" + samp_allele1
            print(gene_alleles)


elif (int(cn) == 3 or int(cn) == 4) and snv_def_alleles is not None:

    if "or" in snv_def_alleles:
        print (snv_def_alleles + "\t" + "Duplication present")

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] != snv_def_alleles[1]:
            phased_dup = dup_test_cn_3_4(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)

        else:
            rt_2 = int(cn) - 1
            phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))

        gene_alleles = phased_dup
        print(gene_alleles)


elif int(cn) > 4 and snv_def_alleles is not None:

    if "or" in snv_def_alleles:
        print (snv_def_alleles + "\t" + "Duplication present")

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] != snv_def_alleles[1]:
            phased_dup = dup_test_cn_n(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)

        else:
            rt_2 = int(cn) - 1
            phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))

        gene_alleles = phased_dup
        print(phased_dup)


elif int(cn) > 2 and snv_def_alleles is None:
    print("Possible rare CYP2A6/2A7 hybrid present")
|
||||
383
pgx-main/scripts/cyp2a6/hg38/bin/sv_modules.py
Normal file
383
pgx-main/scripts/cyp2a6/hg38/bin/sv_modules.py
Normal file
@@ -0,0 +1,383 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
def get_total_CN(cov_file):
    """Estimate the total gene copy number from a per-region coverage table.

    ``cov_file`` is read as whitespace-separated rows.  For each of the first
    eleven rows the mean coverage is computed as
    ``column[3] / (column[2] - column[1])`` (0-based columns).  Row 0 is the
    target gene, rows 1 and 2 are averaged into the control coverage (named
    ``vdr`` and ``egfr`` in the original code), and rows 3-10 are sub-regions
    whose means are passed through unchanged.

    Returns:
        A list ``[copy_number, gene_cov, ctrl_cov, r3, ..., r10]`` where
        ``copy_number`` is a string, ``gene_cov``/``ctrl_cov`` are rounded
        ints and ``r3`` .. ``r10`` are the sub-region means as strings.

    Raises:
        IndexError: if the file has fewer than eleven usable rows/columns.
        ZeroDivisionError: if a region has zero length or the control
            coverage is zero.
    """
    # Use a context manager so the handle is closed deterministically.
    with open(cov_file, "r") as handle:
        all_reg = [line.strip().split() for line in handle]

    def region_mean(index):
        # Mean depth of one row: summed depth divided by region length.
        fields = all_reg[index]
        return float(fields[3]) / (float(fields[2]) - float(fields[1]))

    av_gene_cov = region_mean(0)
    av_vdr_cov = region_mean(1)
    av_egfr_cov = region_mean(2)
    sub_region_covs = [region_mean(index) for index in range(3, 11)]

    av_ctrl_cov = (av_vdr_cov + av_egfr_cov) / 2

    # Two copies are expected when gene and control coverage are equal.
    total_cn = round(2 * (av_gene_cov / av_ctrl_cov))

    return [str(int(total_cn)), round(av_gene_cov), round(av_ctrl_cov)] + [
        str(cov) for cov in sub_region_covs
    ]
|
||||
|
||||
|
||||
def del_test(sv_del):
    """Classify a whole-gene deletion call from the ``sv_del`` file.

    Only lines containing the text ``COVERAGE`` are inspected; the last
    whitespace-separated field of such a line is parsed as a float.

    Returns:
        ``"None"`` (the string) when the file is empty, ``"*4/*4"`` when the
        last field equals 1.0, ``"*4"`` when it equals -1.0, and ``None``
        when no ``COVERAGE`` line matches either value.
    """
    if os.stat(sv_del).st_size == 0:
        return "None"

    # Context manager closes the handle even on the early returns below.
    with open(sv_del, "r") as handle:
        for line in handle:
            if "COVERAGE" not in line:
                continue

            # Only the final column drives the decision; the other columns
            # the original parsed (genotype, depth) were never used.
            ab_hom = float(line.strip().split()[-1])

            if ab_hom == 1.0:
                return "*4/*4"
            if ab_hom == -1.0:
                return "*4"

    return None
|
||||
|
||||
|
||||
# Kept so that ``from sv_modules import *`` still exposes these names.
# del_adv_test works on local lists and does not populate them.
hap_adv_list = []
hap_t1 = []


def del_adv_test(hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, core_vars):
    """Pick which of two alleles carries the sample's first heterozygous variant.

    Args:
        hap_dbs: path to a whitespace-separated haplotype table; column 1 is
            compared with the candidate allele ids and column 2 is a
            ``;``-separated variant list.
        cand_allele1, cand_allele2: ids looked up in column 1 of ``hap_dbs``.
        test_allele1, test_allele2: values returned for a match on the first
            or second candidate respectively.
        core_vars: ``;``-separated sample variants; entries ending in ``0/1``
            are treated as heterozygous and their last four characters
            (separator plus genotype) are stripped.

    Returns:
        ``test_allele1`` or ``test_allele2``, or ``None`` when there is no
        heterozygous variant or neither candidate lists it.
    """
    # Local state: the original appended to module-level lists, so a second
    # call kept using the first call's variant and haplotype rows.
    het_variants = [
        entry[:-4] for entry in core_vars.split(";") if entry[-3:] == "0/1"
    ]
    if not het_variants:
        return None
    first_het = het_variants[0]

    allele1_vars = []
    allele2_vars = []
    with open(hap_dbs, "r") as handle:
        for line in handle:
            fields = line.strip().split()
            if len(fields) < 3:
                # Blank or truncated rows cannot describe a haplotype.
                continue
            # No ``break``: as before, the last matching row wins.
            if fields[1] == cand_allele1:
                allele1_vars = fields[2].split(";")
            if fields[1] == cand_allele2:
                allele2_vars = fields[2].split(";")

    if first_het in allele1_vars:
        return test_allele1
    if first_het in allele2_vars:
        return test_allele2
    return None
|
||||
|
||||
|
||||
# Kept so that ``from sv_modules import *`` still exposes these names.
# dup_test_init builds its result in a local list.
het_hom_list = []
het_hom_list_new = []


def dup_test_init(sv_dup, av_cov):
    """Annotate each variant row of ``sv_dup`` with supporting-read figures.

    Lines containing ``COVERAGE`` or ``AGGREGATED`` are skipped.  Every other
    line is split on whitespace; column 2 is read as an integer depth and the
    second-to-last column as a fraction.  For each row two values are
    appended: ``round(supporting_reads / mean_depth, 4)`` and
    ``supporting_reads``, where ``supporting_reads = round(fraction * depth)``
    and ``mean_depth`` is the mean of column 2 over all kept rows.

    Args:
        sv_dup: path to the variant table.
        av_cov: accepted for interface compatibility; it does not influence
            the returned values (the original only fed it into an unused
            local).

    Returns:
        The list of annotated rows, or ``[]`` when the file has no variant
        rows (the original raised ZeroDivisionError in that case).
    """
    records = []
    with open(sv_dup, "r") as handle:
        for line in handle:
            if "COVERAGE" in line or "AGGREGATED" in line:
                continue
            fields = line.strip().split()
            if fields:
                records.append(fields)

    if not records:
        return []

    depths = [int(fields[2]) for fields in records]
    av_read_cov = sum(depths) / len(depths)

    for fields in records:
        # fields[-2] must be read before the appends below shift the tail.
        supp_reads = round(float(fields[-2]) * int(fields[2]))
        fields.append(round(supp_reads / av_read_cov, 4))
        fields.append(supp_reads)

    return records
|
||||
|
||||
|
||||
# Kept so that ``from sv_modules import *`` and any sibling function that
# still appends to them keep working.  dup_test_cn_3_4 uses local state only.
hap_def_list = []
allele_cn_list = []


def dup_test_cn_3_4(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
    """Assign duplicated copies to one of two alleles for a total CN of 3 or 4.

    The heterozygous entry of ``in_list`` with the largest value at index -2
    is selected.  The allele whose variant list in ``hap_dbs`` contains that
    variant receives ``round(value * c_num)`` copies and the other allele
    receives the remainder.

    Args:
        sv_dup, av_cov: unused; kept for interface compatibility.
        hap_dbs: path to a whitespace-separated haplotype table; column 1 is
            the allele id and column 2 a ``;``-separated variant list.
        cand_allele1, cand_allele2: ids looked up in ``hap_dbs``.
        test_allele1, test_allele2: star-allele labels used in the result.
        c_num: total copy number (anything ``int()`` accepts).
        in_list: rows with the variant id at index 0, the genotype at index 1
            and a numeric fraction at index -2 (presumably the output of
            ``dup_test_init``; confirm against the caller).

    Returns:
        A diplotype string such as ``"*1/*9x2"``, or ``'check'`` when the
        copy split matches none of the handled patterns.

    Raises:
        ValueError: if ``in_list`` has no ``0/1`` row, a candidate allele is
            missing from ``hap_dbs``, or the selected variant belongs to
            neither candidate.
    """
    total_cn = int(c_num)

    het_calls = [row for row in in_list if row[1] == "0/1"]
    # max() keeps the first row on ties, like the original index lookup.
    top_call = max(het_calls, key=lambda row: row[-2])
    var = top_call[0]
    max_het = top_call[-2]

    allele1_vars = None
    allele2_vars = None
    with open(hap_dbs, "r") as handle:
        for line in handle:
            fields = line.strip().split()
            if len(fields) < 3:
                continue
            # No ``break``: the last matching row wins, as before.
            if fields[1] == cand_allele1:
                allele1_vars = fields[2].split(";")
            if fields[1] == cand_allele2:
                allele2_vars = fields[2].split(";")

    if allele1_vars is None or allele2_vars is None:
        raise ValueError("candidate allele not found in {}".format(hap_dbs))

    # The first candidate takes precedence when both list the variant.
    if var in allele1_vars:
        lead, other = test_allele1, test_allele2
    elif var in allele2_vars:
        lead, other = test_allele2, test_allele1
    else:
        raise ValueError("{} is not in either candidate allele".format(var))

    # Local counts: the original appended to a module-level list, so every
    # call after the first read the first call's alleles and counts.
    lead_n = int(round(max_het * total_cn))
    other_n = total_cn - lead_n

    if lead_n == 0:
        return lead + "/" + other + "x" + str(other_n - 1)
    if other_n == 0:
        return other + "/" + lead + "x" + str(lead_n - 1)
    if lead_n == 1:
        return lead + "/" + other + "x" + str(other_n)
    if other_n == 1:
        return other + "/" + lead + "x" + str(lead_n)
    if lead_n == 2:
        return lead + "x2" + "/" + other + "x" + str(other_n)
    if other_n == 2:
        return other + "x2" + "/" + lead + "x" + str(lead_n)
    return 'check'
|
||||
|
||||
|
||||
|
||||
def dup_test_cn_n(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
    """Assign duplicated copies to one of two alleles for higher copy numbers.

    The heterozygous entry of ``in_list`` with the largest value at index -2
    is selected.  The allele whose variant list in ``hap_dbs`` contains that
    variant receives ``round(value * c_num - 0.15)`` copies and the other
    allele receives the remainder.

    Args:
        sv_dup, av_cov: unused; kept for interface compatibility.
        hap_dbs: path to a whitespace-separated haplotype table; column 1 is
            the allele id and column 2 a ``;``-separated variant list.
        cand_allele1, cand_allele2: ids looked up in ``hap_dbs``.
        test_allele1, test_allele2: star-allele labels used in the result.
        c_num: total copy number (anything ``int()`` accepts).
        in_list: rows with the variant id at index 0, the genotype at index 1
            and a numeric fraction at index -2 (presumably the output of
            ``dup_test_init``; confirm against the caller).

    Returns:
        A diplotype string such as ``"*1x2/*9x3"``, or ``'check'`` when the
        copy split matches none of the handled patterns (neither side has
        between 0 and 4 copies).

    Raises:
        ValueError: if ``in_list`` has no ``0/1`` row, a candidate allele is
            missing from ``hap_dbs``, or the selected variant belongs to
            neither candidate.
    """
    total_cn = int(c_num)

    het_calls = [row for row in in_list if row[1] == "0/1"]
    # max() keeps the first row on ties, like the original index lookup.
    top_call = max(het_calls, key=lambda row: row[-2])
    var = top_call[0]
    max_het = top_call[-2]

    allele1_vars = None
    allele2_vars = None
    with open(hap_dbs, "r") as handle:
        for line in handle:
            fields = line.strip().split()
            if len(fields) < 3:
                continue
            # No ``break``: the last matching row wins, as before.
            if fields[1] == cand_allele1:
                allele1_vars = fields[2].split(";")
            if fields[1] == cand_allele2:
                allele2_vars = fields[2].split(";")

    if allele1_vars is None or allele2_vars is None:
        raise ValueError("candidate allele not found in {}".format(hap_dbs))

    # The first candidate takes precedence when both list the variant.
    if var in allele1_vars:
        lead, other = test_allele1, test_allele2
    elif var in allele2_vars:
        lead, other = test_allele2, test_allele1
    else:
        raise ValueError("{} is not in either candidate allele".format(var))

    # Local counts: the original appended to a module-level list, so every
    # call after the first read the first call's alleles and counts.
    lead_n = int(round(max_het * total_cn - 0.15))
    other_n = total_cn - lead_n

    if lead_n == 0:
        return lead + "/" + other + "x" + str(other_n - 1)
    if other_n == 0:
        return other + "/" + lead + "x" + str(lead_n - 1)
    if lead_n == 1:
        return lead + "/" + other + "x" + str(other_n)
    if other_n == 1:
        return other + "/" + lead + "x" + str(lead_n)

    # Explicit multiplicities 2..4, lead allele checked first at each level.
    for copies in (2, 3, 4):
        if lead_n == copies:
            return lead + "x" + str(copies) + "/" + other + "x" + str(other_n)
        if other_n == copies:
            return other + "x" + str(copies) + "/" + lead + "x" + str(lead_n)

    return 'check'
|
||||
|
||||
|
||||
def hybrid_12_test1(cov_e1_e2, cov_e3_e9):
    """Classify the exon 1-2 to exon 3-9 coverage ratio.

    Returns ``'hyb_12'`` for a ratio strictly between 0.15 and 0.65,
    ``'hyb_12_2'`` for a ratio below 0.15, and ``'norm_var'`` for every
    other value (including exactly 0.15 and 0.65).
    """
    ratio = float(cov_e1_e2) / float(cov_e3_e9)

    if 0.15 < ratio < 0.65:
        return 'hyb_12'
    if ratio < 0.15:
        return 'hyb_12_2'
    return 'norm_var'
|
||||
|
||||
|
||||
def hybrid_12_34(cov_e1_e2, cov_e3_e9, cov_e1_e4, cov_e5_e9, cov_e3_e4):
    """Classify hybrid patterns from three exon-block coverage ratios.

    The ratios are ``e1_e4 / e5_e9``, ``e1_e2 / e3_e9`` and
    ``e1_e2 / e3_e4``.  They are evaluated lazily and in that order, so a
    ratio is only computed once an earlier test has not already decided
    the result.

    Returns one of ``'norm_var'``, ``'hyb_34'``, ``'hyb_12'``,
    ``'hyb_34_2'`` or ``'hyb_12_2'``.
    """
    ratio_14_59 = float(cov_e1_e4) / float(cov_e5_e9)
    if 0.65 < ratio_14_59 < 1.25:
        return 'norm_var'

    ratio_12_39 = float(cov_e1_e2) / float(cov_e3_e9)
    if 0.65 < ratio_12_39 < 1.25:
        return 'norm_var'

    if 0.15 < ratio_14_59 < 0.65:
        # Only this band needs the exon 1-2 / exon 3-4 comparison.
        ratio_12_34 = float(cov_e1_e2) / float(cov_e3_e4)
        if 0.65 < ratio_12_34 < 1.25:
            return 'hyb_34'
        if 0.15 < ratio_12_34 < 0.65:
            return 'hyb_12'

    if ratio_14_59 < 0.15:
        return 'hyb_34_2'
    if 0.15 < ratio_12_39 < 0.65:
        return 'hyb_12'
    if ratio_12_39 < 0.15:
        return 'hyb_12_2'
    return 'norm_var'
|
||||
|
||||
|
||||
def hybrid_47_test1(cov_e9_3pr, cov_e7_e8):
    """Return ``'hyb_47'`` when ``cov_e7_e8 / cov_e9_3pr`` lies strictly
    between 0.25 and 0.65, otherwise ``'no_hyb_47'``."""
    ratio = float(cov_e7_e8) / float(cov_e9_3pr)
    return 'hyb_47' if 0.25 < ratio < 0.65 else 'no_hyb_47'
|
||||
|
||||
|
||||
# For *4/*4
|
||||
|
||||
def hybrid_47_test2(cov_e9_3pr, cov_e7_e8, cov_ctrl):
    """Classify the ``cov_e9_3pr / cov_ctrl`` ratio.

    Returns ``'hom_47'`` above 0.75, ``'het_47'`` strictly between 0.25 and
    0.75, and ``'no_hyb_47'`` otherwise (including exactly 0.75).
    ``cov_e7_e8`` is accepted but not used.
    """
    ratio = float(cov_e9_3pr) / float(cov_ctrl)

    if ratio > 0.75:
        return 'hom_47'
    return 'het_47' if 0.25 < ratio < 0.75 else 'no_hyb_47'
|
||||
|
||||
|
||||
def star_1b_test(cov_3p_utr, cov_ctrl):
    """Classify the ``cov_3p_utr / cov_ctrl`` ratio.

    Returns ``'no_1B'`` above 0.65, ``'hom_1B'`` below 0.25, and
    ``'het_1B'`` for everything in between (bounds included).
    """
    ratio = float(cov_3p_utr) / float(cov_ctrl)

    if ratio > 0.65:
        return 'no_1B'
    return 'hom_1B' if ratio < 0.25 else 'het_1B'
|
||||
78
pgx-main/scripts/cyp2b6/b37/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/cyp2b6/b37/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def _bkg_star_diplotype(diplo):
    """Convert a diplotype id such as ``"1.v1_18.v2"`` into ``"*1/*18"``."""
    dots = [pos for pos, char in enumerate(diplo) if char == "."]
    underscores = [pos for pos, char in enumerate(diplo) if char == "_"]
    hap1 = "*" + diplo[:dots[0]]
    hap2 = "*" + diplo[underscores[0] + 1:dots[1]]
    return hap1 + "/" + hap2


def get_backgroud_alleles(database, core_vars):
    """Find the database diplotype that best explains the core variants.

    Args:
        database: path to a tab-separated table; column 0 is a diplotype id
            and column 1 a ``;``-separated list of core variants.
        core_vars: ``;``-separated sample core variants, each ending in a
            four-character genotype suffix.

    Scoring per candidate record: +3 for a variant present in the sample
    with the same genotype, +1 when only the variant (suffix stripped) occurs
    in ``core_vars``, -2 otherwise.  Among records scoring the maximum or
    one below it, the record with the fewest variants wins (first on ties).

    Returns:
        ``[star_diplotype, diplotype_id]``; ``['*1/*1', '1.v1_1.v1']`` when
        no record qualifies as a candidate.
    """
    core_vars_list = core_vars.split(";")
    last_core = core_vars_list[-1][:-4]
    first_core = core_vars_list[0][:-4]

    candidates = []
    cand_vars = []
    scores = []

    # Context manager closes the database handle (the original leaked it).
    with open(database, "r") as handle:
        for line in handle:
            record = line.strip().split("\t")
            if len(record) < 2:
                # Blank or truncated rows carry no core-variant column.
                continue

            # NOTE(review): the original condition was
            # ``core_temp1 and core_temp2 in temp_rec``, which Python parses
            # as ``core_temp1 and (core_temp2 in temp_rec)``.  Only the FIRST
            # core variant is tested for membership; the last one merely has
            # to be non-empty.  That behaviour is kept unchanged because
            # tightening it would alter which background alleles are
            # reported - confirm the intent before changing.
            if not (last_core and (first_core in record[1])):
                continue

            record_core_var = record[1].split(";")
            score = 0
            for variant in record_core_var:
                if variant in core_vars_list:
                    score += 3
                elif variant[:-4] in core_vars:
                    # Substring test against the joined string, as before.
                    score += 1
                else:
                    score -= 2

            candidates.append(record[0])
            cand_vars.append(record_core_var)
            scores.append(score)

    if not scores:
        return ['*1/*1', '1.v1_1.v1']

    max_score = max(scores)
    shortlisted = [
        pos for pos, score in enumerate(scores)
        if score == max_score or score == max_score - 1
    ]

    # min() keeps the first shortlisted record on ties, matching the
    # original ``index(min(...))`` lookup.
    best = min(shortlisted, key=lambda pos: len(cand_vars[pos]))
    diplo1 = candidates[best]

    return [_bkg_star_diplotype(diplo1), diplo1]
|
||||
437
pgx-main/scripts/cyp2b6/b37/bin/snv_def_modules.py
Normal file
437
pgx-main/scripts/cyp2b6/b37/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,437 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ``;``-joined string.

    Each line of ``infile`` is stripped and treated as one variant.  When
    ``int(cn) == 1`` every ``~0/1`` genotype suffix is rewritten to ``~1/1``.
    An empty file yields an empty string.
    """
    # Context manager closes the handle (the original leaked it).
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars
|
||||
|
||||
def get_all_vars_gt(infile_full_gt):
    """Return every line of ``infile_full_gt``, stripped, sorted and joined
    with ``;``.  An empty file yields an empty string."""
    # Context manager closes the handle (the original leaked it).
    with open(infile_full_gt, "r") as handle:
        return ";".join(sorted(line.strip() for line in handle))
|
||||
|
||||
# (position in the two-candidate list, diplotype id) pairs that win outright
# when all tie-break scores are equal; checked in this order.
_PREFERRED_TIE_DIPLOTYPES = (
    (1, "1.v1_18.v2"),
    (0, "1.v1_36.v1"),
    (0, "1.v1_6.v1"),
    (1, "5.v1_6.v1"),
    (1, "36.v1_8.v1"),
    (0, "13.v1_5.v1"),
    (0, "18.v1_6.v1"),
    (0, "36.v1_5.v1"),
)


def _star_diplotype(diplo):
    """Convert a diplotype id such as ``"1.v1_18.v2"`` into ``"*1/*18"``."""
    dots = [pos for pos, char in enumerate(diplo) if char == "."]
    underscores = [pos for pos, char in enumerate(diplo) if char == "_"]
    hap1 = "*" + diplo[:dots[0]]
    hap2 = "*" + diplo[underscores[0] + 1:dots[1]]
    return hap1 + "/" + hap2


def _count_unexplained(observed, supported):
    """Count the entries of ``observed`` that are absent from ``supported``."""
    supported = set(supported)
    return sum(1 for item in observed if item not in supported)


def _rank_spec_candidates(infile_spec, core_variants, all_var_gt):
    """Score the rows of ``infile_spec`` whose column 2 equals ``core_variants``.

    Returns ``(diplotypes, candidate_haplotypes, scores)`` as parallel lists
    taken from columns 1 and 0, with the score being the number of observed
    genotypes not listed in the ``;``-separated column 3.
    """
    diplos = []
    cand_haps = []
    scores = []
    with open(infile_spec, "r") as handle:
        for line in handle:
            fields = line.strip().split()
            if len(fields) < 4 or fields[2] != core_variants:
                continue
            cand_haps.append(fields[0])
            diplos.append(fields[1])
            scores.append(_count_unexplained(all_var_gt, fields[3].split(";")))
    return diplos, cand_haps, scores


def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
    """Call the star-allele diplotype that matches the sample's core SNVs.

    Args:
        database: tab-separated table; column 0 diplotype id, column 1
            ``;``-separated core variants, column 2 ``;``-separated
            supporting variants.
        infile: core variants of the sample, one per line.
        infile_full: all variants of the sample.
        infile_full_gt: all variants with genotypes, one per line.
        infile_spec: whitespace-separated tie-break table; column 0
            candidate haplotypes, column 1 diplotype id, column 2 core
            variants, column 3 ``;``-separated genotypes.
        cn: copy number forwarded to ``get_core_variants``.

    Returns:
        * ``['1.v1_1.v1', '*1/*1']`` when ``infile`` is empty;
        * ``[candidates, chosen, star_diplotype]`` for a resolved call;
        * ``[candidates, "A or B"]`` for an unresolved two-candidate tie;
        * ``[candidates, tied_diplotypes, "A or B or C"]`` for an unresolved
          three-candidate tie;
        * ``None`` when zero or more than three database rows match, or when
          the three-candidate counts fit none of the handled orderings.

    Raises:
        ValueError: if a tie-break is required but ``infile_spec`` has no
            row for the sample's core variants.
    """
    # NOTE(review): lines are kept unstripped on purpose.  The original
    # called ``line.strip()`` and discarded the result, so these entries keep
    # their newline and normally never equal a database variant.  Stripping
    # here would change which branch below is taken - confirm before fixing.
    with open(infile_full, "r") as handle:
        all_variants = handle.readlines()

    if os.stat(infile).st_size == 0:
        return ["1.v1_1.v1", "*1/*1"]

    core_variants = get_core_variants(infile, cn)

    with open(infile_full_gt, "r") as handle:
        all_var_gt = [line.strip() for line in handle]

    # Database rows whose sorted core-variant set equals the sample's.
    soln_list1 = []
    soln_list2 = []
    with open(database, "r") as handle:
        for line in handle:
            record = line.strip().split("\t")
            if len(record) < 3:
                continue
            if ";".join(sorted(record[1].split(";"))) == core_variants:
                soln_list1.append(record[0])
                soln_list2.append(record[2])

    n_solutions = len(soln_list1)

    if n_solutions == 1:
        diplo = soln_list1[0]
        return [soln_list1, diplo, _star_diplotype(diplo)]

    if n_solutions not in (2, 3):
        return None

    unexplained = [
        _count_unexplained(all_variants, supp.split(";")) for supp in soln_list2
    ]

    if n_solutions == 2:
        uniq1, uniq2 = unexplained
        if uniq1 < uniq2:
            return [soln_list1, soln_list1[0], _star_diplotype(soln_list1[0])]
        if uniq1 > uniq2:
            return [soln_list1, soln_list1[1], _star_diplotype(soln_list1[1])]

        diplos, cand_haps, scores = _rank_spec_candidates(
            infile_spec, core_variants, all_var_gt)
        min_score = min(scores)

        if all(score == scores[0] for score in scores):
            # Hard-coded preferences resolve specific known ties.
            for position, preferred in _PREFERRED_TIE_DIPLOTYPES:
                if soln_list1[position] == preferred:
                    return [soln_list1, preferred, _star_diplotype(preferred)]
            allele_res = " or ".join(_star_diplotype(d) for d in soln_list1)
            return [soln_list1, allele_res]

        n_best = scores.count(min_score)
        if (n_best > 1 and soln_list1[1] == "6.v1_8.v1") or n_best > 2:
            # Report the first and last tie-break rows as alternatives.
            allele_res = " or ".join(
                _star_diplotype(d) for d in (diplos[0], diplos[-1]))
            return [soln_list1, allele_res]

        best = scores.index(min_score)
        return [soln_list1, cand_haps[best], _star_diplotype(diplos[best])]

    # Three candidate diplotypes.
    uniq1, uniq2, uniq3 = unexplained
    if uniq1 < uniq2 and uniq1 < uniq3:
        return [soln_list1, soln_list1[0], _star_diplotype(soln_list1[0])]
    if uniq1 > uniq2 and uniq2 < uniq3:
        return [soln_list1, soln_list1[1], _star_diplotype(soln_list1[1])]
    if uniq1 > uniq2 and uniq2 > uniq3:
        return [soln_list1, soln_list1[2], _star_diplotype(soln_list1[2])]

    if uniq1 == uniq2 == uniq3 or (uniq1 != uniq2 == uniq3) or (uniq1 == uniq2 != uniq3):
        diplos, cand_haps, scores = _rank_spec_candidates(
            infile_spec, core_variants, all_var_gt)
        min_score = min(scores)

        if all(score == scores[0] for score in scores):
            allele_res = " or ".join(_star_diplotype(d) for d in diplos)
            return [soln_list1, diplos, allele_res]

        best = scores.index(min_score)
        return [soln_list1, cand_haps[best], _star_diplotype(diplos[best])]

    # Remaining orderings were never handled by the original either.
    return None
|
||||
254
pgx-main/scripts/cyp2b6/b37/bin/stellarpgx.py
Normal file
254
pgx-main/scripts/cyp2b6/b37/bin/stellarpgx.py
Normal file
@@ -0,0 +1,254 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from sv_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Script preamble: read the command line, estimate the copy number, run the
# SNV-based diplotype call and bail out early when no database entry matches.
# ---------------------------------------------------------------------------
print("--------------------------------------------\n")
print("CYP2B6 Star Allele Calling with StellarPGx\n")
print("--------------------------------------------\n")

# Positional command-line arguments (all are file paths handed to the helper
# modules; act_score is read here but not used in this part of the script).
database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]
sv_del = sys.argv[6]
sv_dup = sys.argv[7]
cov_file = sys.argv[8]
hap_dbs = sys.argv[9]
act_score = sys.argv[10]

# Parse the coverage summary once and reuse it; the result is
# [copy number (str), gene coverage, control coverage, e1-int4, int4-e9].
cov_summary = get_total_CN(cov_file)
cn = cov_summary[0]

print("Initially computed CN = {}".format(cn))

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)

snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)

if snv_def_calls is None:
    # No database diplotype matched the core variants: report the closest
    # background alleles (if any) and stop.
    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if bac_alleles is None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")

    else:
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    sys.exit()

best_diplos = snv_def_calls[0]

print("\nCandidate alleles:")
print(best_diplos)

# The last element is always the star-allele string; it contains " or " when
# the call is ambiguous, in which case no single candidate diplotype exists.
snv_def_alleles = snv_def_calls[-1]

if "or" not in snv_def_alleles:
    snv_cand_alleles = snv_def_calls[1]

dip_variants = get_all_vars_gt(infile_full_gt)

print("\nResult:")

av_cov = cov_summary[1]
cov_e1_int4 = cov_summary[3]
cov_int4_e9 = cov_summary[4]

gene_alleles = ""

if snv_def_alleles != '*1/*1' and cn != '0':
    in_list = dup_test_init(sv_dup, av_cov)
|
||||
|
||||
|
||||
# if cn == '2' and snv_def_alleles == '*1/*1':
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Final call: combine the SNV-based diplotype with the estimated copy number.
# ---------------------------------------------------------------------------
if cn == '2':

    if 'or' in snv_def_alleles:
        # Ambiguous SNV call: report it unchanged.
        print (snv_def_alleles)

    else:
        snv_def_alleles = snv_def_alleles.split("/")

        if snv_def_alleles[0] == '*1' or snv_def_alleles[1] == '*1':
            ind_star2 = snv_def_alleles.index('*1')
            ind_other = 1 - ind_star2

            # Coverage-ratio screens for the *29 / *30 hybrids.
            test_29 = hybrid_29_test1(cov_e1_int4, cov_int4_e9)
            test_30 = hybrid_30_test1(cov_e1_int4, cov_int4_e9)

            if test_29 == 'norm_var' and test_30 == 'norm_var':
                gene_alleles = "/".join(snv_def_alleles)
                print(gene_alleles)

            elif test_29 == 'hyb_29':
                gene_alleles = snv_def_alleles[ind_other] + "/" + "*29"
                print(gene_alleles)

            # snv_def_alleles is a list at this point, so it has to be compared
            # with a list; comparing it with the string "*1/*1" never matched.
            elif test_29 == 'hyb_29_2' and snv_def_alleles == ['*1', '*1']:
                gene_alleles = "*29/*29"
                print(gene_alleles)

            # NOTE(review): any other combination (for example a positive *30
            # screen) prints nothing, as before -- confirm that is intended.

        else:
            gene_alleles = "/".join(snv_def_alleles)
            print(gene_alleles)


elif cn == '0':
    del_confirm = del_test(sv_del)

    if del_confirm == '*(full_gene_del)/*(full_gene_del)':
        gene_alleles = del_confirm
        print (gene_alleles)

    elif del_confirm == '*(full_gene_del)':
        gene_alleles = del_confirm + "/" + "*other"
        print(gene_alleles)

    else:
        # Copy number 0 without structural-variant support is still reported
        # as a homozygous full-gene deletion.
        gene_alleles = "*(full_gene_del)/*(full_gene_del)"
        print(gene_alleles)


elif cn == '1':
    del_confirm = del_test(sv_del)

    if "or" in snv_def_alleles and del_confirm == "None":
        print (snv_def_alleles + "\t" + "Possible CYP2B6 gene deletion present")

    elif "or" not in snv_def_alleles and del_confirm == "None":
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] == snv_def_alleles[1]:
            gene_alleles = snv_def_alleles[0] + "/" + "*(full_gene_del)"
            print(gene_alleles)

        elif snv_def_alleles[0] != snv_def_alleles[1]:
            samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)

            gene_alleles = samp_allele1 + "/" + "*(full_gene_del)"
            print(gene_alleles)

    else:
        # Deletion confirmed by the structural-variant caller.
        # NOTE(review): an ambiguous ("or") SNV call also lands here, where
        # snv_cand_alleles was never assigned -- confirm how it should be reported.
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] == snv_def_alleles[1]:

            if del_confirm == "*(full_gene_del)/*(full_gene_del)":
                del_confirm = "*(full_gene_del)"

            gene_alleles = del_confirm + "/" + snv_def_alleles[0]
            print(gene_alleles)

        elif snv_def_alleles[0] != snv_def_alleles[1]:
            samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)

            if del_confirm == "*(full_gene_del)/*(full_gene_del)":
                del_confirm = "*(full_gene_del)"

            gene_alleles = del_confirm + "/" + samp_allele1
            print(gene_alleles)


elif (int(cn) == 3 or int(cn) == 4) and snv_def_alleles is not None:

    if "or" in snv_def_alleles:
        print (snv_def_alleles + "\t" + "Duplication present")

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] != snv_def_alleles[1]:
            # Heterozygous call: work out which allele carries the extra copies.
            phased_dup = dup_test_cn_3_4(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)

        elif snv_def_alleles[0] == snv_def_alleles[1]:
            rt_2 = int(cn) - 1
            phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))

        gene_alleles = phased_dup
        print(gene_alleles)


elif int(cn) > 4 and snv_def_alleles is not None:

    if "or" in snv_def_alleles:
        print (snv_def_alleles + "\t" + "Duplication present")

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] != snv_def_alleles[1]:
            phased_dup = dup_test_cn_n(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)

        elif snv_def_alleles[0] == snv_def_alleles[1]:
            rt_2 = int(cn) - 1
            phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))

        gene_alleles = phased_dup
        print(phased_dup)


elif int(cn) > 2 and snv_def_alleles is None:

    print("Possible rare CYP2B6/2B7 hybrid present")
|
||||
331
pgx-main/scripts/cyp2b6/b37/bin/sv_modules.py
Normal file
331
pgx-main/scripts/cyp2b6/b37/bin/sv_modules.py
Normal file
@@ -0,0 +1,331 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
def get_total_CN(cov_file):
    """Estimate the gene copy number from a coverage summary file.

    Each of the first five rows of ``cov_file`` is split on whitespace and
    turned into ``row[3] / (row[2] - row[1])``. The rows are read, in order,
    as the CYP2B6 gene, the VDR control, the EGFR control, the exon 1 -
    intron 4 segment and the intron 4 - exon 9 segment.
    (Presumably columns 1-3 are start, end and summed depth -- confirm
    against the step that writes this file.)

    Returns a list:
        [copy number as str, rounded gene coverage, rounded mean control
         coverage, e1-int4 coverage as str, int4-e9 coverage as str]

    Raises IndexError when fewer than five rows (or four columns) are
    present and ZeroDivisionError for a zero-length interval or zero
    control coverage.
    """
    with open(cov_file, "r") as handle:
        all_reg = [line.strip().split() for line in handle]

    def mean_depth(row):
        return float(row[3]) / (float(row[2]) - float(row[1]))

    av_2b6_cov = mean_depth(all_reg[0])
    av_vdr_cov = mean_depth(all_reg[1])
    av_egfr_cov = mean_depth(all_reg[2])
    av_e1_int4 = mean_depth(all_reg[3])
    av_int4_e9 = mean_depth(all_reg[4])

    # The two control genes give the coverage that corresponds to two copies.
    av_ctrl_cov = (av_vdr_cov + av_egfr_cov) / 2
    total_cn = round(2 * (av_2b6_cov / av_ctrl_cov))

    return [str(int(total_cn)), round(av_2b6_cov), round(av_ctrl_cov), str(av_e1_int4), str(av_int4_e9)]
|
||||
|
||||
|
||||
def del_test(sv_del):
    """Classify a full-gene deletion from the deletion genotyping output.

    The first line containing ``COVERAGE`` whose last whitespace-separated
    field is 1.0 or -1.0 decides the result:

    * ``1.0``  -> ``"*(full_gene_del)/*(full_gene_del)"``
    * ``-1.0`` -> ``"*(full_gene_del)"``

    Returns the string ``"None"`` (not the ``None`` object) when the file is
    empty or no line gives a decision. The calling script tests for the
    string, so the no-decision case now returns it as well instead of falling
    through to an implicit ``None``.
    """
    if os.stat(sv_del).st_size == 0:
        return "None"

    with open(sv_del, "r") as handle:
        for line in handle:
            if "COVERAGE" not in line:
                continue

            fields = line.strip().split()
            ab_hom = float(fields[-1])

            if ab_hom == 1.0:
                return "*(full_gene_del)/*(full_gene_del)"
            if ab_hom == -1.0:
                return "*(full_gene_del)"

    return "None"
|
||||
|
||||
|
||||
# Kept only so that ``from sv_modules import *`` still exposes these names;
# del_adv_test no longer accumulates state in them between calls.
hap_adv_list = []
hap_t1 = []


def del_adv_test(hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, core_vars):
    """Choose which of two called alleles matches the first heterozygous core variant.

    ``hap_dbs`` is read as whitespace-separated rows whose second column is a
    haplotype identifier and whose third column is a ``;``-joined variant
    list. ``core_vars`` is a ``;``-joined list of variants; entries ending in
    ``0/1`` have their last four characters removed and the first of them is
    looked up in the variant lists of ``cand_allele1`` and ``cand_allele2``.

    Returns ``test_allele1`` or ``test_allele2`` accordingly, or ``None`` when
    the variant belongs to neither list.

    Raises IndexError when ``core_vars`` has no ``0/1`` entry and ValueError
    when a needed candidate haplotype is absent from ``hap_dbs``.
    """
    with open(hap_dbs, "r") as handle:
        haplotypes = [line.strip().split() for line in handle]

    het_vars = [var[:-4] for var in core_vars.split(";") if var[-3:] == "0/1"]

    vars_allele1 = None
    vars_allele2 = None
    for entry in haplotypes:
        if entry[1] == cand_allele1:
            vars_allele1 = entry[2].split(';')
        if entry[1] == cand_allele2:
            vars_allele2 = entry[2].split(';')

    marker = het_vars[0]

    if vars_allele1 is None:
        raise ValueError("haplotype {} not found in {}".format(cand_allele1, hap_dbs))
    if marker in vars_allele1:
        return test_allele1

    if vars_allele2 is None:
        raise ValueError("haplotype {} not found in {}".format(cand_allele2, hap_dbs))
    if marker in vars_allele2:
        return test_allele2

    return None
|
||||
|
||||
|
||||
# Kept only so that ``from sv_modules import *`` still exposes these names;
# dup_test_init now builds a fresh list on every call.
het_hom_list = []
het_hom_list_new = []


def dup_test_init(sv_dup, av_cov):
    """Load the duplication genotyping rows and annotate them with read support.

    Lines of ``sv_dup`` containing ``COVERAGE`` or ``AGGREGATED`` are skipped;
    every other line is split on whitespace. For each row two values are
    appended, in this order:

    * ``round(supp_reads / norm_cov, 3)``, where ``norm_cov`` is the mean of
      ``av_cov`` and the average of column 3 over all rows;
    * ``supp_reads = round(float(row[-2]) * int(row[2]))``.

    (Presumably column 3 is the read depth and the second-to-last column an
    allele fraction -- confirm against the producer of this file.)

    Returns the list of annotated rows. Raises ZeroDivisionError when no data
    rows are present.
    """
    with open(sv_dup, "r") as handle:
        rows = [
            line.strip().split()
            for line in handle
            if "COVERAGE" not in line and "AGGREGATED" not in line
        ]

    depths = [int(row[2]) for row in rows]
    av_read_cov = sum(depths) / len(depths)
    norm_cov = (av_cov + av_read_cov) / 2

    for row in rows:
        # row[-2] must be read before anything is appended to the row.
        supp_reads = round(float(row[-2]) * int(row[2]))
        row.append(round(supp_reads / norm_cov, 3))
        row.append(supp_reads)

    return rows
|
||||
|
||||
|
||||
# Module-level lists kept for the other functions and star-importers that
# reference them; dup_test_cn_3_4 itself no longer stores state here.
hap_def_list = []
allele_cn_list = []


def dup_test_cn_3_4(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
    """Assign the copies of a CN 3 or 4 sample to its two alleles.

    Among the rows of ``in_list`` whose second field is ``"0/1"``, the one
    with the largest second-to-last value is selected. That value times
    ``c_num``, rounded, is taken as the copy count of whichever candidate
    haplotype lists the row's variant (column 1 of the row, looked up in
    column 3 of ``hap_dbs``); the other allele receives the remaining copies.

    ``sv_dup`` and ``av_cov`` are accepted for interface compatibility and
    are not used.

    Returns the diplotype string (for example ``"*1/*6x2"``), or ``'check'``
    when the copy split matches none of the handled patterns.

    Raises ValueError when there is no heterozygous row, when a candidate
    haplotype is missing from ``hap_dbs``, or when the selected variant is in
    neither candidate's variant list.
    """
    with open(hap_dbs, "r") as handle:
        haplotypes = [line.strip().split() for line in handle]

    het_rows = [row for row in in_list if row[1] == "0/1"]
    het_vars = [row[0] for row in het_rows]
    het_ratios = [row[-2] for row in het_rows]

    max_het = max(het_ratios)
    var = het_vars[het_ratios.index(max_het)]

    vars_allele1 = None
    vars_allele2 = None
    for entry in haplotypes:
        if entry[1] == cand_allele1:
            vars_allele1 = entry[2].split(';')
        if entry[1] == cand_allele2:
            vars_allele2 = entry[2].split(';')

    if vars_allele1 is None or vars_allele2 is None:
        raise ValueError("candidate haplotype not found in {}".format(hap_dbs))

    index_var = (vars_allele1 + vars_allele2).index(var)

    # "first" is the allele that carries the selected variant.
    if index_var < len(vars_allele1):
        first, second = test_allele1, test_allele2
    else:
        first, second = test_allele2, test_allele1

    first_cn = int(round(max_het * int(c_num)))
    second_cn = int(c_num) - first_cn

    if first_cn == 0:
        res_dip = first + "/" + second + "x" + str(second_cn - 1)

    elif second_cn == 0:
        res_dip = second + "/" + first + "x" + str(first_cn - 1)

    elif first_cn == 1:
        res_dip = first + "/" + second + "x" + str(second_cn)

    elif second_cn == 1:
        res_dip = second + "/" + first + "x" + str(first_cn)

    elif first_cn == 2:
        res_dip = first + "x2" + "/" + second + "x" + str(second_cn)

    elif second_cn == 2:
        res_dip = second + "x2" + "/" + first + "x" + str(first_cn)

    else:
        res_dip = 'check'

    return res_dip
|
||||
|
||||
|
||||
|
||||
def dup_test_cn_n(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
    """Assign the copies of a high copy-number sample to its two alleles.

    Among the rows of ``in_list`` whose second field is ``"0/1"``, the one
    with the largest second-to-last value is selected. The copy count of the
    allele carrying that row's variant is
    ``round(value * c_num - 0.15)``; the other allele receives the remaining
    copies. Which allele carries the variant is decided by looking it up in
    the variant lists (column 3 of ``hap_dbs``) of the two candidates.

    ``sv_dup`` and ``av_cov`` are accepted for interface compatibility and
    are not used. All working state is local, so repeated calls are
    independent of each other.

    Returns the diplotype string (for example ``"*1x2/*6x3"``), or
    ``'check'`` when the copy split matches none of the handled patterns.

    Raises ValueError when there is no heterozygous row, when a candidate
    haplotype is missing from ``hap_dbs``, or when the selected variant is in
    neither candidate's variant list.
    """
    with open(hap_dbs, "r") as handle:
        haplotypes = [line.strip().split() for line in handle]

    het_rows = [row for row in in_list if row[1] == "0/1"]
    het_vars = [row[0] for row in het_rows]
    het_ratios = [row[-2] for row in het_rows]

    max_het = max(het_ratios)
    var = het_vars[het_ratios.index(max_het)]

    vars_allele1 = None
    vars_allele2 = None
    for entry in haplotypes:
        if entry[1] == cand_allele1:
            vars_allele1 = entry[2].split(';')
        if entry[1] == cand_allele2:
            vars_allele2 = entry[2].split(';')

    if vars_allele1 is None or vars_allele2 is None:
        raise ValueError("candidate haplotype not found in {}".format(hap_dbs))

    index_var = (vars_allele1 + vars_allele2).index(var)

    # "first" is the allele that carries the selected variant.
    if index_var < len(vars_allele1):
        first, second = test_allele1, test_allele2
    else:
        first, second = test_allele2, test_allele1

    first_cn = int(round(max_het * int(c_num) - 0.15))
    second_cn = int(c_num) - first_cn

    if first_cn == 0:
        res_dip = first + "/" + second + "x" + str(second_cn - 1)

    elif second_cn == 0:
        res_dip = second + "/" + first + "x" + str(first_cn - 1)

    elif first_cn == 1:
        res_dip = first + "/" + second + "x" + str(second_cn)

    elif second_cn == 1:
        res_dip = second + "/" + first + "x" + str(first_cn)

    elif first_cn == 2:
        res_dip = first + "x2" + "/" + second + "x" + str(second_cn)

    elif second_cn == 2:
        res_dip = second + "x2" + "/" + first + "x" + str(first_cn)

    elif first_cn == 3:
        res_dip = first + "x3" + "/" + second + "x" + str(second_cn)

    elif second_cn == 3:
        res_dip = second + "x3" + "/" + first + "x" + str(first_cn)

    elif first_cn == 4:
        res_dip = first + "x4" + "/" + second + "x" + str(second_cn)

    elif second_cn == 4:
        res_dip = second + "x4" + "/" + first + "x" + str(first_cn)

    else:
        res_dip = 'check'

    return res_dip
|
||||
|
||||
|
||||
def hybrid_29_test1(cov_e1_int4, cov_int4_e9):
    """Screen for a *29-type hybrid from two segment coverages.

    The ratio ``cov_e1_int4 / cov_int4_e9`` (both converted with ``float``)
    is classified as:

    * strictly between 0.45 and 0.75 -> ``'hyb_29'``
    * below 0.15                     -> ``'hyb_29_2'``
    * anything else                  -> ``'norm_var'``
    """
    ratio = float(cov_e1_int4) / float(cov_int4_e9)

    if 0.45 < ratio < 0.75:
        return 'hyb_29'

    if ratio < 0.15:
        return 'hyb_29_2'

    # Covers the balanced 0.85-1.2 window as well as every unclassified ratio.
    return 'norm_var'
|
||||
|
||||
|
||||
def hybrid_30_test1(cov_e1_int4, cov_int4_e9):
    """Screen for a *30-type hybrid from two segment coverages.

    If ``cov_e1_int4 / cov_int4_e9`` lies strictly between 0.85 and 1.2 the
    result is ``'norm_var'``. Otherwise the inverse ratio
    ``cov_int4_e9 / cov_e1_int4`` is classified as:

    * strictly between 0.45 and 0.75 -> ``'hyb_30'``
    * below 0.15                     -> ``'hyb_30_2'``
    * anything else                  -> ``'norm_var'``
    """
    front = float(cov_e1_int4)
    back = float(cov_int4_e9)

    if 0.85 < front / back < 1.2:
        return 'norm_var'

    # Only computed once the balanced case has been ruled out.
    inverse = back / front

    if 0.45 < inverse < 0.75:
        return 'hyb_30'

    if inverse < 0.15:
        return 'hyb_30_2'

    return 'norm_var'
|
||||
|
||||
78
pgx-main/scripts/cyp2b6/hg38/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/cyp2b6/hg38/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Find the database diplotype that best explains the sample's core variants.

    ``database`` is read as tab-separated rows: column 1 is a diplotype
    identifier such as ``1.v1_6.v1`` and column 2 its ``;``-joined core
    variants. ``core_vars`` is the sample's ``;``-joined core variant string.

    A row is a candidate only when its variant string contains both the first
    and the last sample variant (each with its final four characters removed).
    The previous condition, ``core_temp1 and core_temp2 in temp_rec``, only
    tested the last variant for truthiness and never for membership.

    Each candidate is scored per row variant: +3 when it appears in the sample
    exactly, +1 when only its name (final four characters removed) occurs in
    ``core_vars``, -2 otherwise. Among candidates scoring the maximum or one
    below it, the one with the fewest variants wins.

    Returns ``[star_alleles, diplotype]``, for example
    ``['*1/*6', '1.v1_6.v1']``; ``['*1/*1', '1.v1_1.v1']`` when there is no
    candidate.
    """
    core_vars_list = core_vars.split(";")
    last_core = core_vars_list[-1][:-4]
    first_core = core_vars_list[0][:-4]

    candidates = []
    cand_vars = []
    scores = []

    with open(database, "r") as handle:
        for line in handle:
            record = line.strip().split("\t")
            record_text = record[1]

            # An empty last variant still yields no candidates, as before.
            if not (last_core and last_core in record_text and first_core in record_text):
                continue

            record_core_var = record_text.split(";")
            counter = 0
            for variant in record_core_var:
                if variant in core_vars_list:
                    counter += 3
                elif variant[:-4] in core_vars:
                    counter += 1
                else:
                    counter -= 2

            candidates.append(record[0])
            cand_vars.append(record_core_var)
            scores.append(counter)

    if not scores:
        return ['*1/*1', '1.v1_1.v1']

    max_score = max(scores)
    shortlisted = [i for i, x in enumerate(scores) if x == max_score or x == max_score - 1]

    # min() keeps the first of several equally short candidates.
    best = min(shortlisted, key=lambda i: len(cand_vars[i]))
    diplo1 = candidates[best]

    underscores = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
    dots = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
    hap1 = "*" + str(diplo1[:dots[0]])
    hap2 = "*" + str(diplo1[underscores[0] + 1:dots[1]])
    allele_res = hap1 + "/" + hap2

    return [allele_res, diplo1]
|
||||
435
pgx-main/scripts/cyp2b6/hg38/bin/snv_def_modules.py
Normal file
435
pgx-main/scripts/cyp2b6/hg38/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,435 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ``;``-joined string.

    Every line of ``infile`` is stripped and treated as one variant. When
    ``cn`` (anything ``int()`` accepts) equals 1, each ``~0/1`` genotype
    suffix is rewritten to ``~1/1``.
    """
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars
|
||||
|
||||
def get_all_vars_gt(infile_full_gt):
    """Return every line of ``infile_full_gt``, stripped, sorted and ``;``-joined."""
    with open(infile_full_gt, "r") as handle:
        return ";".join(sorted(line.strip() for line in handle))
|
||||
|
||||
# Diplotypes that win a two-candidate call whose tie-break scores are all
# equal. Entries are checked in order; each is (index into the candidate
# list, diplotype that must be found at that index).
_PREFERRED_TIED_DIPLOTYPES = (
    (1, "1.v1_18.v2"),
    (0, "1.v1_36.v1"),
    (0, "1.v1_6.v1"),
    (1, "5.v1_6.v1"),
    (1, "36.v1_8.v1"),
    (0, "13.v1_5.v1"),
    (0, "18.v1_6.v1"),
    (0, "36.v1_5.v1"),
)


def _diplo_to_star(diplo):
    """Turn a diplotype identifier such as ``1.v1_6.v1`` into ``*1/*6``."""
    underscores = [i for i in range(len(diplo)) if diplo.startswith("_", i)]
    dots = [i for i in range(len(diplo)) if diplo.startswith(".", i)]
    hap1 = "*" + str(diplo[:dots[0]])
    hap2 = "*" + str(diplo[underscores[0] + 1:dots[1]])
    return hap1 + "/" + hap2


def _count_missing(observed, supporting):
    """Count the entries of ``observed`` that do not occur in ``supporting``."""
    return sum(1 for item in observed if item not in supporting)


def _score_tiebreak(spec_lines, core_variants, all_var_gt):
    """Collect and score the tie-break rows whose third column equals ``core_variants``.

    Returns three parallel lists: diplotypes (column 2), candidate labels
    (column 1) and scores, where a score is the number of sample genotypes
    missing from the row's ``;``-joined fourth column.
    """
    diplotypes = []
    labels = []
    scores = []
    for line in spec_lines:
        fields = line.strip().split()
        if fields[2] == core_variants:
            diplotypes.append(fields[1])
            labels.append(fields[0])
            scores.append(_count_missing(all_var_gt, fields[3].split(";")))
    return diplotypes, labels, scores


def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
    """Call the diplotype whose core variants match the sample exactly.

    Parameters are file paths, except ``cn`` (the copy number):

    * ``database``       -- tab-separated: diplotype, core variants,
                            supporting variants (both ``;``-joined)
    * ``infile``         -- the sample's core variants, one per line
    * ``infile_full``    -- all sample variants
    * ``infile_full_gt`` -- all sample variants with genotypes, one per line
    * ``infile_spec``    -- whitespace-separated tie-break table: label,
                            diplotype, core variants, genotyped variants

    Returns
    * ``["1.v1_1.v1", "*1/*1"]`` when ``infile`` is empty;
    * ``[candidates, chosen, "*a/*b"]`` for a resolved call;
    * ``[candidates, "*a/*b or *c/*d"]`` for an unresolved two-candidate tie;
    * ``[candidates, tied_diplotypes, "... or ..."]`` for an unresolved
      three-candidate tie;
    * ``None`` when zero or more than three database rows match, or when
      three candidates fall into none of the handled orderings.

    The last element is always the star-allele string.
    """
    # Read up front so that a missing tie-break file still fails immediately.
    with open(infile_spec, "r") as handle:
        spec_lines = handle.readlines()

    # NOTE(review): lines keep their trailing newline here (the original
    # discarded the result of strip()). Left untouched because stripping would
    # change the comparisons below -- confirm the intended file format.
    with open(infile_full, "r") as handle:
        all_variants = list(handle)

    if os.stat(infile).st_size == 0:
        return ["1.v1_1.v1", "*1/*1"]

    core_variants = get_core_variants(infile, cn)

    with open(infile_full_gt, "r") as handle:
        all_var_gt = [line.strip() for line in handle]

    with open(database, "r") as handle:
        dbs = [line.strip().split("\t") for line in handle]

    soln_list1 = []  # matching diplotypes
    soln_list2 = []  # their supporting-variant strings
    for record in dbs:
        if ";".join(sorted(record[1].split(";"))) == core_variants:
            soln_list1.append(record[0])
            soln_list2.append(record[2])

    if len(soln_list1) == 1:
        diplo = "".join(soln_list1)
        return [soln_list1, diplo, _diplo_to_star(diplo)]

    if len(soln_list1) == 2:
        diplo1, diplo2 = soln_list1
        missing1 = _count_missing(all_variants, soln_list2[0].split(";"))
        missing2 = _count_missing(all_variants, soln_list2[1].split(";"))

        if missing1 < missing2:
            return [soln_list1, diplo1, _diplo_to_star(diplo1)]
        if missing1 > missing2:
            return [soln_list1, diplo2, _diplo_to_star(diplo2)]

        tiebreak1, tiebreak3, score = _score_tiebreak(spec_lines, core_variants, all_var_gt)
        min_score = min(score)

        if len(set(score)) == 1:
            for position, preferred in _PREFERRED_TIED_DIPLOTYPES:
                if soln_list1[position] == preferred:
                    return [soln_list1, preferred, _diplo_to_star(preferred)]

            allele_res = " or ".join(_diplo_to_star(elem) for elem in soln_list1)
            return [soln_list1, allele_res]

        tied = score.count(min_score)
        if tied > 2 or (tied > 1 and soln_list1[1] == "6.v1_8.v1"):
            # Still ambiguous: report the first and last tie-break diplotypes.
            allele_res = " or ".join(_diplo_to_star(elem) for elem in (tiebreak1[0], tiebreak1[-1]))
            return [soln_list1, allele_res]

        minpos = score.index(min_score)
        return [soln_list1, tiebreak3[minpos], _diplo_to_star(tiebreak1[minpos])]

    if len(soln_list1) == 3:
        diplo1, diplo2, diplo3 = soln_list1
        missing1 = _count_missing(all_variants, soln_list2[0].split(";"))
        missing2 = _count_missing(all_variants, soln_list2[1].split(";"))
        missing3 = _count_missing(all_variants, soln_list2[2].split(";"))

        if missing1 < missing2 and missing1 < missing3:
            return [soln_list1, diplo1, _diplo_to_star(diplo1)]

        if missing1 > missing2 and missing2 < missing3:
            return [soln_list1, diplo2, _diplo_to_star(diplo2)]

        if missing1 > missing2 and missing2 > missing3:
            return [soln_list1, diplo3, _diplo_to_star(diplo3)]

        if (missing1 == missing2 == missing3
                or (missing1 != missing2 == missing3)
                or (missing1 == missing2 != missing3)):
            tiebreak1, tiebreak3, score = _score_tiebreak(spec_lines, core_variants, all_var_gt)
            min_score = min(score)

            if len(set(score)) == 1:
                allele_res = " or ".join(_diplo_to_star(elem) for elem in tiebreak1)
                return [soln_list1, tiebreak1, allele_res]

            minpos = score.index(min_score)
            return [soln_list1, tiebreak3[minpos], _diplo_to_star(tiebreak1[minpos])]

    # No match, too many matches, or an ordering none of the rules covers.
    return None
|
||||
252
pgx-main/scripts/cyp2b6/hg38/bin/stellarpgx.py
Normal file
252
pgx-main/scripts/cyp2b6/hg38/bin/stellarpgx.py
Normal file
@@ -0,0 +1,252 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from sv_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
|
||||
# CYP2B6 star-allele calling driver script.
#
# Flow, as visible in this script:
#   1. read ten positional command-line arguments,
#   2. estimate the gene copy number (CN) from the coverage file,
#   3. call candidate diplotypes from the SNV data,
#   4. refine the call depending on the CN (deletion, duplication and
#      hybrid checks) and print the result.
#
# NOTE(review): the indentation of this script was reconstructed from its
# control-flow keywords because the stored copy had lost its leading
# whitespace. Please confirm the nesting, in particular the ``else`` that
# follows the hybrid tests in the ``cn == '2'`` branch.

print("--------------------------------------------\n")

print("CYP2B6 Star Allele Calling with StellarPGx\n")

print("--------------------------------------------\n")


# Positional command-line arguments.
database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]
sv_del = sys.argv[6]
sv_dup = sys.argv[7]
cov_file = sys.argv[8]
hap_dbs = sys.argv[9]
# NOTE(review): act_score is read but never used in this script.
act_score = sys.argv[10]


# Element 0 of get_total_CN() is the copy number; it is compared against
# string literals ('0', '1', '2') and converted with int() further down.
cn = get_total_CN(cov_file)[0]

print("Initially computed CN = {}".format(cn))

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)


snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)

# No SNV-based call was returned: report likely background alleles and stop.
if snv_def_calls == None:

    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if bac_alleles == None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")

    else:
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    sys.exit()


best_diplos = snv_def_calls[0]

print("\nCandidate alleles:")
print(best_diplos)


# The last element is the diplotype string; a string containing "or" marks
# an ambiguous call listing several alternatives.
snv_def_alleles = snv_def_calls[-1]

# NOTE(review): snv_cand_alleles stays undefined for ambiguous ("or") calls,
# yet the final ``else`` of the ``cn == '1'`` branch below can still reach
# code that reads it -- confirm whether that path can occur.
if "or" in snv_def_alleles:
    pass
else:
    snv_cand_alleles = snv_def_calls[1]


# NOTE(review): dip_variants is not used anywhere else in this script.
dip_variants = get_all_vars_gt(infile_full_gt)


print("\nResult:")


# NOTE(review): the coverage file is re-parsed on each of these calls.
av_cov = get_total_CN(cov_file)[1]
cov_e1_int4 = get_total_CN(cov_file)[3]
cov_int4_e9 = get_total_CN(cov_file)[4]


gene_alleles = ""


# in_list is only defined for non-*1/*1 calls with CN != 0; it is consumed by
# the duplication tests in the CN >= 3 branches for heterozygous diplotypes.
if snv_def_alleles != '*1/*1' and cn != '0':
    in_list = dup_test_init(sv_dup, av_cov)


# if cn == '2' and snv_def_alleles == '*1/*1':


if cn == '2':

    if 'or' in snv_def_alleles:
        print (snv_def_alleles)

    else:
        snv_def_alleles = snv_def_alleles.split("/")

        # The hybrid tests only run when at least one allele is *1.
        if snv_def_alleles[0] == '*1' or snv_def_alleles[1] == '*1':
            ind_star2 = snv_def_alleles.index('*1')
            ind_other = 1 - ind_star2

            test_29 = hybrid_29_test1(cov_e1_int4, cov_int4_e9)
            test_30 = hybrid_30_test1(cov_e1_int4, cov_int4_e9)


            if test_29 == 'norm_var' and test_30 == 'norm_var':
                gene_alleles = "/".join(snv_def_alleles)
                print(gene_alleles)

            elif test_29 == 'hyb_29':
                gene_alleles = snv_def_alleles[ind_other] + "/" + "*29"
                print(gene_alleles)

            # NOTE(review): snv_def_alleles is a list at this point (split
            # above), so comparing it with the string "*1/*1" is always
            # False and this branch cannot run as written.
            elif test_29 == 'hyb_29_2' and snv_def_alleles == "*1/*1":
                gene_alleles = "*29/*29"
                print(gene_alleles)


        else:
            gene_alleles = "/".join(snv_def_alleles)
            print(gene_alleles)



elif cn == '0':
    del_confirm = del_test(sv_del)
    if del_confirm == '*(full_gene_del)/*(full_gene_del)':
        gene_alleles = del_confirm
        print (gene_alleles)

    elif del_confirm == '*(full_gene_del)':
        gene_alleles = del_confirm + "/" + "*other"
        print(gene_alleles)

    else:
        gene_alleles = "*(full_gene_del)/*(full_gene_del)"
        print(gene_alleles)


elif cn == '1':
    del_confirm = del_test(sv_del)

    # del_test() returns the *string* 'None' for an empty deletion file.
    if "or" in snv_def_alleles and del_confirm == 'None':
        print (snv_def_alleles + "\t" + "Possible CYP2B6 gene deletion present")

    elif "or" not in snv_def_alleles and del_confirm == 'None':
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] == snv_def_alleles[1]:
            gene_alleles = snv_def_alleles[0] + "/" + "*(full_gene_del)"
            print(gene_alleles)

        elif snv_def_alleles[0] != snv_def_alleles[1]:
            samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)

            gene_alleles = samp_allele1 + "/" + "*(full_gene_del)"
            print(gene_alleles)

    else:
        # NOTE(review): this branch is also reached when del_test() returns a
        # real None (no matching record), in which case the string
        # concatenations below would raise TypeError -- confirm.
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] == snv_def_alleles[1]:

            # With CN == 1 only one copy can be deleted, so a homozygous
            # deletion label is reduced to a single deleted allele.
            if del_confirm == "*(full_gene_del)/*(full_gene_del)":
                del_confirm = "*(full_gene_del)"
            gene_alleles = del_confirm + "/" + snv_def_alleles[0]
            print(gene_alleles)

        elif snv_def_alleles[0] != snv_def_alleles[1]:
            samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)

            if del_confirm == "*(full_gene_del)/*(full_gene_del)":
                del_confirm = "*(full_gene_del)"
            gene_alleles = del_confirm + "/" + samp_allele1
            print(gene_alleles)



elif (int(cn) == 3 or int(cn) == 4) and snv_def_alleles != None:

    # in_list = dup_test_init(sv_dup, av_cov)
    # print (snv_def_alleles)
    # print (snv_cand_alleles)
    # NOTE(review): orig is not used anywhere else in this script.
    orig = snv_def_alleles

    if "or" in snv_def_alleles:
        print (snv_def_alleles + "\t" + "Duplication present")

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] != snv_def_alleles[1]:
            # print("\n" + dup_test(sv_dup, hap_dbs, snv_def_alleles[0], snv_def_alleles[1], cn))
            # print (snv_cand_alleles)
            # print ("\n")
            phased_dup = dup_test_cn_3_4(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)

            # NOTE(review): phased_dup1 is not used anywhere else in this script.
            phased_dup1 = phased_dup.split("/")


        elif snv_def_alleles[0] == snv_def_alleles[1]:

            # Homozygous call: one copy on the first allele, CN - 1 on the second.
            rt_2 = int(cn) - 1

            phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))


        gene_alleles = phased_dup

        print(gene_alleles)



elif int(cn) > 4 and snv_def_alleles != None:

    if "or" in snv_def_alleles:
        print (snv_def_alleles + "\t" + "Duplication present")

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] != snv_def_alleles[1]:

            phased_dup = dup_test_cn_n(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)

        elif snv_def_alleles[0] == snv_def_alleles[1]:
            rt_2 = int(cn) - 1
            phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))

        gene_alleles = phased_dup
        print(phased_dup)




elif int(cn) > 2 and snv_def_alleles == None:

    print("Possible rare CYP2B6/2B7 hybrid present")
|
||||
331
pgx-main/scripts/cyp2b6/hg38/bin/sv_modules.py
Normal file
331
pgx-main/scripts/cyp2b6/hg38/bin/sv_modules.py
Normal file
@@ -0,0 +1,331 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
def get_total_CN(cov_file):
    """Estimate the total gene copy number from a region-coverage table.

    The file is read as whitespace-separated rows. For each of the first
    five rows the mean depth is ``col3 / (col2 - col1)`` (0-based columns).
    Going by the original variable names, the rows are: the CYP2B6 gene, two
    control regions (VDR and EGFR), and the exon 1-intron 4 and
    intron 4-exon 9 sub-regions.

    The copy number is ``round(2 * gene_depth / mean(control depths))``.

    Returns:
        list: ``[copy_number (str), rounded gene depth (int), rounded control
        depth (int), e1-int4 depth (str), int4-e9 depth (str)]``.

    Raises:
        IndexError: if the file has fewer than five rows or a row has fewer
            than four columns.
        ValueError: if a coordinate or coverage field is not numeric.
        ZeroDivisionError: if a region has zero length or the control depth
            is zero.
    """
    # Context manager closes the handle; the original left it open.
    with open(cov_file, "r") as handle:
        all_reg = [line.strip().split() for line in handle]

    def _mean_depth(row):
        # Summed coverage divided by the region length (end - start).
        return float(row[3]) / (float(row[2]) - float(row[1]))

    av_2b6_cov = _mean_depth(all_reg[0])
    av_vdr_cov = _mean_depth(all_reg[1])
    av_egfr_cov = _mean_depth(all_reg[2])
    av_e1_int4 = _mean_depth(all_reg[3])
    av_int4_e9 = _mean_depth(all_reg[4])

    av_ctrl_cov = (av_vdr_cov + av_egfr_cov) / 2

    # The control regions are treated as diploid, hence the factor of two.
    total_cn = round(2 * (av_2b6_cov / av_ctrl_cov))

    return [
        str(int(total_cn)),
        round(av_2b6_cov),
        round(av_ctrl_cov),
        str(av_e1_int4),
        str(av_int4_e9),
    ]
|
||||
|
||||
|
||||
def del_test(sv_del):
    """Classify a full-gene deletion from the deletion genotype file.

    Only lines containing the text ``COVERAGE`` are inspected. The last
    whitespace-separated field of such a line is read as a float (the
    original code names it ``ABHom``).

    Returns:
        * ``"None"`` (the string) when the file is empty,
        * ``"*(full_gene_del)/*(full_gene_del)"`` when the value is 1.0,
        * ``"*(full_gene_del)"`` when the value is -1.0,
        * ``None`` when no ``COVERAGE`` line yields either value.

    Raises:
        ValueError: if the last field of a ``COVERAGE`` line is not numeric.
    """
    if os.stat(sv_del).st_size == 0:
        return "None"

    # Context manager closes the handle; the original left it open.
    # The original also read three other fields into unused locals
    # (ABHet, GT, DP); they did not influence the result and are dropped.
    with open(sv_del, "r") as handle:
        for line in handle:
            if "COVERAGE" not in line:
                continue

            ab_hom = float(line.strip().split()[-1])

            if ab_hom == 1.0:
                return "*(full_gene_del)/*(full_gene_del)"
            if ab_hom == -1.0:
                return "*(full_gene_del)"

    # No decisive record found (was an implicit fall-through before).
    return None
|
||||
|
||||
|
||||
# Kept only so ``from sv_modules import *`` still exports these names.
# del_adv_test() no longer stores its working data here: appending to
# module-level lists made every call after the first reuse stale entries.
hap_adv_list = []
hap_t1 = []


def del_adv_test(hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, core_vars):
    """Decide which of two called alleles is the one actually present.

    The first heterozygous (``0/1``) core variant of the sample is looked up
    in the variant lists of the two candidate haplotypes.

    Args:
        hap_dbs: path to a whitespace-separated haplotype table; column 1
            holds the haplotype identifier and column 2 a ``;``-separated
            variant list.
        cand_allele1, cand_allele2: haplotype identifiers to look up in
            column 1 of ``hap_dbs``.
        test_allele1, test_allele2: values returned when the variant is
            found on ``cand_allele1`` / ``cand_allele2`` respectively.
        core_vars: ``;``-separated core variants, each ending in a genotype
            suffix such as ``~0/1``.

    Returns:
        ``test_allele1``, ``test_allele2``, or ``None`` when the variant is
        in neither haplotype definition.

    Raises:
        IndexError: if ``core_vars`` contains no ``0/1`` variant (unchanged
            from the original behaviour).
    """
    # Context manager closes the handle; the original left it open.
    with open(hap_dbs, "r") as handle:
        hap_rows = [line.strip().split() for line in handle]

    # Drop the 4-character genotype suffix ("~0/1") from heterozygous calls.
    het_vars = [entry[:-4] for entry in core_vars.split(";") if entry[-3:] == "0/1"]

    # Start empty so a candidate missing from the table cannot leave these
    # names unbound; as before, the last matching row wins.
    vars_allele1 = []
    vars_allele2 = []
    for row in hap_rows:
        if len(row) < 3:
            # Blank or truncated row: nothing to compare against.
            continue
        if row[1] == cand_allele1:
            vars_allele1 = row[2].split(';')
        if row[1] == cand_allele2:
            vars_allele2 = row[2].split(';')

    marker = het_vars[0]

    if marker in vars_allele1:
        return test_allele1
    if marker in vars_allele2:
        return test_allele2
    return None
|
||||
|
||||
|
||||
# Kept only so ``from sv_modules import *`` still exports these names.
# dup_test_init() now builds its result in local lists, so repeated calls no
# longer accumulate rows or re-annotate rows from earlier calls.
het_hom_list = []
het_hom_list_new = []


def dup_test_init(sv_dup, av_cov):
    """Annotate genotype rows with supporting-read statistics.

    Lines containing ``COVERAGE`` or ``AGGREGATED`` are skipped. Every other
    non-blank line is split on whitespace; column 2 is read as an integer
    depth and the second-to-last column as a float fraction.

    Two values are appended to each row:
        * ``round(supp_reads / mean_depth, 4)`` where ``mean_depth`` is the
          mean of column 2 over all kept rows,
        * ``supp_reads = round(fraction * depth)``.

    Args:
        sv_dup: path to the genotype table.
        av_cov: accepted for interface compatibility; it does not influence
            the returned values (the original only used it for an unused
            local).

    Returns:
        list[list]: the annotated rows; an empty list when the file has no
        usable rows (the original raised ``ZeroDivisionError``).
    """
    rows = []
    # Context manager closes the handle; the original left it open.
    with open(sv_dup, "r") as handle:
        for line in handle:
            if "COVERAGE" in line or "AGGREGATED" in line:
                continue
            fields = line.strip().split()
            if fields:
                rows.append(fields)

    if not rows:
        return []

    depths = [int(row[2]) for row in rows]
    av_read_cov = sum(depths) / len(depths)

    for row, depth in zip(rows, depths):
        # row[-2] is read before anything is appended to the row.
        supp_reads = round(float(row[-2]) * depth)
        # Mean depth 0 means every depth is 0, so every supp_reads is 0 too.
        ratio = round(supp_reads / av_read_cov, 4) if av_read_cov else 0.0
        row.append(ratio)
        row.append(supp_reads)

    return rows
|
||||
|
||||
|
||||
# Kept so ``from sv_modules import *`` still exports these names and so that
# other functions in this module that reference them keep working.
# dup_test_cn_3_4() itself no longer writes to them.
hap_def_list = []
allele_cn_list = []


def dup_test_cn_3_4(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
    """Assign the copies of a duplication (total copy number 3 or 4) to alleles.

    The heterozygous (``0/1``) row of ``in_list`` with the largest value in
    its second-to-last field is selected. That value times the copy number,
    rounded, is the copy count of the allele whose definition contains the
    row's variant (field 0). The other allele gets the remaining copies.

    Args:
        sv_dup: unused; kept for interface compatibility.
        hap_dbs: path to a whitespace-separated haplotype table; column 1
            holds the haplotype identifier and column 2 a ``;``-separated
            variant list.
        cand_allele1, cand_allele2: haplotype identifiers to look up.
        test_allele1, test_allele2: star-allele labels used in the result.
        c_num: total copy number (anything ``int()`` accepts).
        av_cov: unused; kept for interface compatibility.
        in_list: rows shaped like the output of ``dup_test_init``.

    Returns:
        str: a diplotype such as ``"*9/*6x2"``, or ``"check"`` when no rule
        matches the copy split.

    Raises:
        ValueError: if there is no heterozygous row, if a candidate
            haplotype is missing from ``hap_dbs``, or if the selected variant
            is in neither haplotype definition.
    """
    # Context manager closes the handle; the original left it open.
    with open(hap_dbs, "r") as handle:
        hap_rows = [line.strip().split() for line in handle]

    het_rows = [row for row in in_list if row[1] == "0/1"]
    if not het_rows:
        raise ValueError("no heterozygous (0/1) variant available to phase the duplication")

    # max() keeps the first row on ties, as the original max()/index() pair did.
    top_row = max(het_rows, key=lambda row: row[-2])
    var = top_row[0]
    max_het = top_row[-2]

    vars_allele1 = None
    vars_allele2 = None
    for row in hap_rows:
        if len(row) < 3:
            continue
        if row[1] == cand_allele1:
            vars_allele1 = row[2].split(';')
        if row[1] == cand_allele2:
            vars_allele2 = row[2].split(';')

    if vars_allele1 is None or vars_allele2 is None:
        raise ValueError("candidate haplotype not found in haplotype database")

    # Position in the concatenated lists says which allele carries the
    # variant; .index() raises ValueError when neither does.
    index_var = (vars_allele1 + vars_allele2).index(var)

    total_cn = int(c_num)
    first_cn = int(round(max_het * total_cn))

    # Local variables replace the module-level allele_cn_list, which made
    # every call after the first one return the first call's answer.
    if index_var < len(vars_allele1):
        first_allele, second_allele = test_allele1, test_allele2
    else:
        first_allele, second_allele = test_allele2, test_allele1
    second_cn = total_cn - first_cn

    if first_cn == 0:
        res_dip = first_allele + "/" + second_allele + "x" + str(second_cn - 1)

    elif second_cn == 0:
        res_dip = second_allele + "/" + first_allele + "x" + str(first_cn - 1)

    elif first_cn == 1:
        res_dip = first_allele + "/" + second_allele + "x" + str(second_cn)

    elif second_cn == 1:
        res_dip = second_allele + "/" + first_allele + "x" + str(first_cn)

    elif first_cn == 2:
        res_dip = first_allele + "x2" + "/" + second_allele + "x" + str(second_cn)

    elif second_cn == 2:
        res_dip = second_allele + "x2" + "/" + first_allele + "x" + str(first_cn)

    else:
        res_dip = 'check'

    return res_dip
|
||||
|
||||
|
||||
|
||||
def dup_test_cn_n(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
    """Assign the copies of a duplication to alleles for higher copy numbers.

    The heterozygous (``0/1``) row of ``in_list`` with the largest value in
    its second-to-last field is selected. The copy count of the allele whose
    definition contains the row's variant (field 0) is
    ``round(value * copy_number - 0.15)``. The other allele gets the
    remaining copies.

    Args:
        sv_dup: unused; kept for interface compatibility.
        hap_dbs: path to a whitespace-separated haplotype table; column 1
            holds the haplotype identifier and column 2 a ``;``-separated
            variant list.
        cand_allele1, cand_allele2: haplotype identifiers to look up.
        test_allele1, test_allele2: star-allele labels used in the result.
        c_num: total copy number (anything ``int()`` accepts).
        av_cov: unused; kept for interface compatibility.
        in_list: rows shaped like the output of ``dup_test_init``.

    Returns:
        str: a diplotype such as ``"*9x2/*6x3"``, or ``"check"`` when neither
        allele has a copy count between 0 and 4.

    Raises:
        ValueError: if there is no heterozygous row, if a candidate
            haplotype is missing from ``hap_dbs``, or if the selected variant
            is in neither haplotype definition.
    """
    # Context manager closes the handle; the original left it open.
    with open(hap_dbs, "r") as handle:
        hap_rows = [line.strip().split() for line in handle]

    het_rows = [row for row in in_list if row[1] == "0/1"]
    if not het_rows:
        raise ValueError("no heterozygous (0/1) variant available to phase the duplication")

    # max() keeps the first row on ties, as the original max()/index() pair did.
    top_row = max(het_rows, key=lambda row: row[-2])
    var = top_row[0]
    max_het = top_row[-2]

    vars_allele1 = None
    vars_allele2 = None
    for row in hap_rows:
        if len(row) < 3:
            continue
        if row[1] == cand_allele1:
            vars_allele1 = row[2].split(';')
        if row[1] == cand_allele2:
            vars_allele2 = row[2].split(';')

    if vars_allele1 is None or vars_allele2 is None:
        raise ValueError("candidate haplotype not found in haplotype database")

    # Position in the concatenated lists says which allele carries the
    # variant; .index() raises ValueError when neither does.
    index_var = (vars_allele1 + vars_allele2).index(var)

    total_cn = int(c_num)
    # The 0.15 offset biases the rounding slightly downwards (kept as-is).
    first_cn = int(round(max_het * total_cn - 0.15))

    # Local variables replace the module-level allele_cn_list, which made
    # every call after the first one return the first call's answer.
    if index_var < len(vars_allele1):
        first_allele, second_allele = test_allele1, test_allele2
    else:
        first_allele, second_allele = test_allele2, test_allele1
    second_cn = total_cn - first_cn

    if first_cn == 0:
        res_dip = first_allele + "/" + second_allele + "x" + str(second_cn - 1)

    elif second_cn == 0:
        res_dip = second_allele + "/" + first_allele + "x" + str(first_cn - 1)

    elif first_cn == 1:
        res_dip = first_allele + "/" + second_allele + "x" + str(second_cn)

    elif second_cn == 1:
        res_dip = second_allele + "/" + first_allele + "x" + str(first_cn)

    elif first_cn == 2:
        res_dip = first_allele + "x2" + "/" + second_allele + "x" + str(second_cn)

    elif second_cn == 2:
        res_dip = second_allele + "x2" + "/" + first_allele + "x" + str(first_cn)

    elif first_cn == 3:
        res_dip = first_allele + "x3" + "/" + second_allele + "x" + str(second_cn)

    elif second_cn == 3:
        res_dip = second_allele + "x3" + "/" + first_allele + "x" + str(first_cn)

    elif first_cn == 4:
        res_dip = first_allele + "x4" + "/" + second_allele + "x" + str(second_cn)

    elif second_cn == 4:
        res_dip = second_allele + "x4" + "/" + first_allele + "x" + str(first_cn)

    else:
        res_dip = 'check'

    return res_dip
|
||||
|
||||
|
||||
def hybrid_29_test1(cov_e1_int4, cov_int4_e9):
    """Classify the ratio of two region coverages for the ``hyb_29`` test.

    The ratio is ``cov_e1_int4 / cov_int4_e9`` (both converted with
    ``float``).

    Returns:
        ``'hyb_29'`` when the ratio is strictly between 0.45 and 0.75,
        ``'hyb_29_2'`` when it is below 0.15, and ``'norm_var'`` otherwise.
    """
    ratio = float(cov_e1_int4) / float(cov_int4_e9)

    # Every ratio outside the two windows below maps to 'norm_var',
    # including the explicit 0.85-1.2 window tested by the original.
    if 0.45 < ratio < 0.75:
        return 'hyb_29'

    if ratio < 0.15:
        return 'hyb_29_2'

    return 'norm_var'
|
||||
|
||||
|
||||
def hybrid_30_test1(cov_e1_int4, cov_int4_e9):
    """Classify the ratio of two region coverages for the ``hyb_30`` test.

    The forward ratio ``cov_e1_int4 / cov_int4_e9`` is checked first. If it
    is not strictly between 0.85 and 1.2, the inverse ratio
    ``cov_int4_e9 / cov_e1_int4`` is classified.

    Returns:
        ``'hyb_30'`` when the inverse ratio is strictly between 0.45 and
        0.75, ``'hyb_30_2'`` when it is below 0.15, and ``'norm_var'``
        otherwise.
    """
    upstream = float(cov_e1_int4)
    downstream = float(cov_int4_e9)

    if 0.85 < upstream / downstream < 1.2:
        return 'norm_var'

    # Computed only after the forward check fails, as in the original.
    inverse = downstream / upstream

    if 0.45 < inverse < 0.75:
        return 'hyb_30'

    if inverse < 0.15:
        return 'hyb_30_2'

    return 'norm_var'
|
||||
|
||||
78
pgx-main/scripts/cyp2c19/b37/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/cyp2c19/b37/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Pick the database diplotype that best explains the sample's core variants.

    Args:
        database: path to a tab-separated table; column 0 is a diplotype
            identifier of the form ``<hap>.<ver>_<hap>.<ver>`` and column 1
            a ``;``-separated list of core variants with genotype suffixes.
        core_vars: ``;``-separated core variants of the sample, each ending
            in a 4-character genotype suffix such as ``~0/1``.

    Selection:
        1. Keep records whose column 1 contains both the first and the last
           sample variant (genotype suffix removed).
        2. Score each record: +3 per variant matching the sample exactly,
           +1 when only the variant without its suffix occurs in
           ``core_vars``, -2 otherwise.
        3. Among records scoring the maximum or maximum - 1, take the one
           with the fewest variants (first one on ties).

    Returns:
        list: ``[allele_res, diplo1]``, e.g. ``['*2/*3', '2.v1_3.v1']``.
        With no candidate record the result is ``['*1/*1', '1.v1_1.v1']``.
    """
    core_vars_list = core_vars.split(";")
    last_var = core_vars_list[-1][:-4]
    first_var = core_vars_list[0][:-4]

    # Context manager closes the handle; the original left it open.
    with open(database, "r") as handle:
        dbs = [line.strip().split("\t") for line in handle]

    # The original condition was ``core_temp1 and core_temp2 in temp_rec``,
    # which only tests membership of core_temp2. Both variants are required
    # here. As before, an empty last variant selects no record.
    candidates = []
    if last_var:
        candidates = [
            record for record in dbs
            if len(record) > 1 and last_var in record[1] and first_var in record[1]
        ]

    scores = []
    for record in candidates:
        counter = 0
        for variant in record[1].split(";"):
            if variant in core_vars_list:
                counter += 3
            elif variant[:-4] in core_vars:
                # Same variant, different genotype (substring match on the
                # joined sample string, as in the original).
                counter += 1
            else:
                counter -= 2
        scores.append(counter)

    if not scores:
        return ['*1/*1', '1.v1_1.v1']

    max_score = max(scores)
    near_best = [
        record for record, value in zip(candidates, scores)
        if value == max_score or value == max_score - 1
    ]

    # min() keeps the first record on ties, as the original index(min(...)) did.
    best = min(near_best, key=lambda record: len(record[1].split(";")))
    diplo1 = best[0]

    # Haplotype 1 runs up to the first "."; haplotype 2 runs from just after
    # the first "_" up to the second ".".
    underscores = [pos for pos, char in enumerate(diplo1) if char == "_"]
    dots = [pos for pos, char in enumerate(diplo1) if char == "."]
    hap1 = "*" + diplo1[:dots[0]]
    hap2 = "*" + diplo1[underscores[0] + 1:dots[1]]
    allele_res = hap1 + "/" + hap2

    return [allele_res, diplo1]
|
||||
432
pgx-main/scripts/cyp2c19/b37/bin/snv_def_modules.py
Normal file
432
pgx-main/scripts/cyp2c19/b37/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,432 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ``;``-joined string.

    Each line of ``infile`` is stripped and treated as one variant. When the
    copy number ``cn`` equals 1, every ``~0/1`` genotype suffix is rewritten
    to ``~1/1``.

    Args:
        infile: path to the core-variant file, one variant per line.
        cn: copy number; anything ``int()`` accepts.

    Returns:
        str: the joined variants (an empty string for an empty file).
    """
    # Context manager closes the handle; the original left it open.
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars
|
||||
|
||||
def get_all_vars_gt(infile_full_gt):
    """Return all genotyped variants as one sorted, ``;``-joined string.

    Each line of ``infile_full_gt`` is stripped and treated as one entry.

    Returns:
        str: the joined entries (an empty string for an empty file).
    """
    # Context manager closes the handle; the original left it open.
    with open(infile_full_gt, "r") as handle:
        return ";".join(sorted(line.strip() for line in handle))
|
||||
|
||||
def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
|
||||
|
||||
|
||||
f = open(infile_spec, "r")
|
||||
|
||||
all_variants = []
|
||||
|
||||
for line in open(infile_full, "r"):
|
||||
line.strip()
|
||||
all_variants.append(line)
|
||||
# all_variants = line.strip().split(";")
|
||||
# print(all_variants)
|
||||
|
||||
if os.stat(infile).st_size == 0:
|
||||
cand_res = ['1.v1_1.v1']
|
||||
allele_res = "*1/*1"
|
||||
return ["".join(cand_res), allele_res];
|
||||
#print("\nSupporting variants")
|
||||
#print("\n" + "".join(all_variants))
|
||||
sys.exit()
|
||||
|
||||
# core_variants = []
|
||||
|
||||
# for line in open(infile, "r"):
|
||||
# line = line.strip()
|
||||
# core_variants.append(line)
|
||||
|
||||
# core_variants = ";".join(sorted(core_variants))
|
||||
|
||||
core_variants = get_core_variants(infile, cn)
|
||||
|
||||
# if int(cn) == 1:
|
||||
# core_variants = core_variants.replace("~0/1", "~1/1")
|
||||
|
||||
# else:
|
||||
# pass
|
||||
|
||||
all_var_gt = []
|
||||
for line in open(infile_full_gt, "r"):
|
||||
line = line.strip()
|
||||
all_var_gt.append(line)
|
||||
|
||||
|
||||
dbs = []
|
||||
|
||||
for line in open(database, "r"):
|
||||
line = line.strip().split("\t")
|
||||
dbs.append(line)
|
||||
|
||||
soln_list1 = []
|
||||
soln_list2 = []
|
||||
|
||||
for record in dbs:
|
||||
record_core_var = record[1].split(";")
|
||||
record_core_var = ";".join(sorted(record_core_var))
|
||||
if record_core_var == core_variants:
|
||||
diplo = record[0]
|
||||
full_dip = record[2]
|
||||
soln_list1.append(record[0])
|
||||
soln_list2.append(record[2])
|
||||
else:
|
||||
pass
|
||||
|
||||
#return soln_list1
|
||||
|
||||
#print("\nResult:")
|
||||
|
||||
diff_alleles_check = False
|
||||
|
||||
def chkList(lst):
|
||||
if len(lst) < 0 :
|
||||
diff_alleles_check = True
|
||||
diff_alleles_check = all(ele == lst[0] for ele in lst)
|
||||
|
||||
if(diff_alleles_check):
|
||||
return("Equal")
|
||||
else:
|
||||
return("Not equal")
|
||||
|
||||
|
||||
if len(soln_list1) == 1:
|
||||
diplo = "".join(soln_list1)
|
||||
res1 = [i for i in range(len(diplo)) if diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo)) if diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo[:res2[0]])
|
||||
hap2 = "*" + str (diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo, allele_res];
|
||||
#print ("\nSupporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
elif len(soln_list1) == 2:
|
||||
print(soln_list1)
|
||||
diplo1 = soln_list1[0]
|
||||
diplo2 = soln_list1[1]
|
||||
diplo1_supp_var = soln_list2[0].split(";")
|
||||
diplo2_supp_var = soln_list2[1].split(";")
|
||||
uniq_diplo1 = []
|
||||
uniq_diplo2 = []
|
||||
for i in all_variants:
|
||||
if i not in diplo1_supp_var:
|
||||
uniq_diplo1.append(i)
|
||||
|
||||
if i not in diplo2_supp_var:
|
||||
uniq_diplo2.append(i)
|
||||
|
||||
#print("\nUnique variants in soln 1: {}".format(len(uniq_diplo1)))
|
||||
#print("\nUnique variants in soln 2: {}".format(len(uniq_diplo2)))
|
||||
|
||||
if len(uniq_diplo1) < len(uniq_diplo2):
|
||||
res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo1[:res2[0]])
|
||||
hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo1, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2):
|
||||
res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo2[:res2[0]])
|
||||
hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo2, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
# elif len(uniq_diplo1) == len(uniq_diplo2) and (diplo1 == "4.v11_74.v1" and diplo2 == "4.v12_1.v1"):
|
||||
# res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
# hap1 = "*" + str (diplo2[:res2[0]])
|
||||
# hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
# allele_res = hap1 + "/" + hap2
|
||||
# return [soln_list1, diplo2, allele_res];
|
||||
|
||||
# elif len(uniq_diplo1) == len(uniq_diplo2) and diplo2 == "41.v1_65.v1":
|
||||
# res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
# hap1 = "*" + str (diplo2[:res2[0]])
|
||||
# hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
# allele_res = hap1 + "/" + hap2
|
||||
# return [soln_list1, diplo2, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
# elif len(uniq_diplo1) == len(uniq_diplo2) and (diplo1 == "4.v1_6.v1" and diplo2 == "4.v4_6.v2") :
|
||||
# res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
|
||||
# hap1 = "*" + str (diplo1[:res2[0]])
|
||||
# hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
|
||||
# allele_res = hap1 + "/" + hap2
|
||||
# return [soln_list1, diplo1, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
else:
|
||||
tiebreak1 = []
|
||||
tiebreak2 = []
|
||||
tiebreak3 = []
|
||||
score = []
|
||||
for line in f:
|
||||
line = line.strip().split()
|
||||
#print(line)
|
||||
if line[2] == core_variants:
|
||||
tiebreak1.append(line[1])
|
||||
tiebreak2.append(line[3])
|
||||
tiebreak3.append(line[0])
|
||||
for full_dip in tiebreak2:
|
||||
diplo_supp_gt = full_dip.split(";")
|
||||
uniq_gt = []
|
||||
for i in all_var_gt:
|
||||
if i not in diplo_supp_gt:
|
||||
uniq_gt.append(i)
|
||||
score_dip = len(uniq_gt)
|
||||
score.append(score_dip)
|
||||
|
||||
min_score = min(score)
|
||||
#print(score)
|
||||
|
||||
|
||||
if chkList(score) == "Equal" and soln_list1[0] == "17.v1_4.v1":
|
||||
elem = "17.v1_4.v1"
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
return [soln_list1, elem, result_dip];
|
||||
|
||||
|
||||
elif chkList(score) == "Equal":
|
||||
amb_soln_set = []
|
||||
for elem in soln_list1:
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
amb_soln_set.append(result_dip)
|
||||
#elem_pos = tiebreak1.index(elem)
|
||||
#print ("Solution " + str(elem_pos) + ": " + result_dip)
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, allele_res];
|
||||
|
||||
|
||||
# elif score.count(min_score) > 1 and soln_list1[0] == "11.v1_2.v2":
|
||||
# elem = "11.v1_2.v2"
|
||||
# res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
# hap1 = "*" + str (elem[:res2[0]])
|
||||
# hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
# result_dip = hap1 + "/" + hap2
|
||||
# return [soln_list1, elem, result_dip];
|
||||
|
||||
|
||||
|
||||
elif score.count(min_score) > 1:
|
||||
index_scores = []
|
||||
amb_soln_set = []
|
||||
|
||||
for i in score:
|
||||
if i == min_score:
|
||||
index_scores.append(score.index(i))
|
||||
|
||||
alt_solns = []
|
||||
for j in index_scores:
|
||||
elem = tiebreak1[j]
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
alt_solns.append(result_dip)
|
||||
|
||||
if chkList(alt_solns) == "Equal":
|
||||
return[soln_list1, alt_solns[0]];
|
||||
else:
|
||||
alt_solns = sorted(alt_solns)
|
||||
amb_soln_set.append(alt_solns[0])
|
||||
amb_soln_set.append(alt_solns[-1])
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, allele_res];
|
||||
|
||||
# amb_soln_set = []
|
||||
# temp_set = []
|
||||
# temp_set.append(tiebreak1[0])
|
||||
# temp_set.append(tiebreak1[-1])
|
||||
|
||||
# for elem in temp_set:
|
||||
# res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
# hap1 = "*" + str (elem[:res2[0]])
|
||||
# hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
# result_dip = hap1 + "/" + hap2
|
||||
# amb_soln_set.append(result_dip)
|
||||
#elem_pos = tiebreak1.index(elem)
|
||||
#print ("Solution " + str(elem_pos) + ": " + result_dip)
|
||||
# allele_res = " or ".join(amb_soln_set)
|
||||
# return [soln_list1, allele_res];
|
||||
#print ("\nSupporting core variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
else:
|
||||
minpos = score.index(min_score)
|
||||
best_diplo = tiebreak1[minpos]
|
||||
best_cand_haps = tiebreak3[minpos]
|
||||
res1 = [i for i in range(len(best_diplo)) if best_diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(best_diplo)) if best_diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (best_diplo[:res2[0]])
|
||||
hap2 = "*" + str (best_diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, best_cand_haps, allele_res];
|
||||
#print ("Supporting core variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
elif len(soln_list1) == 3:
|
||||
diplo1 = soln_list1[0]
|
||||
diplo2 = soln_list1[1]
|
||||
diplo3 = soln_list1[2]
|
||||
diplo1_supp_var = soln_list2[0].split(";")
|
||||
diplo2_supp_var = soln_list2[1].split(";")
|
||||
diplo3_supp_var = soln_list2[2].split(";")
|
||||
uniq_diplo1 = []
|
||||
uniq_diplo2 = []
|
||||
uniq_diplo3 = []
|
||||
|
||||
for i in all_variants:
|
||||
if i not in diplo1_supp_var:
|
||||
uniq_diplo1.append(i)
|
||||
|
||||
if i not in diplo2_supp_var:
|
||||
uniq_diplo2.append(i)
|
||||
|
||||
if i not in diplo3_supp_var:
|
||||
uniq_diplo3.append(i)
|
||||
|
||||
|
||||
if len(uniq_diplo1) < len(uniq_diplo2) and len(uniq_diplo1) < len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo1[:res2[0]])
|
||||
hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo1, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2) and len(uniq_diplo2) < len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo2[:res2[0]])
|
||||
hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo2, allele_res]
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2) and len(uniq_diplo2) > len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo3)) if diplo3.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo3)) if diplo3.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo3[:res2[0]])
|
||||
hap2 = "*" + str (diplo3[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo3, allele_res]
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
# elif len(uniq_diplo1) == len(uniq_diplo2) == len(uniq_diplo3) and diplo3 == "39.v1_4.v4":
|
||||
# res1 = [i for i in range(len(diplo3)) if diplo3.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(diplo3)) if diplo3.startswith(".", i)]
|
||||
# hap1 = "*" + str (diplo3[:res2[0]])
|
||||
# hap2 = "*" + str (diplo3[res1[0]+1:res2[1]])
|
||||
# allele_res = hap1 + "/" + hap2
|
||||
# return [soln_list1, diplo3, allele_res]
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
elif len(uniq_diplo1) == len(uniq_diplo2) == len(uniq_diplo3) or (len(uniq_diplo1) != len(uniq_diplo2) == len(uniq_diplo3)) or (len(uniq_diplo1) == len(uniq_diplo2) != len(uniq_diplo3)):
|
||||
|
||||
tiebreak1 = []
|
||||
tiebreak2 = []
|
||||
tiebreak3 = []
|
||||
score = []
|
||||
for line in f:
|
||||
line = line.strip().split()
|
||||
#print(line)
|
||||
if line[2] == core_variants:
|
||||
tiebreak1.append(line[1])
|
||||
tiebreak2.append(line[3])
|
||||
tiebreak3.append(line[0])
|
||||
for full_dip in tiebreak2:
|
||||
diplo_supp_gt = full_dip.split(";")
|
||||
uniq_gt = []
|
||||
for i in all_var_gt:
|
||||
if i not in diplo_supp_gt:
|
||||
uniq_gt.append(i)
|
||||
score_dip = len(uniq_gt)
|
||||
score.append(score_dip)
|
||||
|
||||
min_score = min(score)
|
||||
# print(score)
|
||||
|
||||
if chkList(score) == "Equal":
|
||||
amb_soln_set = []
|
||||
for elem in tiebreak1:
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
amb_soln_set.append(result_dip)
|
||||
#elem_pos = tiebreak1.index(elem)
|
||||
#print ("Solution " + str(elem_pos) + ": " + result_dip)
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, tiebreak1, allele_res];
|
||||
#print ("\nSupporting core variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
else:
|
||||
minpos = score.index(min_score)
|
||||
best_diplo = tiebreak1[minpos]
|
||||
best_cand_haps = tiebreak3[minpos]
|
||||
res1 = [i for i in range(len(best_diplo)) if best_diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(best_diplo)) if best_diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (best_diplo[:res2[0]])
|
||||
hap2 = "*" + str (best_diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, best_cand_haps, allele_res];
|
||||
#print ("Supporting core variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
|
||||
#print("\nFull diplotype variants:")
|
||||
#print("\n" + ";".join(all_var_gt))
|
||||
225
pgx-main/scripts/cyp2c19/b37/bin/stellarpgx.py
Normal file
225
pgx-main/scripts/cyp2c19/b37/bin/stellarpgx.py
Normal file
@@ -0,0 +1,225 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from sv_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
|
||||
print("--------------------------------------------\n")

print("CYP2C19 Star Allele Calling with StellarPGx\n")

print("--------------------------------------------\n")


# Positional command-line arguments. Each one is handed unchanged to the
# helper named in the trailing comment.
database = sys.argv[1]        # cand_snv_allele_calling / get_backgroud_alleles
infile = sys.argv[2]          # get_core_variants / cand_snv_allele_calling
infile_full = sys.argv[3]     # cand_snv_allele_calling
infile_full_gt = sys.argv[4]  # cand_snv_allele_calling / get_all_vars_gt
infile_spec = sys.argv[5]     # cand_snv_allele_calling
sv_del = sys.argv[6]          # del_test
sv_dup = sys.argv[7]          # dup_test_init / dup_test_cn_*
cov_file = sys.argv[8]        # get_total_CN
hap_dbs = sys.argv[9]         # del_adv_test / dup_test_cn_*
act_score = sys.argv[10]      # read for interface compatibility; unused below


# Parse the coverage file once; element 0 is the copy number as a string and
# element 1 the rounded mean gene coverage (previously the file was parsed
# twice to obtain the two values).
cov_summary = get_total_CN(cov_file)
cn = cov_summary[0]

print("Initially computed CN = {}".format(cn))

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)


snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)

if snv_def_calls is None:
    # No usable diplotype call: report the closest background alleles and stop.
    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if bac_alleles is None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")

    else:
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    sys.exit()


best_diplos = snv_def_calls[0]

print("\nCandidate alleles:")
print(best_diplos)


snv_def_alleles = snv_def_calls[-1]

# An ambiguous call is reported as "X or Y"; such a result carries no single
# candidate-haplotype entry, so snv_cand_alleles is only taken otherwise.
ambiguous_call = "or" in snv_def_alleles
snv_cand_alleles = None if ambiguous_call else snv_def_calls[1]


dip_variants = get_all_vars_gt(infile_full_gt)


print("\nResult:")


av_cov = cov_summary[1]


gene_alleles = ""


if snv_def_alleles != '*1/*1' and cn != '0':
    in_list = dup_test_init(sv_dup, av_cov)


if cn == '2':

    if ambiguous_call:
        print(snv_def_alleles)

    else:
        gene_alleles = snv_def_alleles
        print(gene_alleles)


elif cn == '0':
    del_confirm = del_test(sv_del)

    if del_confirm == '*36/*36':
        gene_alleles = del_confirm
        print(gene_alleles)

    elif del_confirm == '*36':
        gene_alleles = del_confirm + "/" + "*other"
        print(gene_alleles)

    else:
        gene_alleles = "*36/*36"
        print(gene_alleles)


elif cn == '1':
    del_confirm = del_test(sv_del)

    if ambiguous_call:
        # Previously an ambiguous call combined with a deletion record
        # (del_confirm != 'None') fell through to code that used the undefined
        # snv_cand_alleles and raised NameError; it is now reported the same
        # way as the ambiguous call without a deletion record.
        print(snv_def_alleles + "\t" + "Possible CYP2C19 gene deletion present")

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] == snv_def_alleles[1]:
            samp_allele1 = snv_def_alleles[0]

        else:
            samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)

        # Output order is kept exactly as before: the retained allele comes
        # first when del_test returned the string 'None', otherwise "*36" does.
        if del_confirm == 'None':
            gene_alleles = samp_allele1 + "/" + "*36"
        else:
            gene_alleles = "*36" + "/" + samp_allele1

        print(gene_alleles)


elif (int(cn) == 3 or int(cn) == 4) and snv_def_alleles is not None:

    if ambiguous_call:
        print(snv_def_alleles + "\t" + "Duplication present")

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] != snv_def_alleles[1]:
            phased_dup = dup_test_cn_3_4(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)

        else:
            # Identical alleles: all extra copies are written on the second one.
            rt_2 = int(cn) - 1
            phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))

        gene_alleles = phased_dup

        print(gene_alleles)


elif int(cn) > 4 and snv_def_alleles is not None:

    if ambiguous_call:
        print(snv_def_alleles + "\t" + "Duplication present")

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] != snv_def_alleles[1]:
            phased_dup = dup_test_cn_n(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)

        else:
            rt_2 = int(cn) - 1
            phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))

        gene_alleles = phased_dup
        print(phased_dup)


elif int(cn) > 2 and snv_def_alleles is None:

    print("Possible rare CYP2C19 structural variant present")
|
||||
331
pgx-main/scripts/cyp2c19/b37/bin/sv_modules.py
Normal file
331
pgx-main/scripts/cyp2c19/b37/bin/sv_modules.py
Normal file
@@ -0,0 +1,331 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
def get_total_CN(cov_file):
    """Estimate the gene copy number from a three-region coverage file.

    The first three lines of *cov_file* are read as whitespace-separated
    records whose fields 1, 2 and 3 (0-based) are used as region start,
    region end and summed depth.  Line 1 is the target gene and lines 2 and 3
    are the two control regions (named VDR and EGFR by the variables here).

    Returns a list ``[copy_number, gene_cov, ctrl_cov]`` where
    ``copy_number`` is ``str(round(2 * gene / control))`` and the two
    coverages are the rounded mean depths per base.

    Raises IndexError/ValueError for malformed input and ZeroDivisionError
    when a region has zero length or the control coverage is zero.
    """
    # Read everything up front so the handle is closed before any arithmetic
    # (the original left the file open).
    with open(cov_file, "r") as handle:
        all_reg = [line.strip().split() for line in handle]

    def mean_depth(region):
        # summed depth divided by region length
        return float(region[3]) / (float(region[2]) - float(region[1]))

    av_2c19_cov = mean_depth(all_reg[0])
    av_vdr_cov = mean_depth(all_reg[1])
    av_egfr_cov = mean_depth(all_reg[2])

    av_ctrl_cov = (av_vdr_cov + av_egfr_cov) / 2

    # Two control copies are assumed, hence the factor of 2.
    comp_av = av_2c19_cov / av_ctrl_cov
    total_cn = round(2 * comp_av)

    return [str(int(total_cn)), round(av_2c19_cov), round(av_ctrl_cov)]
|
||||
|
||||
|
||||
def del_test(sv_del):
    """Classify a whole-gene deletion from the deletion-genotyping output.

    Returns
    -------
    "None" (the string)
        when *sv_del* is an empty file.
    "*(full_gene_del)/*(full_gene_del)"
        for the first line containing "COVERAGE" whose last field equals 1.0.
    "*(full_gene_del)"
        for the first line containing "COVERAGE" whose last field equals -1.0.
    None
        when no "COVERAGE" line has a last field of 1.0 or -1.0.

    NOTE(review): the CYP2C19 driver script compares this result with
    '*36/*36' and '*36'; those strings are never produced here - confirm
    which spelling is intended.
    """
    if os.stat(sv_del).st_size == 0:
        return "None"

    with open(sv_del, "r") as handle:
        for line in handle:
            if "COVERAGE" not in line:
                continue

            # Only the last column (named ABHom in the original) is used.
            ab_hom = float(line.strip().split()[-1])

            if ab_hom == 1.0:
                return "*(full_gene_del)/*(full_gene_del)"
            if ab_hom == -1.0:
                return "*(full_gene_del)"

    return None
|
||||
|
||||
|
||||
# Kept only so that existing `from sv_modules import *` users still find the
# names; del_adv_test no longer stores state in them.
hap_adv_list = []
hap_t1 = []


def del_adv_test(hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, core_vars):
    """Decide which of two candidate alleles carries the first het core variant.

    *hap_dbs* is read as whitespace-separated records; field 1 is compared
    with the candidate allele names and field 2 is split on ';' into that
    allele's variants.  *core_vars* is a ';'-joined string of variants whose
    last three characters are the genotype; the first entry ending in "0/1"
    (minus its final four characters) is the variant looked up.

    Returns *test_allele1* if that variant is listed for *cand_allele1*,
    *test_allele2* if it is listed for *cand_allele2*, otherwise None.  None
    is also returned when *core_vars* has no "0/1" entry (previously an
    IndexError).

    The original kept its working lists at module level, so a second call
    silently reused the variant found by the first call; all state is now
    local to the call.
    """
    with open(hap_dbs, "r") as handle:
        definitions = [line.strip().split() for line in handle]

    het_variants = [item[:-4] for item in core_vars.split(";") if item[-3:] == "0/1"]
    if not het_variants:
        return None

    vars_allele1 = []
    vars_allele2 = []
    for record in definitions:
        if len(record) < 3:
            continue  # blank or truncated line
        # No break: as in the original, the last matching record wins.
        if record[1] == cand_allele1:
            vars_allele1 = record[2].split(';')
        if record[1] == cand_allele2:
            vars_allele2 = record[2].split(';')

    probe = het_variants[0]
    if probe in vars_allele1:
        return test_allele1
    if probe in vars_allele2:
        return test_allele2
    return None
|
||||
|
||||
|
||||
# Kept only so that existing `from sv_modules import *` users still find the
# names; dup_test_init no longer stores state in them.
het_hom_list = []
het_hom_list_new = []


def dup_test_init(sv_dup, av_cov):
    """Annotate each variant row of *sv_dup* with a normalised read fraction.

    Lines containing "COVERAGE" or "AGGREGATED" are skipped, as are blank
    lines.  Every remaining line is split on whitespace; field 2 is read as
    an integer depth and the second-to-last field as a float fraction.

    For each row, ``supp_reads = round(fraction * depth)`` and two values are
    appended to the row: ``round(supp_reads / norm_cov, 3)`` and
    ``supp_reads``, where ``norm_cov`` is the mean of *av_cov* and the
    average depth over all rows.

    Returns the list of annotated rows; an empty list when the file holds no
    variant rows (previously a ZeroDivisionError).

    The original accumulated rows in module-level lists, so a second call
    returned the first call's rows again and re-annotated them; all state is
    now local to the call.
    """
    rows = []
    with open(sv_dup, "r") as handle:
        for line in handle:
            if "COVERAGE" in line or "AGGREGATED" in line:
                continue
            fields = line.strip().split()
            if fields:
                rows.append(fields)

    if not rows:
        return []

    depths = [int(row[2]) for row in rows]
    av_read_cov = sum(depths) / len(depths)
    norm_cov = (av_cov + av_read_cov) / 2

    for row in rows:
        supp_reads = round(float(row[-2]) * int(row[2]))
        row.append(round(supp_reads / norm_cov, 3))
        row.append(supp_reads)

    return rows
|
||||
|
||||
|
||||
# Kept at module level because dup_test_cn_n and external importers refer to
# these names; dup_test_cn_3_4 itself no longer stores state in them.
hap_def_list = []
allele_cn_list = []


def dup_test_cn_3_4(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
    """Assign the copies of a CN 3/4 sample to two different alleles.

    *in_list* is the row list produced by dup_test_init: field 0 is the
    variant, field 1 the genotype and the second-to-last field the normalised
    read fraction.  Among rows with genotype "0/1" the first row with the
    largest fraction is taken; the allele whose definition in *hap_dbs*
    (field 1 = allele name, field 2 = ';'-joined variants) lists that variant
    receives ``int(round(fraction * int(c_num)))`` copies and the other
    allele the remainder.

    Returns a string such as "*2/*17x2" or "*2x2/*17x2", or 'check' when the
    copy split is not one of the handled patterns.  'check' is also returned
    when there is no heterozygous row, a candidate allele has no definition,
    or the chosen variant is in neither definition (these cases previously
    raised ValueError/UnboundLocalError).

    *sv_dup* and *av_cov* are accepted for interface compatibility and are
    not used.  The original appended to module-level lists and then read
    entries [0..3], so a second call returned the first call's result; all
    state is now local to the call.
    """
    with open(hap_dbs, "r") as handle:
        definitions = [line.strip().split() for line in handle]

    het_rows = [row for row in in_list if row[1] == "0/1"]
    if not het_rows:
        return 'check'

    # max() keeps the first of several equal maxima, like max()+index() did.
    top_row = max(het_rows, key=lambda row: row[-2])
    max_het = top_row[-2]
    var = top_row[0]

    vars_allele1 = None
    vars_allele2 = None
    for record in definitions:
        if len(record) < 3:
            continue  # blank or truncated line
        if record[1] == cand_allele1:
            vars_allele1 = record[2].split(';')
        if record[1] == cand_allele2:
            vars_allele2 = record[2].split(';')

    if vars_allele1 is None or vars_allele2 is None:
        return 'check'

    combined = vars_allele1 + vars_allele2
    if var not in combined:
        return 'check'

    dup_cn = int(round(max_het * int(c_num)))

    # The allele carrying the variant is listed "first", as in the original
    # allele_cn_list layout [first, first_cn, second, second_cn].
    if combined.index(var) < len(vars_allele1):
        first, second = test_allele1, test_allele2
    else:
        first, second = test_allele2, test_allele1
    first_cn = dup_cn
    second_cn = int(c_num) - dup_cn

    if first_cn == 0:
        return first + "/" + second + "x" + str(second_cn - 1)
    if second_cn == 0:
        return second + "/" + first + "x" + str(first_cn - 1)

    # Same test order as the original elif chain: 1 copy, then 2 copies.
    for copies in (1, 2):
        suffix = "" if copies == 1 else "x" + str(copies)
        if first_cn == copies:
            return first + suffix + "/" + second + "x" + str(second_cn)
        if second_cn == copies:
            return second + suffix + "/" + first + "x" + str(first_cn)

    return 'check'
|
||||
|
||||
|
||||
|
||||
def dup_test_cn_n(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
    """Assign the copies of a CN > 4 sample to two different alleles.

    *in_list* is the row list produced by dup_test_init: field 0 is the
    variant, field 1 the genotype and the second-to-last field the normalised
    read fraction.  Among rows with genotype "0/1" the first row with the
    largest fraction is taken; the allele whose definition in *hap_dbs*
    (field 1 = allele name, field 2 = ';'-joined variants) lists that variant
    receives ``int(round(fraction * int(c_num) - 0.15))`` copies and the
    other allele the remainder.

    Returns a string such as "*2x2/*17x3", or 'check' when neither allele
    ends up with 0-4 copies.  'check' is also returned when there is no
    heterozygous row, a candidate allele has no definition, or the chosen
    variant is in neither definition (these cases previously raised
    ValueError/UnboundLocalError).

    *sv_dup* and *av_cov* are accepted for interface compatibility and are
    not used.  The original appended to module-level lists and then read
    entries [0..3], so a second call returned the first call's result; all
    state is now local to the call.
    """
    with open(hap_dbs, "r") as handle:
        definitions = [line.strip().split() for line in handle]

    het_rows = [row for row in in_list if row[1] == "0/1"]
    if not het_rows:
        return 'check'

    # max() keeps the first of several equal maxima, like max()+index() did.
    top_row = max(het_rows, key=lambda row: row[-2])
    max_het = top_row[-2]
    var = top_row[0]

    vars_allele1 = None
    vars_allele2 = None
    for record in definitions:
        if len(record) < 3:
            continue  # blank or truncated line
        if record[1] == cand_allele1:
            vars_allele1 = record[2].split(';')
        if record[1] == cand_allele2:
            vars_allele2 = record[2].split(';')

    if vars_allele1 is None or vars_allele2 is None:
        return 'check'

    combined = vars_allele1 + vars_allele2
    if var not in combined:
        return 'check'

    # The 0.15 offset is taken unchanged from the original implementation.
    dup_cn = int(round(max_het * int(c_num) - 0.15))

    # The allele carrying the variant is listed "first", as in the original
    # allele_cn_list layout [first, first_cn, second, second_cn].
    if combined.index(var) < len(vars_allele1):
        first, second = test_allele1, test_allele2
    else:
        first, second = test_allele2, test_allele1
    first_cn = dup_cn
    second_cn = int(c_num) - dup_cn

    if first_cn == 0:
        return first + "/" + second + "x" + str(second_cn - 1)
    if second_cn == 0:
        return second + "/" + first + "x" + str(first_cn - 1)

    # Same test order as the original elif chain: 1, 2, 3, then 4 copies.
    for copies in (1, 2, 3, 4):
        suffix = "" if copies == 1 else "x" + str(copies)
        if first_cn == copies:
            return first + suffix + "/" + second + "x" + str(second_cn)
        if second_cn == copies:
            return second + suffix + "/" + first + "x" + str(first_cn)

    return 'check'
|
||||
|
||||
|
||||
# def hybrid_29_test1(cov_e1_int4, cov_int4_e9):
|
||||
|
||||
# if 0.85 < float(cov_e1_int4)/float(cov_int4_e9) < 1.2:
|
||||
# return 'norm_var'
|
||||
|
||||
# elif 0.45 < float(cov_e1_int4)/float(cov_int4_e9) < 0.75:
|
||||
# return 'hyb_29'
|
||||
|
||||
# elif float(cov_e1_int4)/float(cov_int4_e9) < 0.15:
|
||||
# return 'hyb_29_2'
|
||||
|
||||
# else:
|
||||
# return 'norm_var'
|
||||
|
||||
|
||||
# def hybrid_30_test1(cov_e1_int4, cov_int4_e9):
|
||||
|
||||
# if 0.85 < float(cov_e1_int4)/float(cov_int4_e9) < 1.2:
|
||||
# return 'norm_var'
|
||||
|
||||
# elif 0.45 < float(cov_int4_e9)/float(cov_e1_int4) < 0.75:
|
||||
# return 'hyb_30'
|
||||
|
||||
# elif float(cov_int4_e9)/float(cov_e1_int4) < 0.15:
|
||||
# return 'hyb_30_2'
|
||||
|
||||
# else:
|
||||
# return 'norm_var'
|
||||
|
||||
78
pgx-main/scripts/cyp2c19/hg38/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/cyp2c19/hg38/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Suggest the closest known diplotype for an unmatched set of core variants.

    *database* is read as tab-separated records: field 0 is a diplotype key
    such as "2.v1_17.v1" and field 1 its ';'-joined core variants.
    *core_vars* is the sample's ';'-joined core-variant string, each entry
    ending in a four-character genotype suffix such as "~0/1".

    Records are kept as candidates when their variant string contains both
    the first and the last sample variant (genotype suffix removed).  Each
    candidate variant then scores +3 for an exact match (including
    genotype), +1 when only the variant part occurs in *core_vars*, and -2
    otherwise.  Among candidates within one point of the best score, the one
    with the fewest variants is chosen.

    Returns ``[allele_res, diplo]``, for example
    ``['*2/*17', '2.v1_17.v1']``; ``['*38/*38', '38.v1_38.v1']`` when no
    record qualifies.

    NOTE(review): the original filter was written
    ``core_temp1 and core_temp2 in temp_rec``, which Python evaluates as
    ``core_temp1 and (core_temp2 in temp_rec)`` - the last variant was never
    looked up.  Both variants are now required, which can shrink the
    candidate set; revalidate on known samples.
    """
    core_vars_list = core_vars.split(";")
    core_temp1 = core_vars_list[-1][:-4]   # last variant, genotype stripped
    core_temp2 = core_vars_list[0][:-4]    # first variant, genotype stripped

    with open(database, "r") as handle:
        dbs = [line.strip().split("\t") for line in handle]

    # The truthiness test on core_temp1 is kept from the original, so an
    # empty core_vars still yields no candidates.
    dbs_temp = [
        record for record in dbs
        if len(record) > 1
        and core_temp1
        and core_temp1 in record[1]
        and core_temp2 in record[1]
    ]

    scores = []
    candidates = []
    cand_vars = []

    for elem in dbs_temp:
        record_core_var = elem[1].split(";")
        candidates.append(elem[0])
        cand_vars.append(record_core_var)

        counter = 0
        for variant in record_core_var:
            if variant in core_vars_list:
                counter += 3
            elif variant[:-4] in core_vars:
                # substring test against the whole ';'-joined sample string
                counter += 1
            else:
                counter -= 2
        scores.append(counter)

    if not scores:
        return ['*38/*38', '38.v1_38.v1']

    max_score = max(scores)
    near_best = [i for i, value in enumerate(scores) if value == max_score or value == max_score - 1]

    cand_diplos = [candidates[i] for i in near_best]
    var_counts = [len(cand_vars[i]) for i in near_best]
    diplo1 = cand_diplos[var_counts.index(min(var_counts))]

    # "2.v1_17.v1" -> "*2/*17": text before the first '.', and text between
    # the first '_' and the second '.'.
    underscores = [i for i, ch in enumerate(diplo1) if ch == "_"]
    dots = [i for i, ch in enumerate(diplo1) if ch == "."]
    hap1 = "*" + diplo1[:dots[0]]
    hap2 = "*" + diplo1[underscores[0] + 1:dots[1]]
    allele_res = hap1 + "/" + hap2

    return [allele_res, diplo1]
|
||||
313
pgx-main/scripts/cyp2c19/hg38/bin/snv_def_modules.py
Normal file
313
pgx-main/scripts/cyp2c19/hg38/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,313 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ';'-joined string.

    Every line of *infile* is stripped and treated as one variant entry.
    When ``int(cn) == 1`` every "~0/1" in the joined string is rewritten to
    "~1/1".

    The file is now read inside a ``with`` block; the original never closed
    the handle.
    """
    with open(infile, "r") as handle:
        entries = [line.strip() for line in handle]

    core_vars = ";".join(sorted(entries))

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars
|
||||
|
||||
def get_all_vars_gt(infile_full_gt):
    """Return all lines of *infile_full_gt* as one sorted, ';'-joined string.

    Each line is stripped of surrounding whitespace before sorting.  The
    file is now read inside a ``with`` block; the original never closed the
    handle.
    """
    with open(infile_full_gt, "r") as handle:
        entries = [line.strip() for line in handle]

    return ";".join(sorted(entries))
|
||||
|
||||
def _star_diplotype(diplo):
    """Convert a diplotype key such as "2.v1_17.v1" into "*2/*17"."""
    underscores = [i for i, ch in enumerate(diplo) if ch == "_"]
    dots = [i for i, ch in enumerate(diplo) if ch == "."]
    hap1 = "*" + diplo[:dots[0]]
    hap2 = "*" + diplo[underscores[0] + 1:dots[1]]
    return hap1 + "/" + hap2


def _read_stripped_lines(path):
    """Return every line of *path* with surrounding whitespace removed."""
    with open(path, "r") as handle:
        return [line.strip() for line in handle]


def _count_unexplained(sample_items, supporting_items):
    """Count the entries of *sample_items* that are absent from *supporting_items*."""
    return sum(1 for item in sample_items if item not in supporting_items)


def _load_tiebreak_candidates(infile_spec, core_variants, all_var_gt):
    """Collect the *infile_spec* records whose field 2 equals *core_variants*.

    Returns three parallel lists: field 0 of each record, field 1 (the
    diplotype key) and the number of sample genotype entries missing from
    the record's ';'-joined field 3.
    """
    keys = []
    diplos = []
    scores = []
    with open(infile_spec, "r") as handle:
        for raw in handle:
            fields = raw.strip().split()
            if len(fields) < 4:
                continue  # blank or truncated line
            if fields[2] == core_variants:
                keys.append(fields[0])
                diplos.append(fields[1])
                scores.append(_count_unexplained(all_var_gt, fields[3].split(";")))
    return keys, diplos, scores


def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
    """Call the star-allele diplotype from the sample's SNV lists.

    Parameters (all but *cn* are paths)
    -----------------------------------
    database        tab-separated: diplotype key, ';'-joined core variants,
                    ';'-joined full variant list.
    infile          sample core variants, one per line (empty file = the
                    "*38/*38" default call).
    infile_full     all sample variants, one per line.
    infile_full_gt  all sample variants with genotype, one per line.
    infile_spec     whitespace-separated tie-break table: field 0 is the
                    value returned as candidate haplotypes, field 1 a
                    diplotype key, field 2 core variants, field 3 a
                    ';'-joined genotype-level variant list.
    cn              copy number, passed on to get_core_variants.

    Returns
    -------
    None when the database has no record, or more than three records, whose
    sorted core variants equal the sample's.  Otherwise a list whose first
    element is the list of matching diplotype keys and whose last element is
    the call ("*A/*B", or "*A/*B or *C/*D" when ambiguous).  Unambiguous
    calls carry the chosen candidate as the middle element.  For an empty
    *infile* the result is ``['38.v1_38.v1', '*38/*38']``.

    Selection: with one match it is returned directly.  With two or three,
    the match with the strictly smallest number of sample variants missing
    from its full variant list wins; if the minimum is shared, the records
    of *infile_spec* are scored on genotype-level variants instead.

    Raises ValueError when a tie-break is needed but *infile_spec* has no
    record for the sample's core variants (unchanged from the original).

    NOTE(review) - behaviour changes that need revalidation:
    * the lines of *infile_full* are now really stripped (the original
      discarded the result of ``line.strip()``, so the missing-variant
      counts compared newline-terminated strings);
    * with three matches, a unique minimum is always honoured and every
      shared minimum goes to the tie-break (some orderings used to fall
      through and return None);
    * candidates sharing the best tie-break score are found by position
      (``score.index(i)`` used to return the first position every time), and
      when they agree the result has the same three-element shape as the
      other unambiguous results.
    """
    all_variants = _read_stripped_lines(infile_full)

    if os.stat(infile).st_size == 0:
        return ["38.v1_38.v1", "*38/*38"]

    core_variants = get_core_variants(infile, cn)
    all_var_gt = _read_stripped_lines(infile_full_gt)

    # Database records whose (sorted) core variants equal the sample's.
    soln_list1 = []   # diplotype keys
    soln_list2 = []   # matching ';'-joined full variant lists
    with open(database, "r") as handle:
        for raw in handle:
            record = raw.strip().split("\t")
            if ";".join(sorted(record[1].split(";"))) == core_variants:
                soln_list1.append(record[0])
                soln_list2.append(record[2])

    n_solutions = len(soln_list1)
    if n_solutions not in (1, 2, 3):
        return None

    if n_solutions == 1:
        diplo = soln_list1[0]
        return [soln_list1, diplo, _star_diplotype(diplo)]

    if n_solutions == 2:
        # Kept from the original: the two candidates are echoed to stdout.
        print(soln_list1)

    unexplained = [
        _count_unexplained(all_variants, supporting.split(";"))
        for supporting in soln_list2
    ]
    fewest = min(unexplained)
    if unexplained.count(fewest) == 1:
        best = soln_list1[unexplained.index(fewest)]
        return [soln_list1, best, _star_diplotype(best)]

    # Shared minimum: fall back to the genotype-level tie-break table.
    tiebreak3, tiebreak1, score = _load_tiebreak_candidates(infile_spec, core_variants, all_var_gt)
    min_score = min(score)
    scores_all_equal = all(value == score[0] for value in score)

    if n_solutions == 3:
        if scores_all_equal:
            allele_res = " or ".join(_star_diplotype(elem) for elem in tiebreak1)
            return [soln_list1, tiebreak1, allele_res]

        minpos = score.index(min_score)
        return [soln_list1, tiebreak3[minpos], _star_diplotype(tiebreak1[minpos])]

    # Two database matches.
    if scores_all_equal and soln_list1[0] == "17.v1_4.v1":
        # Hard-coded preference carried over from the original.
        elem = "17.v1_4.v1"
        return [soln_list1, elem, _star_diplotype(elem)]

    if scores_all_equal:
        allele_res = " or ".join(_star_diplotype(elem) for elem in soln_list1)
        return [soln_list1, allele_res]

    if score.count(min_score) > 1:
        tied = [pos for pos, value in enumerate(score) if value == min_score]
        alt_solns = [_star_diplotype(tiebreak1[pos]) for pos in tied]

        if all(soln == alt_solns[0] for soln in alt_solns):
            return [soln_list1, tiebreak3[tied[0]], alt_solns[0]]

        alt_solns = sorted(alt_solns)
        allele_res = " or ".join([alt_solns[0], alt_solns[-1]])
        return [soln_list1, allele_res]

    minpos = score.index(min_score)
    return [soln_list1, tiebreak3[minpos], _star_diplotype(tiebreak1[minpos])]
|
||||
226
pgx-main/scripts/cyp2c19/hg38/bin/stellarpgx.py
Normal file
226
pgx-main/scripts/cyp2c19/hg38/bin/stellarpgx.py
Normal file
@@ -0,0 +1,226 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from sv_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
|
||||
# CYP2C19 star-allele calling driver.
#
# Flow: read the ten positional arguments, estimate the gene copy number from
# the coverage file, call the SNV-defined diplotype, then adjust the reported
# diplotype for deletions (CN 0/1) or duplications (CN >= 3).

print("--------------------------------------------\n")
print("CYP2C19 Star Allele Calling with StellarPGx\n")
print("--------------------------------------------\n")

# Positional command-line arguments, in the order the caller supplies them.
database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]
sv_del = sys.argv[6]
sv_dup = sys.argv[7]
cov_file = sys.argv[8]
hap_dbs = sys.argv[9]
act_score = sys.argv[10]  # read to keep the argument contract; unused below

# Parse the coverage file once; element 0 is the copy number (as a string)
# and element 1 the rounded average gene coverage.
cn_info = get_total_CN(cov_file)
cn = cn_info[0]

print("Initially computed CN = {}".format(cn))

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)

snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)

if snv_def_calls is None:
    # No diplotype matched the core variants: report the closest background
    # alleles (if any) and stop.
    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if bac_alleles is None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")

    else:
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    sys.exit()

best_diplos = snv_def_calls[0]

print("\nCandidate alleles:")
print(best_diplos)

# The last element is the star-allele string; ambiguous calls are several
# diplotypes joined with " or ".
snv_def_alleles = snv_def_calls[-1]

# Candidate haplotype ids are only needed (and only taken) for unambiguous calls.
if "or" not in snv_def_alleles:
    snv_cand_alleles = snv_def_calls[1]

dip_variants = get_all_vars_gt(infile_full_gt)

print("\nResult:")

av_cov = cn_info[1]

gene_alleles = ""

if snv_def_alleles != '*38/*38' and cn != '0':
    in_list = dup_test_init(sv_dup, av_cov)

if cn == '2':

    if 'or' in snv_def_alleles:
        print(snv_def_alleles)

    else:
        gene_alleles = snv_def_alleles
        print(gene_alleles)

elif cn == '0':
    del_confirm = del_test(sv_del)

    if del_confirm == '*36/*36':
        gene_alleles = del_confirm
        print(gene_alleles)

    elif del_confirm == '*36':
        gene_alleles = del_confirm + "/" + "*other"
        print(gene_alleles)

    else:
        gene_alleles = "*36/*36"
        print(gene_alleles)

elif cn == '1':
    del_confirm = del_test(sv_del)

    if "or" in snv_def_alleles:
        # An ambiguous call cannot be phased against the deletion.  This used
        # to be reported only when del_test() returned 'None'; any other
        # del_test() result fell through to the phasing branch and crashed on
        # the undefined snv_cand_alleles.
        print(snv_def_alleles + "\t" + "Possible CYP2C19 gene deletion present")

    elif del_confirm == 'None':
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] == snv_def_alleles[1]:
            gene_alleles = snv_def_alleles[0] + "/" + "*36"
            print(gene_alleles)

        elif snv_def_alleles[0] != snv_def_alleles[1]:
            samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)

            gene_alleles = samp_allele1 + "/" + "*36"
            print(gene_alleles)

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        # Whatever del_test() reported, a single deleted copy is written *36.
        del_confirm = "*36"

        if snv_def_alleles[0] == snv_def_alleles[1]:
            gene_alleles = del_confirm + "/" + snv_def_alleles[0]
            print(gene_alleles)

        elif snv_def_alleles[0] != snv_def_alleles[1]:
            samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)

            gene_alleles = del_confirm + "/" + samp_allele1
            print(gene_alleles)

elif int(cn) in (3, 4) and snv_def_alleles is not None:

    if "or" in snv_def_alleles:
        print(snv_def_alleles + "\t" + "Duplication present")

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] != snv_def_alleles[1]:
            phased_dup = dup_test_cn_3_4(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)

        elif snv_def_alleles[0] == snv_def_alleles[1]:
            # Homozygous call: one copy on the first haplotype, the rest on the second.
            rt_2 = int(cn) - 1
            phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))

        gene_alleles = phased_dup
        print(gene_alleles)

elif int(cn) > 4 and snv_def_alleles is not None:

    if "or" in snv_def_alleles:
        print(snv_def_alleles + "\t" + "Duplication present")

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] != snv_def_alleles[1]:
            phased_dup = dup_test_cn_n(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)

        elif snv_def_alleles[0] == snv_def_alleles[1]:
            rt_2 = int(cn) - 1
            phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))

        gene_alleles = phased_dup
        print(phased_dup)

elif int(cn) > 2 and snv_def_alleles is None:
    # NOTE(review): snv_def_alleles is always a string by this point, so this
    # branch looks unreachable; kept for parity with the original flow.
    print("Possible rare CYP2C19 structural variant present")
|
||||
331
pgx-main/scripts/cyp2c19/hg38/bin/sv_modules.py
Normal file
331
pgx-main/scripts/cyp2c19/hg38/bin/sv_modules.py
Normal file
@@ -0,0 +1,331 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
def get_total_CN(cov_file):
    """Estimate the total gene copy number from a region coverage file.

    Each line of *cov_file* is split on whitespace; fields 1 and 2 are used as
    the region bounds and field 3 as the value divided by the region length.
    Row 0 is the target gene and rows 1 and 2 are the two control regions
    (named VDR and EGFR in the original code).

    Returns a list ``[copy_number, gene_cov, ctrl_cov]`` where ``copy_number``
    is a string and the two coverages are rounded to integers.

    Raises IndexError if fewer than three rows (or four fields) are present and
    ZeroDivisionError if a region has zero length or the controls have no
    coverage.
    """
    # The context manager guarantees the handle is closed (it used to leak).
    with open(cov_file, "r") as handle:
        all_reg = [line.strip().split() for line in handle]

    def _mean_depth(region):
        # Field 3 divided by the region length (field 2 - field 1).
        return float(region[3]) / (float(region[2]) - float(region[1]))

    av_2c19_cov = _mean_depth(all_reg[0])
    av_vdr_cov = _mean_depth(all_reg[1])
    av_egfr_cov = _mean_depth(all_reg[2])

    av_ctrl_cov = (av_vdr_cov + av_egfr_cov) / 2

    # Controls are assumed diploid, hence the factor of two.
    total_cn = round(2 * (av_2c19_cov / av_ctrl_cov))

    return [str(int(total_cn)), round(av_2c19_cov), round(av_ctrl_cov)]
|
||||
|
||||
|
||||
def del_test(sv_del):
    """Classify a whole-gene deletion from the deletion genotyping file.

    Returns the string ``"None"`` when *sv_del* is empty.  Otherwise the first
    line containing ``COVERAGE`` whose last whitespace-separated field equals
    1.0 yields ``"*(full_gene_del)/*(full_gene_del)"`` and one equal to -1.0
    yields ``"*(full_gene_del)"``.  When no line decides, the function returns
    the ``None`` object (not the string), exactly as before.
    """
    if os.stat(sv_del).st_size == 0:
        return "None"

    # The handle is now closed deterministically (it used to leak).
    with open(sv_del, "r") as handle:
        for line in handle:
            if "COVERAGE" not in line:
                continue

            fields = line.strip().split()
            # Only the last field drives the decision; the previously parsed
            # but unused ABHet / GT / DP values were dropped.
            ab_hom = float(fields[-1])

            if ab_hom == 1.0:
                return "*(full_gene_del)/*(full_gene_del)"
            if ab_hom == -1.0:
                return "*(full_gene_del)"

    return None
|
||||
|
||||
|
||||
# Module-level scratch lists kept for backward compatibility; they are reset
# at the start of every del_adv_test() call.
hap_adv_list = []
hap_t1 = []


def del_adv_test(hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, core_vars):
    """Pick which of two called alleles carries the sample's first het variant.

    *hap_dbs* is read as whitespace-separated rows where field 1 is a
    candidate allele id and field 2 its ';'-joined variants.  *core_vars* is a
    ';'-joined string; entries ending in ``0/1`` are treated as heterozygous
    and have their last four characters stripped.

    Returns *test_allele1* if the first heterozygous variant belongs to
    *cand_allele1*, *test_allele2* if it belongs to *cand_allele2*, and
    ``None`` when that cannot be decided (no heterozygous variant, unknown
    candidate ids, or the variant is in neither haplotype).
    """
    # Reset shared state: without this a second call still saw the rows and
    # heterozygous variants collected by the first call.
    del hap_adv_list[:]
    del hap_t1[:]

    with open(hap_dbs, "r") as handle:
        for line in handle:
            hap_adv_list.append(line.strip().split())

    for variant in core_vars.split(";"):
        if variant[-3:] == "0/1":
            hap_t1.append(variant[:-4])

    # Start empty so an unknown candidate id no longer leaves these unbound.
    list_t1 = []
    list_t2 = []

    for elem in hap_adv_list:
        if elem[1] == cand_allele1:
            list_t1 = (elem[2]).split(';')

        if elem[1] == cand_allele2:
            list_t2 = (elem[2]).split(';')

    if not hap_t1:
        return None

    if hap_t1[0] in list_t1:
        return test_allele1

    elif hap_t1[0] in list_t2:
        return test_allele2

    return None
|
||||
|
||||
|
||||
# Module-level scratch lists kept for backward compatibility; they are reset
# at the start of every dup_test_init() call.
het_hom_list = []
het_hom_list_new = []


def dup_test_init(sv_dup, av_cov):
    """Load the duplication genotyping rows and annotate each with read support.

    Lines of *sv_dup* containing ``COVERAGE`` or ``AGGREGATED`` are skipped,
    as are blank lines.  Every remaining line is split on whitespace; field 2
    is used as an integer depth and the second-to-last field as a fraction of
    that depth.  Two values are appended to each row:

    * supporting reads divided by the mean depth over all rows (4 decimals);
    * the supporting read count, ``round(fraction * depth)``.

    *av_cov* is accepted for interface compatibility but not used.

    Returns the module-level ``het_hom_list_new`` list (empty when the file
    holds no variant rows).
    """
    # Reset shared state: a repeated call used to re-append every row and
    # annotate the old rows a second time.
    del het_hom_list[:]
    del het_hom_list_new[:]

    with open(sv_dup, "r") as handle:
        for line in handle:
            if "COVERAGE" in line or "AGGREGATED" in line:
                continue

            fields = line.strip().split()
            if fields:
                het_hom_list.append(fields)

    # No variant rows: nothing to normalise (this used to divide by zero).
    if not het_hom_list:
        return het_hom_list_new

    depths = [int(row[2]) for row in het_hom_list]
    av_read_cov = sum(depths) / len(depths)

    for row in het_hom_list:
        supp_reads = round(float(row[-2]) * int(row[2]))
        row.append(round(supp_reads / av_read_cov, 4))
        row.append(supp_reads)
        het_hom_list_new.append(row)

    return het_hom_list_new
|
||||
|
||||
|
||||
# Module-level scratch lists kept for backward compatibility; both are reset
# at the start of every dup_test_cn_3_4() / dup_test_cn_n() call.
hap_def_list = []
allele_cn_list = []


def _phase_duplication(hap_dbs, cand_allele1, cand_allele2, test_allele1,
                       test_allele2, c_num, in_list, bias, max_listed_cn):
    """Shared implementation of dup_test_cn_3_4() and dup_test_cn_n().

    The heterozygous row (``row[1] == "0/1"``) with the largest ``row[-2]``
    value is selected; ``round(row[-2] * c_num - bias)`` copies are assigned to
    the allele whose haplotype definition lists that variant (``row[0]``) and
    the remaining copies to the other allele.  Copy counts up to
    *max_listed_cn* are rendered explicitly; anything else yields ``'check'``.
    """
    # Reset shared state: results used to be read from indexes 0-3 of a list
    # that kept growing, so every call after the first returned stale data.
    del hap_def_list[:]
    del allele_cn_list[:]

    with open(hap_dbs, "r") as handle:
        for line in handle:
            hap_def_list.append(line.strip().split())

    het_rows = [row for row in in_list if row[1] == "0/1"]
    if not het_rows:
        # Nothing to phase on; defer to manual review instead of crashing.
        return 'check'

    # max() returns the first maximal row, matching the earlier max()/index() pair.
    best_row = max(het_rows, key=lambda row: row[-2])
    var = best_row[0]
    max_het = best_row[-2]

    vars_allele1 = None
    vars_allele2 = None
    for elem in hap_def_list:
        if elem[1] == cand_allele1:
            vars_allele1 = elem[2].split(';')

        if elem[1] == cand_allele2:
            vars_allele2 = elem[2].split(';')

    if vars_allele1 is None or vars_allele2 is None:
        return 'check'
    if var not in vars_allele1 and var not in vars_allele2:
        return 'check'

    copies_with_var = int(round(max_het * int(c_num) - bias))

    # A variant listed in both haplotypes is attributed to the first one.
    if var in vars_allele1:
        first, second = test_allele1, test_allele2
    else:
        first, second = test_allele2, test_allele1

    n_first = copies_with_var
    n_second = int(c_num) - copies_with_var
    allele_cn_list.extend([first, n_first, second, n_second])

    if n_first == 0:
        return first + "/" + second + "x" + str(n_second - 1)
    if n_second == 0:
        return second + "/" + first + "x" + str(n_first - 1)

    for copies in range(1, max_listed_cn + 1):
        # A single copy is written without a multiplier.
        tag = "" if copies == 1 else "x" + str(copies)
        if n_first == copies:
            return first + tag + "/" + second + "x" + str(n_second)
        if n_second == copies:
            return second + tag + "/" + first + "x" + str(n_first)

    return 'check'


def dup_test_cn_3_4(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
    """Phase a duplication for a total copy number of 3 or 4.

    *hap_dbs* holds whitespace-separated rows (field 1: candidate allele id,
    field 2: ';'-joined variants); *in_list* holds rows as returned by
    dup_test_init().  *sv_dup* and *av_cov* are accepted for interface
    compatibility but not used.

    Returns a diplotype string such as ``"*1/*2x2"``, or ``'check'`` when the
    copies cannot be assigned.
    """
    return _phase_duplication(hap_dbs, cand_allele1, cand_allele2, test_allele1,
                              test_allele2, c_num, in_list, 0, 2)


def dup_test_cn_n(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
    """Phase a duplication for a total copy number above 4.

    Same contract as dup_test_cn_3_4(), except that 0.15 is subtracted from
    the estimated copy count before rounding and per-allele counts up to four
    are rendered explicitly.
    """
    return _phase_duplication(hap_dbs, cand_allele1, cand_allele2, test_allele1,
                              test_allele2, c_num, in_list, 0.15, 4)
|
||||
|
||||
|
||||
# def hybrid_29_test1(cov_e1_int4, cov_int4_e9):
|
||||
|
||||
# if 0.85 < float(cov_e1_int4)/float(cov_int4_e9) < 1.2:
|
||||
# return 'norm_var'
|
||||
|
||||
# elif 0.45 < float(cov_e1_int4)/float(cov_int4_e9) < 0.75:
|
||||
# return 'hyb_29'
|
||||
|
||||
# elif float(cov_e1_int4)/float(cov_int4_e9) < 0.15:
|
||||
# return 'hyb_29_2'
|
||||
|
||||
# else:
|
||||
# return 'norm_var'
|
||||
|
||||
|
||||
# def hybrid_30_test1(cov_e1_int4, cov_int4_e9):
|
||||
|
||||
# if 0.85 < float(cov_e1_int4)/float(cov_int4_e9) < 1.2:
|
||||
# return 'norm_var'
|
||||
|
||||
# elif 0.45 < float(cov_int4_e9)/float(cov_e1_int4) < 0.75:
|
||||
# return 'hyb_30'
|
||||
|
||||
# elif float(cov_int4_e9)/float(cov_e1_int4) < 0.15:
|
||||
# return 'hyb_30_2'
|
||||
|
||||
# else:
|
||||
# return 'norm_var'
|
||||
|
||||
78
pgx-main/scripts/cyp2c8/b37/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/cyp2c8/b37/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Find the known diplotype that best explains the sample's core variants.

    *database* is a tab-separated file whose column 0 is a diplotype id of the
    form ``<hap>.<ver>_<hap>.<ver>`` and column 1 its ';'-joined core variants.
    *core_vars* is the sample's ';'-joined core variant string; the last four
    characters of an entry are stripped to compare variants regardless of
    genotype (presumably a ``~0/1``-style suffix - confirm against the input
    format).

    Records are pre-filtered to those containing both the first and the last
    sample variant, then scored per record variant: +3 for an exact match,
    +1 when only the genotype-stripped variant occurs in *core_vars*, -2
    otherwise.  Among records scoring the maximum or one below it, the one
    with the fewest variants wins (first on ties).

    Returns ``[star_allele_string, diplotype_id]``; when no record passes the
    pre-filter the result is ``['*1/*1', '1.v1_1.v1']``.
    """
    core_vars_list = core_vars.split(";")
    core_temp1 = core_vars_list[-1][:-4]
    core_temp2 = core_vars_list[0][:-4]

    # The handle is now closed deterministically (it used to leak).
    with open(database, "r") as handle:
        dbs = [line.strip().split("\t") for line in handle]

    # The original test `core_temp1 and core_temp2 in temp_rec` only checked
    # core_temp2 for membership.  Both variants must be present; the leading
    # truthiness check keeps empty input from matching every record.
    dbs_temp = [
        record for record in dbs
        if core_temp1 and core_temp1 in record[1] and core_temp2 in record[1]
    ]

    scores = []
    candidates = []
    cand_vars = []

    for elem in dbs_temp:
        candidates.append(elem[0])
        record_core_var = elem[1].split(";")
        cand_vars.append(record_core_var)

        counter = 0

        for variant in record_core_var:
            if variant in core_vars_list:
                counter += 3
            elif variant[:-4] in core_vars:
                counter += 1
            else:
                counter += -2

        scores.append(counter)

    if len(scores) == 0:
        diplo1 = '1.v1_1.v1'
        allele_res = '*1/*1'

    else:
        max_score = max(scores)

        # Near-best candidates: the top score or one point below it.
        indices = [pos for pos, value in enumerate(scores)
                   if value == max_score or value == max_score - 1]

        cand_diplos = [candidates[pos] for pos in indices]
        diplo_vars2 = [len(cand_vars[pos]) for pos in indices]

        min_index = diplo_vars2.index(min(diplo_vars2))
        diplo1 = cand_diplos[min_index]

        # Haplotype 1 is the text before the first '.', haplotype 2 the text
        # between the first '_' and the second '.'.
        underscores = [pos for pos, char in enumerate(diplo1) if char == "_"]
        dots = [pos for pos, char in enumerate(diplo1) if char == "."]
        hap1 = "*" + str(diplo1[:dots[0]])
        hap2 = "*" + str(diplo1[underscores[0] + 1:dots[1]])
        allele_res = hap1 + "/" + hap2

    return [allele_res, diplo1]
|
||||
461
pgx-main/scripts/cyp2c8/b37/bin/snv_def_modules.py
Normal file
461
pgx-main/scripts/cyp2c8/b37/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,461 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ';'-joined string.

    Every line of *infile* is stripped and treated as one variant.  When the
    copy number *cn* equals 1, every ``~0/1`` genotype is rewritten to
    ``~1/1``.
    """
    # The handle is now closed deterministically (it used to leak).
    with open(infile, "r") as handle:
        variants = [line.strip() for line in handle]

    core_vars = ";".join(sorted(variants))

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars
|
||||
|
||||
def get_all_vars_gt(infile_full_gt):
    """Return every line of *infile_full_gt*, stripped, sorted and ';'-joined."""
    # The handle is now closed deterministically (it used to leak).
    with open(infile_full_gt, "r") as handle:
        variants = [line.strip() for line in handle]

    return ";".join(sorted(variants))
|
||||
|
||||
def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
|
||||
|
||||
|
||||
f = open(infile_spec, "r")
|
||||
|
||||
all_variants = []
|
||||
|
||||
for line in open(infile_full, "r"):
|
||||
line.strip()
|
||||
all_variants.append(line)
|
||||
# all_variants = line.strip().split(";")
|
||||
# print(all_variants)
|
||||
|
||||
if os.stat(infile).st_size == 0:
|
||||
cand_res = ['1.v1_1.v1']
|
||||
allele_res = "*1/*1"
|
||||
return ["".join(cand_res), allele_res];
|
||||
#print("\nSupporting variants")
|
||||
#print("\n" + "".join(all_variants))
|
||||
sys.exit()
|
||||
|
||||
# core_variants = []
|
||||
|
||||
# for line in open(infile, "r"):
|
||||
# line = line.strip()
|
||||
# core_variants.append(line)
|
||||
|
||||
# core_variants = ";".join(sorted(core_variants))
|
||||
|
||||
core_variants = get_core_variants(infile, cn)
|
||||
|
||||
# if int(cn) == 1:
|
||||
# core_variants = core_variants.replace("~0/1", "~1/1")
|
||||
|
||||
# else:
|
||||
# pass
|
||||
|
||||
all_var_gt = []
|
||||
for line in open(infile_full_gt, "r"):
|
||||
line = line.strip()
|
||||
all_var_gt.append(line)
|
||||
|
||||
|
||||
dbs = []
|
||||
|
||||
for line in open(database, "r"):
|
||||
line = line.strip().split("\t")
|
||||
dbs.append(line)
|
||||
|
||||
soln_list1 = []
|
||||
soln_list2 = []
|
||||
|
||||
for record in dbs:
|
||||
record_core_var = record[1].split(";")
|
||||
record_core_var = ";".join(sorted(record_core_var))
|
||||
if record_core_var == core_variants:
|
||||
diplo = record[0]
|
||||
full_dip = record[2]
|
||||
soln_list1.append(record[0])
|
||||
soln_list2.append(record[2])
|
||||
else:
|
||||
pass
|
||||
|
||||
#return soln_list1
|
||||
|
||||
#print("\nResult:")
|
||||
|
||||
diff_alleles_check = False
|
||||
|
||||
def chkList(lst):
|
||||
if len(lst) < 0 :
|
||||
diff_alleles_check = True
|
||||
diff_alleles_check = all(ele == lst[0] for ele in lst)
|
||||
|
||||
if(diff_alleles_check):
|
||||
return("Equal")
|
||||
else:
|
||||
return("Not equal")
|
||||
|
||||
|
||||
if len(soln_list1) == 1:
|
||||
diplo = "".join(soln_list1)
|
||||
res1 = [i for i in range(len(diplo)) if diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo)) if diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo[:res2[0]])
|
||||
hap2 = "*" + str (diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo, allele_res];
|
||||
#print ("\nSupporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
elif len(soln_list1) == 2:
|
||||
print(soln_list1)
|
||||
diplo1 = soln_list1[0]
|
||||
diplo2 = soln_list1[1]
|
||||
diplo1_supp_var = soln_list2[0].split(";")
|
||||
diplo2_supp_var = soln_list2[1].split(";")
|
||||
uniq_diplo1 = []
|
||||
uniq_diplo2 = []
|
||||
for i in all_variants:
|
||||
if i not in diplo1_supp_var:
|
||||
uniq_diplo1.append(i)
|
||||
|
||||
if i not in diplo2_supp_var:
|
||||
uniq_diplo2.append(i)
|
||||
|
||||
#print("\nUnique variants in soln 1: {}".format(len(uniq_diplo1)))
|
||||
#print("\nUnique variants in soln 2: {}".format(len(uniq_diplo2)))
|
||||
|
||||
if len(uniq_diplo1) < len(uniq_diplo2):
|
||||
res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo1[:res2[0]])
|
||||
hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo1, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2):
|
||||
res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo2[:res2[0]])
|
||||
hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo2, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
# elif len(uniq_diplo1) == len(uniq_diplo2) and (diplo1 == "4.v11_74.v1" and diplo2 == "4.v12_1.v1"):
|
||||
# res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
# hap1 = "*" + str (diplo2[:res2[0]])
|
||||
# hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
# allele_res = hap1 + "/" + hap2
|
||||
# return [soln_list1, diplo2, allele_res];
|
||||
|
||||
# elif len(uniq_diplo1) == len(uniq_diplo2) and diplo2 == "41.v1_65.v1":
|
||||
# res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
# hap1 = "*" + str (diplo2[:res2[0]])
|
||||
# hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
# allele_res = hap1 + "/" + hap2
|
||||
# return [soln_list1, diplo2, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
# elif len(uniq_diplo1) == len(uniq_diplo2) and (diplo1 == "4.v1_6.v1" and diplo2 == "4.v4_6.v2") :
|
||||
# res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
|
||||
# hap1 = "*" + str (diplo1[:res2[0]])
|
||||
# hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
|
||||
# allele_res = hap1 + "/" + hap2
|
||||
# return [soln_list1, diplo1, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
else:
|
||||
tiebreak1 = []
|
||||
tiebreak2 = []
|
||||
tiebreak3 = []
|
||||
score = []
|
||||
for line in f:
|
||||
line = line.strip().split()
|
||||
#print(line)
|
||||
if line[2] == core_variants:
|
||||
tiebreak1.append(line[1])
|
||||
tiebreak2.append(line[3])
|
||||
tiebreak3.append(line[0])
|
||||
for full_dip in tiebreak2:
|
||||
diplo_supp_gt = full_dip.split(";")
|
||||
uniq_gt = []
|
||||
for i in all_var_gt:
|
||||
if i not in diplo_supp_gt:
|
||||
uniq_gt.append(i)
|
||||
score_dip = len(uniq_gt)
|
||||
score.append(score_dip)
|
||||
|
||||
min_score = min(score)
|
||||
#print(score)
|
||||
|
||||
|
||||
# if chkList(score) == "Equal" and soln_list1[0] == "17.v1_4.v1":
|
||||
# elem = "17.v1_4.v1"
|
||||
# res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
# hap1 = "*" + str (elem[:res2[0]])
|
||||
# hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
# result_dip = hap1 + "/" + hap2
|
||||
# return [soln_list1, elem, result_dip];
|
||||
|
||||
|
||||
if chkList(score) == "Equal":
|
||||
amb_soln_set = []
|
||||
for elem in soln_list1:
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
amb_soln_set.append(result_dip)
|
||||
#elem_pos = tiebreak1.index(elem)
|
||||
#print ("Solution " + str(elem_pos) + ": " + result_dip)
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, allele_res];
|
||||
|
||||
|
||||
# elif score.count(min_score) > 1 and soln_list1[0] == "11.v1_2.v2":
|
||||
# elem = "11.v1_2.v2"
|
||||
# res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
# hap1 = "*" + str (elem[:res2[0]])
|
||||
# hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
# result_dip = hap1 + "/" + hap2
|
||||
# return [soln_list1, elem, result_dip];
|
||||
|
||||
|
||||
|
||||
elif score.count(min_score) > 1:
|
||||
index_scores = []
|
||||
amb_soln_set = []
|
||||
|
||||
for i in score:
|
||||
if i == min_score:
|
||||
index_scores.append(score.index(i))
|
||||
|
||||
alt_solns = []
|
||||
for j in index_scores:
|
||||
elem = tiebreak1[j]
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
alt_solns.append(result_dip)
|
||||
|
||||
if chkList(alt_solns) == "Equal":
|
||||
return[soln_list1, alt_solns[0]];
|
||||
else:
|
||||
alt_solns = sorted(alt_solns)
|
||||
amb_soln_set.append(alt_solns[0])
|
||||
amb_soln_set.append(alt_solns[-1])
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, allele_res];
|
||||
|
||||
# amb_soln_set = []
|
||||
# temp_set = []
|
||||
# temp_set.append(tiebreak1[0])
|
||||
# temp_set.append(tiebreak1[-1])
|
||||
|
||||
# for elem in temp_set:
|
||||
# res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
# hap1 = "*" + str (elem[:res2[0]])
|
||||
# hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
# result_dip = hap1 + "/" + hap2
|
||||
# amb_soln_set.append(result_dip)
|
||||
#elem_pos = tiebreak1.index(elem)
|
||||
#print ("Solution " + str(elem_pos) + ": " + result_dip)
|
||||
# allele_res = " or ".join(amb_soln_set)
|
||||
# return [soln_list1, allele_res];
|
||||
#print ("\nSupporting core variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
else:
|
||||
minpos = score.index(min_score)
|
||||
best_diplo = tiebreak1[minpos]
|
||||
best_cand_haps = tiebreak3[minpos]
|
||||
res1 = [i for i in range(len(best_diplo)) if best_diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(best_diplo)) if best_diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (best_diplo[:res2[0]])
|
||||
hap2 = "*" + str (best_diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, best_cand_haps, allele_res];
|
||||
#print ("Supporting core variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
elif len(soln_list1) == 3:
|
||||
diplo1 = soln_list1[0]
|
||||
diplo2 = soln_list1[1]
|
||||
diplo3 = soln_list1[2]
|
||||
diplo1_supp_var = soln_list2[0].split(";")
|
||||
diplo2_supp_var = soln_list2[1].split(";")
|
||||
diplo3_supp_var = soln_list2[2].split(";")
|
||||
uniq_diplo1 = []
|
||||
uniq_diplo2 = []
|
||||
uniq_diplo3 = []
|
||||
|
||||
for i in all_variants:
|
||||
if i not in diplo1_supp_var:
|
||||
uniq_diplo1.append(i)
|
||||
|
||||
if i not in diplo2_supp_var:
|
||||
uniq_diplo2.append(i)
|
||||
|
||||
if i not in diplo3_supp_var:
|
||||
uniq_diplo3.append(i)
|
||||
|
||||
|
||||
if len(uniq_diplo1) < len(uniq_diplo2) and len(uniq_diplo1) < len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo1[:res2[0]])
|
||||
hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo1, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2) and len(uniq_diplo2) < len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo2[:res2[0]])
|
||||
hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo2, allele_res]
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2) and len(uniq_diplo2) > len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo3)) if diplo3.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo3)) if diplo3.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo3[:res2[0]])
|
||||
hap2 = "*" + str (diplo3[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo3, allele_res]
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
# elif len(uniq_diplo1) == len(uniq_diplo2) == len(uniq_diplo3) and diplo3 == "39.v1_4.v4":
|
||||
# res1 = [i for i in range(len(diplo3)) if diplo3.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(diplo3)) if diplo3.startswith(".", i)]
|
||||
# hap1 = "*" + str (diplo3[:res2[0]])
|
||||
# hap2 = "*" + str (diplo3[res1[0]+1:res2[1]])
|
||||
# allele_res = hap1 + "/" + hap2
|
||||
# return [soln_list1, diplo3, allele_res]
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
elif len(uniq_diplo1) == len(uniq_diplo2) == len(uniq_diplo3) or (len(uniq_diplo1) != len(uniq_diplo2) == len(uniq_diplo3)) or (len(uniq_diplo1) == len(uniq_diplo2) != len(uniq_diplo3)):
|
||||
|
||||
tiebreak1 = []
|
||||
tiebreak2 = []
|
||||
tiebreak3 = []
|
||||
score = []
|
||||
for line in f:
|
||||
line = line.strip().split()
|
||||
#print(line)
|
||||
if line[2] == core_variants:
|
||||
tiebreak1.append(line[1])
|
||||
tiebreak2.append(line[3])
|
||||
tiebreak3.append(line[0])
|
||||
for full_dip in tiebreak2:
|
||||
diplo_supp_gt = full_dip.split(";")
|
||||
uniq_gt = []
|
||||
for i in all_var_gt:
|
||||
if i not in diplo_supp_gt:
|
||||
uniq_gt.append(i)
|
||||
score_dip = len(uniq_gt)
|
||||
score.append(score_dip)
|
||||
|
||||
min_score = min(score)
|
||||
# print(score)
|
||||
|
||||
if chkList(score) == "Equal":
|
||||
amb_soln_set = []
|
||||
for elem in tiebreak1:
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
amb_soln_set.append(result_dip)
|
||||
#elem_pos = tiebreak1.index(elem)
|
||||
#print ("Solution " + str(elem_pos) + ": " + result_dip)
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, tiebreak1, allele_res];
|
||||
#print ("\nSupporting core variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
elif score.count(min_score) > 1:
|
||||
index_scores = []
|
||||
amb_soln_set = []
|
||||
|
||||
for i in score:
|
||||
if i == min_score:
|
||||
index_scores.append(score.index(i))
|
||||
|
||||
alt_solns = []
|
||||
for j in index_scores:
|
||||
elem = tiebreak1[j]
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
alt_solns.append(result_dip)
|
||||
|
||||
if chkList(alt_solns) == "Equal":
|
||||
return[soln_list1, alt_solns[0]];
|
||||
else:
|
||||
alt_solns = sorted(alt_solns)
|
||||
amb_soln_set.append(alt_solns[0])
|
||||
amb_soln_set.append(alt_solns[-1])
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, allele_res];
|
||||
|
||||
|
||||
|
||||
else:
|
||||
minpos = score.index(min_score)
|
||||
best_diplo = tiebreak1[minpos]
|
||||
best_cand_haps = tiebreak3[minpos]
|
||||
res1 = [i for i in range(len(best_diplo)) if best_diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(best_diplo)) if best_diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (best_diplo[:res2[0]])
|
||||
hap2 = "*" + str (best_diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, best_cand_haps, allele_res];
|
||||
#print ("Supporting core variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
|
||||
#print("\nFull diplotype variants:")
|
||||
#print("\n" + ";".join(all_var_gt))
|
||||
71
pgx-main/scripts/cyp2c8/b37/bin/stellarpgx.py
Normal file
71
pgx-main/scripts/cyp2c8/b37/bin/stellarpgx.py
Normal file
@@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
|
||||
# Command-line driver: prints the sample's core variants, the candidate
# diplotypes and the final call, or a fallback report when no call is made.

print("--------------------------------------------\n")
print("CYP2C8 Star Allele Calling with StellarPGx\n")
print("--------------------------------------------\n")

# Positional command-line arguments.
database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]

# Copy number is hard-coded here rather than computed.
cn = 2

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)

snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)

# None means the caller found no usable match; report background alleles
# instead and stop.  (`is None` replaces the original `== None` comparison.)
if snv_def_calls is None:

    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if bac_alleles is None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")

    else:
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    sys.exit()

# First element: candidate diplotype(s); last element: the reported call.
snv_cand_alleles = snv_def_calls[0]

print("\nCandidate alleles:")
print(snv_cand_alleles)

snv_def_alleles = snv_def_calls[-1]

# NOTE(review): dip_variants is not used afterwards; the call is kept because
# it still reads infile_full_gt and would raise if that file is unreadable.
dip_variants = get_all_vars_gt(infile_full_gt)

print("\nResult:")

print(snv_def_alleles)
|
||||
0
pgx-main/scripts/cyp2c8/b37/bin/sv_modules.py
Normal file
0
pgx-main/scripts/cyp2c8/b37/bin/sv_modules.py
Normal file
78
pgx-main/scripts/cyp2c8/hg38/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/cyp2c8/hg38/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Pick the database diplotype that best overlaps the sample's core variants.

    Parameters
    ----------
    database : str
        Path to a tab-separated file; column 0 is a diplotype key such as
        ``"1.v1_2.v3"`` and column 1 a ';'-separated list of core variants.
    core_vars : str
        The sample's core variants as one ';'-separated string.

    Returns
    -------
    list
        ``[allele_res, diplo1]``: the formatted pair (e.g. ``"*1/*2"``) and the
        database key it came from.  When no record qualifies the result is
        ``['*1/*1', '1.v1_1.v1']``.

    Notes
    -----
    The pre-filter used to read ``if core_temp1 and core_temp2 in temp_rec``,
    which Python parses as ``core_temp1 and (core_temp2 in temp_rec)``: only
    the second name was tested for membership.  Both are now required to
    occur in the record.  The truthiness test on ``core_temp1`` is kept so an
    empty ``core_vars`` still yields the default result.
    """
    core_vars_list = core_vars.split(";")
    # [:-4] drops the last four characters of an entry -- presumably a
    # "~0/1"-style genotype suffix; confirm against the input format.
    core_temp1 = core_vars_list[-1][:-4]
    core_temp2 = core_vars_list[0][:-4]

    # Context manager: the original never closed this handle.
    with open(database, "r") as handle:
        dbs = [line.strip().split("\t") for line in handle]

    scores = []
    candidates = []
    cand_vars = []

    for record in dbs:
        temp_rec = record[1]
        if not (core_temp1 and core_temp1 in temp_rec and core_temp2 in temp_rec):
            continue

        record_core_var = temp_rec.split(";")
        candidates.append(record[0])
        cand_vars.append(record_core_var)

        counter = 0
        for var in record_core_var:
            if var in core_vars_list:
                # Exact entry shared with the sample.
                counter += 3
            elif var[:-4] in core_vars:
                # Same entry minus its last four characters occurs somewhere
                # in the sample string (substring test, as in the original).
                counter += 1
            else:
                counter -= 2
        scores.append(counter)

    if not scores:
        diplo1 = '1.v1_1.v1'
    else:
        max_score = max(scores)
        # Shortlist the top score and scores one below it, then prefer the
        # record with the fewest core variants (first one on ties).
        shortlist = [i for i, s in enumerate(scores) if s == max_score or s == max_score - 1]
        best = min(shortlist, key=lambda i: len(cand_vars[i]))
        diplo1 = candidates[best]

    # "A.vX_B.vY" -> "*A/*B": text before the first '.', and text between the
    # first '_' and the second '.'.
    underscores = [i for i, ch in enumerate(diplo1) if ch == "_"]
    dots = [i for i, ch in enumerate(diplo1) if ch == "."]
    hap1 = "*" + diplo1[:dots[0]]
    hap2 = "*" + diplo1[underscores[0] + 1:dots[1]]
    allele_res = hap1 + "/" + hap2

    return [allele_res, diplo1]
|
||||
459
pgx-main/scripts/cyp2c8/hg38/bin/snv_def_modules.py
Normal file
459
pgx-main/scripts/cyp2c8/hg38/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,459 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the lines of *infile* as one sorted, ';'-joined string.

    Each line is stripped of surrounding whitespace (blank lines stay as empty
    entries).  When ``int(cn) == 1`` every ``"~0/1"`` in the joined string is
    rewritten to ``"~1/1"``.
    """
    # Context manager: the original never closed this handle.
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars


def get_all_vars_gt(infile_full_gt):
    """Return the stripped lines of *infile_full_gt*, sorted and ';'-joined."""
    with open(infile_full_gt, "r") as handle:
        return ";".join(sorted(line.strip() for line in handle))


def _read_stripped_lines(path):
    """Return every line of *path* with surrounding whitespace removed."""
    with open(path, "r") as handle:
        return [line.strip() for line in handle]


def _format_diplotype(diplo):
    """Turn a key such as '1.v1_2.v3' into '*1/*2'.

    The first part is the text before the first '.', the second part is the
    text between the first '_' and the second '.'.
    """
    underscores = [i for i, ch in enumerate(diplo) if ch == "_"]
    dots = [i for i, ch in enumerate(diplo) if ch == "."]
    return "*" + diplo[:dots[0]] + "/" + "*" + diplo[underscores[0] + 1:dots[1]]


def _count_unexplained(observed, supporting):
    """Count items of *observed* that are absent from the ';'-separated *supporting*."""
    known = set(supporting.split(";"))
    return sum(1 for item in observed if item not in known)


def _resolve_with_spec(infile_spec, core_variants, all_var_gt, soln_list1, list_spec_diplos):
    """Break a tie between candidates using the whitespace-separated spec file.

    Rows whose third field equals *core_variants* are scored by how many
    entries of *all_var_gt* are missing from their fourth field; lower is
    better.  *list_spec_diplos* selects the return shape the three-candidate
    path used when all scores are equal.
    """
    names = []
    diplos = []
    scores = []
    with open(infile_spec, "r") as handle:
        for raw in handle:
            fields = raw.strip().split()
            if not fields:
                # Blank lines used to raise IndexError; they carry no data.
                continue
            if fields[2] == core_variants:
                names.append(fields[0])
                diplos.append(fields[1])
                scores.append(_count_unexplained(all_var_gt, fields[3]))

    if not scores:
        # Previously surfaced as "min() arg is an empty sequence".
        raise ValueError(
            "no entry in {} matches core variants {}".format(infile_spec, core_variants)
        )

    min_score = min(scores)

    if all(s == scores[0] for s in scores):
        # No row is better than another: report every option.
        if list_spec_diplos:
            allele_res = " or ".join(_format_diplotype(d) for d in diplos)
            return [soln_list1, diplos, allele_res]
        allele_res = " or ".join(_format_diplotype(d) for d in soln_list1)
        return [soln_list1, allele_res]

    # enumerate() yields every position holding the minimum; the original
    # used score.index(i), which always returned the first such position.
    best = [i for i, s in enumerate(scores) if s == min_score]

    if len(best) > 1:
        alt_solns = sorted(_format_diplotype(diplos[i]) for i in best)
        if alt_solns[0] == alt_solns[-1]:
            return [soln_list1, alt_solns[0]]
        # As before, only the first and last of the sorted options are shown.
        return [soln_list1, alt_solns[0] + " or " + alt_solns[-1]]

    pos = best[0]
    return [soln_list1, names[pos], _format_diplotype(diplos[pos])]


def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
    """Call a diplotype by matching the sample's core variants to the database.

    Parameters
    ----------
    database : str
        Tab-separated file: diplotype key, ';'-separated core variants,
        ';'-separated full variant list.
    infile : str
        Sample core variants, one per line.
    infile_full : str
        Sample variants, one per line (compared with database column 2).
    infile_full_gt : str
        Sample variants with genotype, one per line (compared with the fourth
        field of the spec file).
    infile_spec : str
        Whitespace-separated tie-break file: name, diplotype key, core
        variants, full variant list.  Only opened when a tie must be broken.
    cn : int or str
        Copy number, forwarded to ``get_core_variants``.

    Returns
    -------
    list or None
        * ``["1.v1_1.v1", "*1/*1"]`` when *infile* is empty (size 0);
        * ``[matches, key_or_name, call]`` when one candidate is selected;
        * ``[matches, call]`` or ``[matches, spec_keys, call]`` when the
          result is ambiguous (``call`` then contains ``" or "``);
        * ``None`` when zero or more than three database records match.
        The first element is always the candidates, the last the call.

    Raises
    ------
    ValueError
        If a tie must be broken but no spec row matches the core variants.

    Notes
    -----
    Behaviour changes relative to the previous implementation:

    * Lines of *infile_full* are now really stripped.  The result of
      ``line.strip()`` used to be discarded, so every entry kept its newline
      and could not equal a database variant.
    * Tied best tie-break rows are now all considered (see
      ``_resolve_with_spec``).
    * With three matches, orderings not covered by the explicit comparisons
      now go to the tie-break instead of falling through to ``None``.
    """
    all_variants = _read_stripped_lines(infile_full)

    if os.stat(infile).st_size == 0:
        return ["1.v1_1.v1", "*1/*1"]

    core_variants = get_core_variants(infile, cn)
    all_var_gt = _read_stripped_lines(infile_full_gt)

    # Exact matches on the sorted core-variant string.
    soln_list1 = []
    soln_list2 = []
    with open(database, "r") as handle:
        for raw in handle:
            record = raw.strip().split("\t")
            if ";".join(sorted(record[1].split(";"))) == core_variants:
                soln_list1.append(record[0])
                soln_list2.append(record[2])

    n_matches = len(soln_list1)

    if n_matches == 1:
        diplo = soln_list1[0]
        return [soln_list1, diplo, _format_diplotype(diplo)]

    if n_matches not in (2, 3):
        return None

    # Sample variants each candidate fails to account for.
    unexplained = [_count_unexplained(all_variants, supp) for supp in soln_list2]
    winner = None

    if n_matches == 2:
        print(soln_list1)
        u1, u2 = unexplained
        if u1 < u2:
            winner = soln_list1[0]
        elif u1 > u2:
            winner = soln_list1[1]
    else:
        u1, u2, u3 = unexplained
        if u1 < u2 and u1 < u3:
            winner = soln_list1[0]
        elif u1 > u2 and u2 < u3:
            winner = soln_list1[1]
        elif u1 > u2 and u2 > u3:
            winner = soln_list1[2]

    if winner is not None:
        return [soln_list1, winner, _format_diplotype(winner)]

    return _resolve_with_spec(
        infile_spec, core_variants, all_var_gt, soln_list1, n_matches == 3
    )
|
||||
71
pgx-main/scripts/cyp2c8/hg38/bin/stellarpgx.py
Normal file
71
pgx-main/scripts/cyp2c8/hg38/bin/stellarpgx.py
Normal file
@@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
|
||||
# Command-line driver: prints the sample's core variants, the candidate
# diplotypes and the final call, or a fallback report when no call is made.

print("--------------------------------------------\n")
print("CYP2C8 Star Allele Calling with StellarPGx\n")
print("--------------------------------------------\n")

# Positional command-line arguments.
database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]

# Copy number is hard-coded here rather than computed.
cn = 2

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)

snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)

# None means the caller found no usable match; report background alleles
# instead and stop.  (`is None` replaces the original `== None` comparison.)
if snv_def_calls is None:

    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if bac_alleles is None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")

    else:
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    sys.exit()

# First element: candidate diplotype(s); last element: the reported call.
snv_cand_alleles = snv_def_calls[0]

print("\nCandidate alleles:")
print(snv_cand_alleles)

snv_def_alleles = snv_def_calls[-1]

# NOTE(review): dip_variants is not used afterwards; the call is kept because
# it still reads infile_full_gt and would raise if that file is unreadable.
dip_variants = get_all_vars_gt(infile_full_gt)

print("\nResult:")

print(snv_def_alleles)
|
||||
0
pgx-main/scripts/cyp2c8/hg38/bin/sv_modules.py
Normal file
0
pgx-main/scripts/cyp2c8/hg38/bin/sv_modules.py
Normal file
78
pgx-main/scripts/cyp2c9/b37/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/cyp2c9/b37/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Pick the database diplotype that best overlaps the sample's core variants.

    Parameters
    ----------
    database : str
        Path to a tab-separated file; column 0 is a diplotype key such as
        ``"1.v1_2.v3"`` and column 1 a ';'-separated list of core variants.
    core_vars : str
        The sample's core variants as one ';'-separated string.

    Returns
    -------
    list
        ``[allele_res, diplo1]``: the formatted pair (e.g. ``"*1/*2"``) and the
        database key it came from.  When no record qualifies the result is
        ``['*1/*1', '1.v1_1.v1']``.

    Notes
    -----
    The pre-filter used to read ``if core_temp1 and core_temp2 in temp_rec``,
    which Python parses as ``core_temp1 and (core_temp2 in temp_rec)``: only
    the second name was tested for membership.  Both are now required to
    occur in the record.  The truthiness test on ``core_temp1`` is kept so an
    empty ``core_vars`` still yields the default result.
    """
    core_vars_list = core_vars.split(";")
    # [:-4] drops the last four characters of an entry -- presumably a
    # "~0/1"-style genotype suffix; confirm against the input format.
    core_temp1 = core_vars_list[-1][:-4]
    core_temp2 = core_vars_list[0][:-4]

    # Context manager: the original never closed this handle.
    with open(database, "r") as handle:
        dbs = [line.strip().split("\t") for line in handle]

    scores = []
    candidates = []
    cand_vars = []

    for record in dbs:
        temp_rec = record[1]
        if not (core_temp1 and core_temp1 in temp_rec and core_temp2 in temp_rec):
            continue

        record_core_var = temp_rec.split(";")
        candidates.append(record[0])
        cand_vars.append(record_core_var)

        counter = 0
        for var in record_core_var:
            if var in core_vars_list:
                # Exact entry shared with the sample.
                counter += 3
            elif var[:-4] in core_vars:
                # Same entry minus its last four characters occurs somewhere
                # in the sample string (substring test, as in the original).
                counter += 1
            else:
                counter -= 2
        scores.append(counter)

    if not scores:
        diplo1 = '1.v1_1.v1'
    else:
        max_score = max(scores)
        # Shortlist the top score and scores one below it, then prefer the
        # record with the fewest core variants (first one on ties).
        shortlist = [i for i, s in enumerate(scores) if s == max_score or s == max_score - 1]
        best = min(shortlist, key=lambda i: len(cand_vars[i]))
        diplo1 = candidates[best]

    # "A.vX_B.vY" -> "*A/*B": text before the first '.', and text between the
    # first '_' and the second '.'.
    underscores = [i for i, ch in enumerate(diplo1) if ch == "_"]
    dots = [i for i, ch in enumerate(diplo1) if ch == "."]
    hap1 = "*" + diplo1[:dots[0]]
    hap2 = "*" + diplo1[underscores[0] + 1:dots[1]]
    allele_res = hap1 + "/" + hap2

    return [allele_res, diplo1]
|
||||
442
pgx-main/scripts/cyp2c9/b37/bin/snv_def_modules.py
Normal file
442
pgx-main/scripts/cyp2c9/b37/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,442 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the lines of *infile* as one sorted, ';'-joined string.

    Each line is stripped of surrounding whitespace (blank lines stay as empty
    entries).  When ``int(cn) == 1`` every ``"~0/1"`` in the joined string is
    rewritten to ``"~1/1"``.
    """
    # Context manager: the original never closed this handle.
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars
|
||||
|
||||
def get_all_vars_gt(infile_full_gt):
    """Return the lines of *infile_full_gt* as one sorted, ';'-joined string.

    Each line is stripped of surrounding whitespace before sorting; blank
    lines stay as empty entries.
    """
    # Context manager: the original never closed this handle.
    with open(infile_full_gt, "r") as handle:
        return ";".join(sorted(line.strip() for line in handle))
|
||||
|
||||
def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
|
||||
|
||||
|
||||
f = open(infile_spec, "r")
|
||||
|
||||
all_variants = []
|
||||
|
||||
for line in open(infile_full, "r"):
|
||||
line.strip()
|
||||
all_variants.append(line)
|
||||
# all_variants = line.strip().split(";")
|
||||
# print(all_variants)
|
||||
|
||||
if os.stat(infile).st_size == 0:
|
||||
cand_res = ['1.v1_1.v1']
|
||||
allele_res = "*1/*1"
|
||||
return ["".join(cand_res), allele_res];
|
||||
#print("\nSupporting variants")
|
||||
#print("\n" + "".join(all_variants))
|
||||
sys.exit()
|
||||
|
||||
# core_variants = []
|
||||
|
||||
# for line in open(infile, "r"):
|
||||
# line = line.strip()
|
||||
# core_variants.append(line)
|
||||
|
||||
# core_variants = ";".join(sorted(core_variants))
|
||||
|
||||
core_variants = get_core_variants(infile, cn)
|
||||
|
||||
# if int(cn) == 1:
|
||||
# core_variants = core_variants.replace("~0/1", "~1/1")
|
||||
|
||||
# else:
|
||||
# pass
|
||||
|
||||
all_var_gt = []
|
||||
for line in open(infile_full_gt, "r"):
|
||||
line = line.strip()
|
||||
all_var_gt.append(line)
|
||||
|
||||
|
||||
dbs = []
|
||||
|
||||
for line in open(database, "r"):
|
||||
line = line.strip().split("\t")
|
||||
dbs.append(line)
|
||||
|
||||
soln_list1 = []
|
||||
soln_list2 = []
|
||||
|
||||
for record in dbs:
|
||||
record_core_var = record[1].split(";")
|
||||
record_core_var = ";".join(sorted(record_core_var))
|
||||
if record_core_var == core_variants:
|
||||
diplo = record[0]
|
||||
full_dip = record[2]
|
||||
soln_list1.append(record[0])
|
||||
soln_list2.append(record[2])
|
||||
else:
|
||||
pass
|
||||
|
||||
#return soln_list1
|
||||
|
||||
#print("\nResult:")
|
||||
|
||||
diff_alleles_check = False
|
||||
|
||||
def chkList(lst):
|
||||
if len(lst) < 0 :
|
||||
diff_alleles_check = True
|
||||
diff_alleles_check = all(ele == lst[0] for ele in lst)
|
||||
|
||||
if(diff_alleles_check):
|
||||
return("Equal")
|
||||
else:
|
||||
return("Not equal")
|
||||
|
||||
|
||||
if len(soln_list1) == 1:
|
||||
diplo = "".join(soln_list1)
|
||||
res1 = [i for i in range(len(diplo)) if diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo)) if diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo[:res2[0]])
|
||||
hap2 = "*" + str (diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo, allele_res];
|
||||
#print ("\nSupporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
elif len(soln_list1) == 2 and (soln_list1[0] == soln_list1[1]) :
|
||||
diplo = soln_list1[0]
|
||||
res1 = [i for i in range(len(diplo)) if diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo)) if diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo[:res2[0]])
|
||||
hap2 = "*" + str (diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1[0], diplo, allele_res];
|
||||
|
||||
|
||||
elif len(soln_list1) == 2:
|
||||
print(soln_list1)
|
||||
diplo1 = soln_list1[0]
|
||||
diplo2 = soln_list1[1]
|
||||
diplo1_supp_var = soln_list2[0].split(";")
|
||||
diplo2_supp_var = soln_list2[1].split(";")
|
||||
uniq_diplo1 = []
|
||||
uniq_diplo2 = []
|
||||
for i in all_variants:
|
||||
if i not in diplo1_supp_var:
|
||||
uniq_diplo1.append(i)
|
||||
|
||||
if i not in diplo2_supp_var:
|
||||
uniq_diplo2.append(i)
|
||||
|
||||
#print("\nUnique variants in soln 1: {}".format(len(uniq_diplo1)))
|
||||
#print("\nUnique variants in soln 2: {}".format(len(uniq_diplo2)))
|
||||
|
||||
if len(uniq_diplo1) < len(uniq_diplo2):
|
||||
res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo1[:res2[0]])
|
||||
hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo1, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2):
|
||||
res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo2[:res2[0]])
|
||||
hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo2, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
# elif len(uniq_diplo1) == len(uniq_diplo2) and (diplo1 == "4.v11_74.v1" and diplo2 == "4.v12_1.v1"):
|
||||
# res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
# hap1 = "*" + str (diplo2[:res2[0]])
|
||||
# hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
# allele_res = hap1 + "/" + hap2
|
||||
# return [soln_list1, diplo2, allele_res];
|
||||
|
||||
# elif len(uniq_diplo1) == len(uniq_diplo2) and diplo2 == "41.v1_65.v1":
|
||||
# res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
# hap1 = "*" + str (diplo2[:res2[0]])
|
||||
# hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
# allele_res = hap1 + "/" + hap2
|
||||
# return [soln_list1, diplo2, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
# elif len(uniq_diplo1) == len(uniq_diplo2) and (diplo1 == "4.v1_6.v1" and diplo2 == "4.v4_6.v2") :
|
||||
# res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
|
||||
# hap1 = "*" + str (diplo1[:res2[0]])
|
||||
# hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
|
||||
# allele_res = hap1 + "/" + hap2
|
||||
# return [soln_list1, diplo1, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
else:
|
||||
tiebreak1 = []
|
||||
tiebreak2 = []
|
||||
tiebreak3 = []
|
||||
score = []
|
||||
for line in f:
|
||||
line = line.strip().split()
|
||||
#print(line)
|
||||
if line[2] == core_variants:
|
||||
tiebreak1.append(line[1])
|
||||
tiebreak2.append(line[3])
|
||||
tiebreak3.append(line[0])
|
||||
for full_dip in tiebreak2:
|
||||
diplo_supp_gt = full_dip.split(";")
|
||||
uniq_gt = []
|
||||
for i in all_var_gt:
|
||||
if i not in diplo_supp_gt:
|
||||
uniq_gt.append(i)
|
||||
score_dip = len(uniq_gt)
|
||||
score.append(score_dip)
|
||||
|
||||
min_score = min(score)
|
||||
#print(score)
|
||||
|
||||
|
||||
# if chkList(score) == "Equal" and soln_list1[0] == "17.v1_4.v1":
|
||||
# elem = "17.v1_4.v1"
|
||||
# res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
# hap1 = "*" + str (elem[:res2[0]])
|
||||
# hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
# result_dip = hap1 + "/" + hap2
|
||||
# return [soln_list1, elem, result_dip];
|
||||
|
||||
|
||||
if chkList(score) == "Equal":
|
||||
amb_soln_set = []
|
||||
for elem in soln_list1:
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
amb_soln_set.append(result_dip)
|
||||
#elem_pos = tiebreak1.index(elem)
|
||||
#print ("Solution " + str(elem_pos) + ": " + result_dip)
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, allele_res];
|
||||
|
||||
|
||||
# elif score.count(min_score) > 1 and soln_list1[0] == "11.v1_2.v2":
|
||||
# elem = "11.v1_2.v2"
|
||||
# res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
# hap1 = "*" + str (elem[:res2[0]])
|
||||
# hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
# result_dip = hap1 + "/" + hap2
|
||||
# return [soln_list1, elem, result_dip];
|
||||
|
||||
|
||||
|
||||
elif score.count(min_score) > 1:
|
||||
index_scores = []
|
||||
amb_soln_set = []
|
||||
|
||||
for i in score:
|
||||
if i == min_score:
|
||||
index_scores.append(score.index(i))
|
||||
|
||||
alt_solns = []
|
||||
for j in index_scores:
|
||||
elem = tiebreak1[j]
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
alt_solns.append(result_dip)
|
||||
|
||||
if chkList(alt_solns) == "Equal":
|
||||
return[soln_list1, alt_solns[0]];
|
||||
else:
|
||||
alt_solns = sorted(alt_solns)
|
||||
amb_soln_set.append(alt_solns[0])
|
||||
amb_soln_set.append(alt_solns[-1])
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, allele_res];
|
||||
|
||||
# amb_soln_set = []
|
||||
# temp_set = []
|
||||
# temp_set.append(tiebreak1[0])
|
||||
# temp_set.append(tiebreak1[-1])
|
||||
|
||||
# for elem in temp_set:
|
||||
# res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
# hap1 = "*" + str (elem[:res2[0]])
|
||||
# hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
# result_dip = hap1 + "/" + hap2
|
||||
# amb_soln_set.append(result_dip)
|
||||
#elem_pos = tiebreak1.index(elem)
|
||||
#print ("Solution " + str(elem_pos) + ": " + result_dip)
|
||||
# allele_res = " or ".join(amb_soln_set)
|
||||
# return [soln_list1, allele_res];
|
||||
#print ("\nSupporting core variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
else:
|
||||
minpos = score.index(min_score)
|
||||
best_diplo = tiebreak1[minpos]
|
||||
best_cand_haps = tiebreak3[minpos]
|
||||
res1 = [i for i in range(len(best_diplo)) if best_diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(best_diplo)) if best_diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (best_diplo[:res2[0]])
|
||||
hap2 = "*" + str (best_diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, best_cand_haps, allele_res];
|
||||
#print ("Supporting core variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
elif len(soln_list1) == 3:
|
||||
diplo1 = soln_list1[0]
|
||||
diplo2 = soln_list1[1]
|
||||
diplo3 = soln_list1[2]
|
||||
diplo1_supp_var = soln_list2[0].split(";")
|
||||
diplo2_supp_var = soln_list2[1].split(";")
|
||||
diplo3_supp_var = soln_list2[2].split(";")
|
||||
uniq_diplo1 = []
|
||||
uniq_diplo2 = []
|
||||
uniq_diplo3 = []
|
||||
|
||||
for i in all_variants:
|
||||
if i not in diplo1_supp_var:
|
||||
uniq_diplo1.append(i)
|
||||
|
||||
if i not in diplo2_supp_var:
|
||||
uniq_diplo2.append(i)
|
||||
|
||||
if i not in diplo3_supp_var:
|
||||
uniq_diplo3.append(i)
|
||||
|
||||
|
||||
if len(uniq_diplo1) < len(uniq_diplo2) and len(uniq_diplo1) < len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo1[:res2[0]])
|
||||
hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo1, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2) and len(uniq_diplo2) < len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo2[:res2[0]])
|
||||
hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo2, allele_res]
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2) and len(uniq_diplo2) > len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo3)) if diplo3.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo3)) if diplo3.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo3[:res2[0]])
|
||||
hap2 = "*" + str (diplo3[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo3, allele_res]
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
# elif len(uniq_diplo1) == len(uniq_diplo2) == len(uniq_diplo3) and diplo3 == "39.v1_4.v4":
|
||||
# res1 = [i for i in range(len(diplo3)) if diplo3.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(diplo3)) if diplo3.startswith(".", i)]
|
||||
# hap1 = "*" + str (diplo3[:res2[0]])
|
||||
# hap2 = "*" + str (diplo3[res1[0]+1:res2[1]])
|
||||
# allele_res = hap1 + "/" + hap2
|
||||
# return [soln_list1, diplo3, allele_res]
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
elif len(uniq_diplo1) == len(uniq_diplo2) == len(uniq_diplo3) or (len(uniq_diplo1) != len(uniq_diplo2) == len(uniq_diplo3)) or (len(uniq_diplo1) == len(uniq_diplo2) != len(uniq_diplo3)):
|
||||
|
||||
tiebreak1 = []
|
||||
tiebreak2 = []
|
||||
tiebreak3 = []
|
||||
score = []
|
||||
for line in f:
|
||||
line = line.strip().split()
|
||||
#print(line)
|
||||
if line[2] == core_variants:
|
||||
tiebreak1.append(line[1])
|
||||
tiebreak2.append(line[3])
|
||||
tiebreak3.append(line[0])
|
||||
for full_dip in tiebreak2:
|
||||
diplo_supp_gt = full_dip.split(";")
|
||||
uniq_gt = []
|
||||
for i in all_var_gt:
|
||||
if i not in diplo_supp_gt:
|
||||
uniq_gt.append(i)
|
||||
score_dip = len(uniq_gt)
|
||||
score.append(score_dip)
|
||||
|
||||
min_score = min(score)
|
||||
# print(score)
|
||||
|
||||
if chkList(score) == "Equal":
|
||||
amb_soln_set = []
|
||||
for elem in tiebreak1:
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
amb_soln_set.append(result_dip)
|
||||
#elem_pos = tiebreak1.index(elem)
|
||||
#print ("Solution " + str(elem_pos) + ": " + result_dip)
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, tiebreak1, allele_res];
|
||||
#print ("\nSupporting core variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
else:
|
||||
minpos = score.index(min_score)
|
||||
best_diplo = tiebreak1[minpos]
|
||||
best_cand_haps = tiebreak3[minpos]
|
||||
res1 = [i for i in range(len(best_diplo)) if best_diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(best_diplo)) if best_diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (best_diplo[:res2[0]])
|
||||
hap2 = "*" + str (best_diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, best_cand_haps, allele_res];
|
||||
#print ("Supporting core variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
|
||||
#print("\nFull diplotype variants:")
|
||||
#print("\n" + ";".join(all_var_gt))
|
||||
72
pgx-main/scripts/cyp2c9/b37/bin/stellarpgx.py
Normal file
72
pgx-main/scripts/cyp2c9/b37/bin/stellarpgx.py
Normal file
@@ -0,0 +1,72 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
|
||||
# Driver script: reports the CYP2C9 star-allele call for one sample.
#
# Command-line arguments (positional):
#   1. database        - tab-separated diplotype database
#   2. infile          - sample core variants, one entry per line
#   3. infile_full     - all sample variants
#   4. infile_full_gt  - all sample variants with genotypes
#   5. infile_spec     - tie-break table passed to cand_snv_allele_calling
#
# get_core_variants, cand_snv_allele_calling and get_all_vars_gt come from
# snv_def_modules; get_backgroud_alleles comes from bkg_modules.

print("--------------------------------------------\n")
print("CYP2C9 Star Allele Calling with StellarPGx\n")
print("--------------------------------------------\n")

database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]

# The copy number is fixed at 2 in this script (no coverage-based estimate).
cn = 2

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)

snv_def_calls = cand_snv_allele_calling(
    database, infile, infile_full, infile_full_gt, infile_spec, cn
)

if snv_def_calls is None:
    # No database diplotype could be resolved from the core variants: fall
    # back to the closest "background" alleles and flag a possible novel
    # allele.
    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if bac_alleles is None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")

    else:
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    sys.exit()

# First element: candidate diplotype id(s); last element: the star-allele call.
snv_cand_alleles = snv_def_calls[0]

print("\nCandidate alleles:")
print(snv_cand_alleles)

snv_def_alleles = snv_def_calls[-1]

# NOTE(review): the result is not used below; the call is kept so that a
# missing or unreadable genotype file still fails here as it did before.
dip_variants = get_all_vars_gt(infile_full_gt)

print("\nResult:")
print(snv_def_alleles)
|
||||
0
pgx-main/scripts/cyp2c9/b37/bin/sv_modules.py
Normal file
0
pgx-main/scripts/cyp2c9/b37/bin/sv_modules.py
Normal file
78
pgx-main/scripts/cyp2c9/hg38/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/cyp2c9/hg38/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Pick the database diplotype that best approximates the sample.

    Parameters
    ----------
    database : path to a tab-separated file; column 0 is a diplotype id such
        as ``"2.v1_3.v1"`` and column 1 is a ";"-joined list of core variants.
    core_vars : ";"-joined core variants of the sample (as produced by
        ``get_core_variants``).

    Returns
    -------
    list
        ``[allele_res, diplo1]`` where ``diplo1`` is the chosen diplotype id
        and ``allele_res`` its star-allele form (e.g. ``"*2/*3"``).  When no
        database record qualifies, ``["*1/*1", "1.v1_1.v1"]`` is returned.

    Raises
    ------
    IndexError
        If a database line has fewer than two tab-separated columns, or the
        chosen diplotype id lacks the expected "." / "_" separators.
    """
    core_vars_list = core_vars.split(";")

    # First and last sample variants with their final four characters removed
    # (presumably the "~0/1"-style genotype suffix - confirm against the
    # input format).
    last_core = core_vars_list[-1][:-4]
    first_core = core_vars_list[0][:-4]

    candidates = []   # diplotype ids of the records that qualify
    cand_vars = []    # core-variant list of each qualifying record
    scores = []       # similarity score of each qualifying record

    with open(database, "r") as handle:
        for line in handle:
            record = line.strip().split("\t")
            record_core = record[1]

            # A record qualifies only when it mentions BOTH anchor variants.
            # The previous condition, `core_temp1 and core_temp2 in temp_rec`,
            # merely checked that core_temp1 was non-empty and therefore let
            # through records that lack the last core variant.  The
            # non-empty requirement on the last anchor is kept.
            if not (last_core
                    and last_core in record_core
                    and first_core in record_core):
                continue

            record_vars = record_core.split(";")

            score = 0
            for variant in record_vars:
                if variant in core_vars_list:
                    # Same variant with the same trailing suffix.
                    score += 3
                elif variant[:-4] in core_vars:
                    # Variant text occurs in the sample, suffix differs.
                    score += 1
                else:
                    # Record carries a variant the sample does not have.
                    score -= 2

            candidates.append(record[0])
            cand_vars.append(record_vars)
            scores.append(score)

    if not scores:
        return ["*1/*1", "1.v1_1.v1"]

    max_score = max(scores)

    # Keep the top scorers (best score or one below it) and, among them, take
    # the record with the fewest core variants; the first one wins a tie.
    shortlist = [idx for idx, value in enumerate(scores)
                 if value == max_score or value == max_score - 1]
    best = min(shortlist, key=lambda idx: len(cand_vars[idx]))
    diplo1 = candidates[best]

    # "A.vX_B.vY" -> "*A/*B": first haplotype ends at the first ".", the
    # second runs from just after the first "_" to the second ".".
    underscores = [pos for pos, char in enumerate(diplo1) if char == "_"]
    dots = [pos for pos, char in enumerate(diplo1) if char == "."]
    hap1 = "*" + diplo1[:dots[0]]
    hap2 = "*" + diplo1[underscores[0] + 1:dots[1]]
    allele_res = hap1 + "/" + hap2

    return [allele_res, diplo1]
|
||||
444
pgx-main/scripts/cyp2c9/hg38/bin/snv_def_modules.py
Normal file
444
pgx-main/scripts/cyp2c9/hg38/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,444 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the entries of *infile* as one sorted, ";"-joined string.

    Each line of *infile* is stripped of surrounding whitespace and treated
    as one entry; the entries are sorted as plain strings before joining.
    An empty file yields an empty string.

    When ``int(cn) == 1`` every ``"~0/1"`` in the joined string is rewritten
    to ``"~1/1"``.

    The file is opened with a context manager so the handle is closed on
    return (the previous version left it open).
    """
    with open(infile, "r") as handle:
        entries = sorted(line.strip() for line in handle)

    core_vars = ";".join(entries)

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars
|
||||
|
||||
def get_all_vars_gt(infile_full_gt):
    """Return the entries of *infile_full_gt* as one sorted, ";"-joined string.

    Each line is stripped of surrounding whitespace and treated as one entry;
    entries are sorted as plain strings.  An empty file yields an empty
    string.  The file handle is closed on return (the previous version left
    it open).
    """
    with open(infile_full_gt, "r") as handle:
        entries = sorted(line.strip() for line in handle)

    return ";".join(entries)
|
||||
|
||||
def _star_diplotype(diplo):
    """Translate a diplotype id such as ``"2.v1_3.v1"`` into ``"*2/*3"``.

    The first haplotype is the text before the first "."; the second is the
    text between the first "_" and the second ".".
    """
    underscores = [pos for pos, char in enumerate(diplo) if char == "_"]
    dots = [pos for pos, char in enumerate(diplo) if char == "."]
    hap1 = "*" + diplo[:dots[0]]
    hap2 = "*" + diplo[underscores[0] + 1:dots[1]]
    return hap1 + "/" + hap2


def _count_unexplained(observed, supporting):
    """Count the entries of *observed* that do not occur in *supporting*."""
    known = set(supporting)
    return sum(1 for item in observed if item not in known)


def _load_tiebreak_scores(infile_spec, core_variants, all_var_gt):
    """Score the rows of *infile_spec* whose third column equals *core_variants*.

    Rows are split on whitespace.  For every matching row the score is the
    number of entries of *all_var_gt* missing from the row's fourth column
    (a ";"-joined list); lower is better.  Rows with fewer than four columns
    (e.g. blank lines) are skipped.

    Returns three parallel lists ``(cand_haps, diplos, scores)`` taken from
    column 0, column 1 and the computed score of each matching row.

    Raises ValueError when no row matches, which is the exception type the
    previous ``min()``-on-empty-list failure produced.
    """
    cand_haps = []
    diplos = []
    scores = []

    with open(infile_spec, "r") as handle:
        for raw in handle:
            fields = raw.strip().split()
            if len(fields) < 4 or fields[2] != core_variants:
                continue
            cand_haps.append(fields[0])
            diplos.append(fields[1])
            scores.append(_count_unexplained(all_var_gt, fields[3].split(";")))

    if not scores:
        raise ValueError(
            "no row of {} matches the sample core variants".format(infile_spec)
        )

    return cand_haps, diplos, scores


def _best_scoring_call(soln_list1, cand_haps, diplos, scores):
    """Build the result for the single (first) lowest-scoring tie-break row."""
    minpos = scores.index(min(scores))
    return [soln_list1, cand_haps[minpos], _star_diplotype(diplos[minpos])]


def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
    """Call a star-allele diplotype from the sample's core variants.

    Parameters
    ----------
    database : tab-separated file; column 0 = diplotype id, column 1 =
        ";"-joined core variants, column 2 = ";"-joined supporting variants.
    infile : sample core variants, one per line (read via get_core_variants).
    infile_full : all sample variants.
    infile_full_gt : all sample variants with genotypes, one per line.
    infile_spec : whitespace-separated tie-break table; column 0 = candidate
        label, column 1 = diplotype id, column 2 = core variants, column 3 =
        ";"-joined genotyped variants.
    cn : copy number, forwarded to get_core_variants.

    Returns
    -------
    list or None
        The LAST element is always the star-allele call (possibly
        ``"A or B"`` when ambiguous) and the FIRST element the candidate
        diplotype id(s); callers rely only on these two positions.
        ``None`` is returned when zero or more than three database records
        match the sample's core variants.

    Raises
    ------
    ValueError
        When a tie-break is required but *infile_spec* has no matching row.
    """
    # NOTE(review): the original called line.strip() here and discarded the
    # result, so every entry keeps its trailing newline and practically never
    # matches a database variant below.  That behaviour is preserved on
    # purpose, because stripping would change which branch is taken; confirm
    # the intent before changing it.
    with open(infile_full, "r") as handle:
        all_variants = handle.readlines()

    # No core variants at all: report the reference diplotype.
    if os.stat(infile).st_size == 0:
        return ["1.v1_1.v1", "*1/*1"]

    core_variants = get_core_variants(infile, cn)

    with open(infile_full_gt, "r") as handle:
        all_var_gt = [line.strip() for line in handle]

    # Database records whose (sorted) core variants equal the sample's.
    soln_list1 = []   # diplotype ids
    soln_list2 = []   # matching ";"-joined supporting-variant strings
    with open(database, "r") as handle:
        for line in handle:
            record = line.strip().split("\t")
            if ";".join(sorted(record[1].split(";"))) == core_variants:
                soln_list1.append(record[0])
                soln_list2.append(record[2])

    n_solutions = len(soln_list1)

    if n_solutions == 1:
        diplo = soln_list1[0]
        return [soln_list1, diplo, _star_diplotype(diplo)]

    if n_solutions == 2 and soln_list1[0] == soln_list1[1]:
        diplo = soln_list1[0]
        return [diplo, diplo, _star_diplotype(diplo)]

    if n_solutions not in (2, 3):
        # Zero or more than three matches are left to the caller.
        return None

    # For each candidate: how many sample variants it does not account for.
    unexplained = [_count_unexplained(all_variants, supp.split(";"))
                   for supp in soln_list2]

    if n_solutions == 2:
        first, second = unexplained
        if first != second:
            winner = soln_list1[0] if first < second else soln_list1[1]
            return [soln_list1, winner, _star_diplotype(winner)]

        cand_haps, diplos, scores = _load_tiebreak_scores(
            infile_spec, core_variants, all_var_gt
        )
        best = min(scores)

        if all(value == scores[0] for value in scores):
            # The tie-break cannot separate the candidates: report both.
            allele_res = " or ".join(_star_diplotype(d) for d in soln_list1)
            return [soln_list1, allele_res]

        if scores.count(best) > 1:
            # Collect EVERY row sharing the best score.  The original used
            # score.index(i), which always yields the first such row, so the
            # tied rows were never compared and ambiguity went unreported.
            tied_calls = [_star_diplotype(diplos[idx])
                          for idx, value in enumerate(scores) if value == best]
            if all(call == tied_calls[0] for call in tied_calls):
                return [soln_list1, tied_calls[0]]
            tied_calls.sort()
            return [soln_list1, tied_calls[0] + " or " + tied_calls[-1]]

        return _best_scoring_call(soln_list1, cand_haps, diplos, scores)

    # Three candidates.
    first, second, third = unexplained
    if first < second and first < third:
        winner = soln_list1[0]
    elif second < first and second < third:
        winner = soln_list1[1]
    elif third < first and third < second and first != second:
        # Covers third < second < first (as before) and third < first <
        # second, which previously fell through and returned None.
        winner = soln_list1[2]
    else:
        winner = None

    if winner is not None:
        return [soln_list1, winner, _star_diplotype(winner)]

    # Remaining cases are ties (including first == third < second, which
    # previously returned None): resolve them with the genotype table.
    cand_haps, diplos, scores = _load_tiebreak_scores(
        infile_spec, core_variants, all_var_gt
    )

    if all(value == scores[0] for value in scores):
        allele_res = " or ".join(_star_diplotype(d) for d in diplos)
        return [soln_list1, diplos, allele_res]

    return _best_scoring_call(soln_list1, cand_haps, diplos, scores)
|
||||
70
pgx-main/scripts/cyp2c9/hg38/bin/stellarpgx.py
Normal file
70
pgx-main/scripts/cyp2c9/hg38/bin/stellarpgx.py
Normal file
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
|
||||
# CYP2C9 star-allele calling entry point.
#
# Usage: stellarpgx.py <database> <infile> <infile_full> <infile_full_gt> <infile_spec>
# The helper functions come from the star-imports of snv_def_modules / bkg_modules.

print("--------------------------------------------\n")

print("CYP2C9 Star Allele Calling with StellarPGx\n")

print("--------------------------------------------\n")


database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]

# Copy number is hard-coded to 2 in this script; no coverage file is read here.
cn = 2

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)

snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)

if snv_def_calls is None:
    # No diplotype matched the sample's variants: fall back to the closest
    # "background" alleles and flag the sample as potentially novel.
    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if bac_alleles is None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")

    else:
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    sys.exit()


snv_cand_alleles = snv_def_calls[0]

print("\nCandidate alleles:")
print(snv_cand_alleles)

# The last element of the call result is the printable diplotype string.
snv_def_alleles = snv_def_calls[-1]

# Result is not used below; the call is kept so the genotype file is still
# read (and any parse error still surfaces) exactly as before.
dip_variants = get_all_vars_gt(infile_full_gt)

print("\nResult:")

print(snv_def_alleles)
|
||||
0
pgx-main/scripts/cyp2c9/hg38/bin/sv_modules.py
Normal file
0
pgx-main/scripts/cyp2c9/hg38/bin/sv_modules.py
Normal file
77
pgx-main/scripts/cyp2d6/b37/bin/bkg_modules.py
Normal file
77
pgx-main/scripts/cyp2d6/b37/bin/bkg_modules.py
Normal file
@@ -0,0 +1,77 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Pick the database diplotype that best explains the sample's core variants.

    Parameters
    ----------
    database : str
        Path to a tab-separated file; column 0 is a diplotype identifier of
        the form ``<hap1>.<ver>_<hap2>.<ver>`` and column 1 is a
        ``;``-separated list of core variants, each ending in a 4-character
        genotype suffix (e.g. ``~0/1``).
    core_vars : str
        ``;``-separated core variants observed in the sample, same format.

    Returns
    -------
    list
        ``[allele_res, diplo1]`` where ``allele_res`` is ``"*<hap1>/*<hap2>"``
        and ``diplo1`` is the raw database identifier. When no record is a
        candidate the default ``['*2/*2', '2.v1_2.v1']`` is returned.
    """
    core_vars_list = core_vars.split(";")
    # Drop the trailing genotype suffix so only "<pos>~<change>" is compared.
    core_temp1 = core_vars_list[-1][:-4]
    core_temp2 = core_vars_list[0][:-4]

    with open(database, "r") as handle:
        dbs = [line.strip().split("\t") for line in handle]

    # Candidate records must mention BOTH the last and the first sample
    # variant. The original condition `core_temp1 and core_temp2 in temp_rec`
    # only tested core_temp1 for truthiness, never for membership.
    # Records without a variant column (e.g. blank lines) are skipped.
    dbs_temp = [
        record for record in dbs
        if len(record) > 1
        and core_temp1
        and core_temp1 in record[1]
        and core_temp2 in record[1]
    ]

    scores = []
    candidates = []
    cand_vars = []

    for elem in dbs_temp:
        candidates.append(elem[0])
        record_core_var = elem[1].split(";")
        cand_vars.append(record_core_var)

        counter = 0
        for var in record_core_var:
            if var in core_vars_list:
                # Same variant and same genotype.
                counter += 3
            elif var[:-4] in core_vars:
                # Same variant, different genotype (substring match on the
                # full sample string, as in the original).
                counter += 1
            else:
                # Record carries a variant the sample does not have.
                counter += -2

        scores.append(counter)

    if not scores:
        diplo1 = '2.v1_2.v1'
        allele_res = '*2/*2'

    else:
        max_score = max(scores)

        # Keep the top score and near-ties (max - 1), then prefer the
        # candidate defined by the fewest variants.
        indices = [i for i, x in enumerate(scores) if x == max_score or x == max_score - 1]

        cand_diplos = [candidates[i] for i in indices]
        diplo_vars2 = [len(cand_vars[i]) for i in indices]

        min_index = diplo_vars2.index(min(diplo_vars2))
        diplo1 = cand_diplos[min_index]

        # Split "<hap1>.<ver>_<hap2>.<ver>" into the two star-allele numbers.
        res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
        res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
        hap1 = "*" + str(diplo1[:res2[0]])
        hap2 = "*" + str(diplo1[res1[0]+1:res2[1]])
        allele_res = hap1 + "/" + hap2

    return [allele_res, diplo1]
|
||||
1056
pgx-main/scripts/cyp2d6/b37/bin/snv_def_modules.py
Normal file
1056
pgx-main/scripts/cyp2d6/b37/bin/snv_def_modules.py
Normal file
File diff suppressed because it is too large
Load Diff
802
pgx-main/scripts/cyp2d6/b37/bin/stellarpgx.py
Normal file
802
pgx-main/scripts/cyp2d6/b37/bin/stellarpgx.py
Normal file
@@ -0,0 +1,802 @@
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from sv_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
# CYP2D6 star-allele calling entry point: argument parsing, initial copy
# number, SNV-based calling, and the early exit taken when no diplotype in
# the database matches the sample.
#
# NOTE(review): the nesting below was reconstructed from a flattened listing.
# In particular the "Activity score" / "Metaboliser status" lines are placed
# at the level of the no-call block (emitted for every no-call outcome) —
# confirm against the deployed script.

print("--------------------------------------------\n")

print("CYP2D6 Star Allele Calling with StellarPGx\n")

print("--------------------------------------------\n")


database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]
sv_del = sys.argv[6]
sv_dup = sys.argv[7]
cov_file = sys.argv[8]
hap_dbs = sys.argv[9]
act_score = sys.argv[10]

# Element 0 of get_total_CN's result is the total copy number (compared
# against string literals such as '2' further down the script).
cn = get_total_CN(cov_file)[0]

print("Initially computed CN = {}".format(cn))

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)

snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)

if snv_def_calls is None:

    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if int(cn) == 0:
        # No gene copies at all: homozygous whole-gene deletion.
        print("\nResult:")
        print("*5/*5")

    elif bac_alleles is None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")

    elif int(cn) < 2:
        # One copy: pair the first background allele with the deletion allele.
        bac_alleles = bac_alleles[0].split("/")
        bac_alleles1 = bac_alleles[0] + "/" + "*5"
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles1 + "]")

    else:
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    print("\nActivity score:")
    print("Indeterminate")

    print("\nMetaboliser status:")
    print("Indeterminate")

    sys.exit()
|
||||
|
||||
# Report the SNV-based candidates and collect the coverage-derived metrics
# that the copy-number dispatch below relies on.

best_diplos = snv_def_calls[0]

print("\nCandidate alleles:")
print(best_diplos)

# The last element of the call result is the printable diplotype string;
# ambiguous calls contain " or " between the alternatives.
snv_def_alleles = snv_def_calls[-1]

# snv_cand_alleles is only defined for unambiguous calls; every later use is
# expected to be guarded by an "or" check on snv_def_alleles.
if "or" not in snv_def_alleles:
    snv_cand_alleles = snv_def_calls[1]

# Result is not used below; the call is kept so the genotype file is still read.
dip_variants = get_all_vars_gt(infile_full_gt)

print("\nResult:")

# Parse the coverage file once instead of once per metric (the original
# called get_total_CN(cov_file) separately for every index).
cov_metrics = get_total_CN(cov_file)

av_cov = cov_metrics[3]
cn_in1_3pr = cov_metrics[2]
cn_ex9_3pr = cov_metrics[4]
in1_3pr_float = cov_metrics[5]

cov_in4_3pr = cov_metrics[6]
cov_5pr_in4 = cov_metrics[7]
cn_2d7_ex9 = cov_metrics[8]
cn_2d7_in4_in8 = cov_metrics[9]
cov_2d7_ex2_in8 = cov_metrics[10]
cov_2d7_5pr_in1 = cov_metrics[11]

# Final diplotype; stays empty when no definitive call is made, which the
# activity-score step reports as "Indeterminate".
gene_alleles = ""

if snv_def_alleles != '*2/*2':
    in_list = dup_test_init(sv_dup, av_cov)
|
||||
|
||||
|
||||
|
||||
# Structural-variant aware refinement of the SNV-based diplotype, dispatched
# on the total copy number `cn` (a string) and on specific star alleles.
# Each branch prints the final call and, when the call is definitive, stores
# it in `gene_alleles` for the activity-score step.
#
# NOTE(review): the nesting was reconstructed from a flattened listing of the
# script; statement order and all runtime strings are unchanged. Behavioural
# fixes relative to the original are marked with "FIX".

if cn == '2' and snv_def_alleles == '*4/*4':

    test_68 = hyb_test_5_68_4(sv_del, in1_3pr_float, av_cov)

    if test_68 == 'del_hyb':
        # One *4 is really a deletion; the other carries the *68 hybrid.
        snv_def_alleles = (snv_def_alleles.replace('*4', '*5', 1)).replace('*4', '*68+*4')

    gene_alleles = snv_def_alleles
    print(snv_def_alleles)


elif cn == '2':
    if 'or' in snv_def_alleles:
        # Ambiguous call: report as-is, gene_alleles stays empty.
        print(snv_def_alleles)

    else:
        snv_def_alleles = snv_def_alleles.split("/")

        if snv_def_alleles[0] == '*2' or snv_def_alleles[1] == '*2':
            ind_star2 = snv_def_alleles.index('*2')
            ind_other = 1 - ind_star2

            test_13_2_v1 = hybrid_13_2_v1(cov_in4_3pr, cov_5pr_in4)
            test_13_2_v2 = hybrid_13_2_v2(cov_2d7_ex2_in8, cov_2d7_5pr_in1)

            if test_13_2_v1 == 'norm_var':
                gene_alleles = "/".join(snv_def_alleles)
                print(gene_alleles)

            elif test_13_2_v1 == 'hyb_13_2':
                gene_alleles = snv_def_alleles[ind_other] + "/" + "*13+*2"
                print(gene_alleles)

            elif test_13_2_v2 == 'hyb_13_2_v2':
                gene_alleles = snv_def_alleles[ind_other] + "/" + "*13"
                print(gene_alleles)

        elif snv_def_alleles[0] == '*39' or snv_def_alleles[1] == '*39':
            ind_star2 = snv_def_alleles.index('*39')
            ind_other = 1 - ind_star2

            test_83_single = hybrid_test_83_single(sv_dup, cn, av_cov, cn_ex9_3pr)

            if test_83_single == 'norm_star39':
                gene_alleles = "/".join(snv_def_alleles)
                print(gene_alleles)

            elif test_83_single == 'hyb_83_single':
                gene_alleles = snv_def_alleles[ind_other] + "/" + "*83"
                print(gene_alleles)

        elif snv_def_alleles[0] == '*10' or snv_def_alleles[1] == '*10':
            ind_star2 = snv_def_alleles.index('*10')
            ind_other = 1 - ind_star2

            test_36_single = hybrid_test_36_single(sv_dup, cn, av_cov, cn_ex9_3pr)

            if test_36_single == 'norm_star10':
                gene_alleles = "/".join(snv_def_alleles)
                print(gene_alleles)

            elif test_36_single == 'hyb_36_single':
                gene_alleles = snv_def_alleles[ind_other] + "/" + "*36"
                print(gene_alleles)

        else:
            gene_alleles = "/".join(snv_def_alleles)
            print(gene_alleles)


elif cn == '0':
    del_confirm = del_test(sv_del)

    if del_confirm == '*5/*5':
        gene_alleles = del_confirm
        print(gene_alleles)

    elif del_confirm == '*5':
        gene_alleles = del_confirm + "/" + "*other"
        print(gene_alleles)

    else:
        gene_alleles = "*5/*5"
        print(gene_alleles)


elif cn == '1':
    del_confirm = del_test(sv_del)

    if "or" in snv_def_alleles:
        # FIX: the original only handled the ambiguous case when
        # del_confirm == 'None'; otherwise it fell through to code that
        # reads snv_cand_alleles, which is never defined for ambiguous
        # calls (NameError). Ambiguous calls are now always reported here.
        print(snv_def_alleles + "\t" + "Possible CYP2D6 gene deletion (*5) present")

    elif del_confirm == 'None':
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] == snv_def_alleles[1]:
            gene_alleles = snv_def_alleles[0] + "/" + "*5"
            print(gene_alleles)

        elif snv_def_alleles[0] != snv_def_alleles[1]:
            # Decide which of the two SNV alleles is the one really present.
            samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)

            gene_alleles = samp_allele1 + "/" + "*5"
            print(gene_alleles)

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        # With one copy present, any deletion evidence is reported as a
        # single *5 (the original set "*5" in both arms of its if/else).
        if snv_def_alleles[0] == snv_def_alleles[1]:
            del_confirm = "*5"

            gene_alleles = del_confirm + "/" + snv_def_alleles[0]
            print(gene_alleles)

        elif snv_def_alleles[0] != snv_def_alleles[1]:
            samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)

            del_confirm = "*5"

            gene_alleles = del_confirm + "/" + samp_allele1
            print(gene_alleles)


elif (int(cn) == 3 or int(cn) == 4) and snv_def_alleles is not None:

    if "or" in snv_def_alleles:
        print(snv_def_alleles + "\t" + "Duplication present")

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] == '*90' or snv_def_alleles[1] == '*90':

            alt_allele_ind = 1 - snv_def_alleles.index('*90')
            alt_allele = snv_def_alleles[alt_allele_ind]
            sp_allele = tandem_90_1(in_list, alt_allele, cn)

            sp_allele1 = sp_allele.split("/")

            if "*10x2" in sp_allele1:

                test_36 = hybrid_test_36(sv_dup, cn, av_cov, cn_ex9_3pr, cn_2d7_ex9, cn_2d7_in4_in8)

                if test_36 == 'hyb_36_10':
                    sp_allele = sp_allele.replace('*10x2', '*36+*10')

                elif test_36 == 'hyb_36_36':
                    sp_allele = sp_allele.replace('*10x2', '*36x2')

            gene_alleles = sp_allele
            print(sp_allele)

        elif snv_def_alleles[0] == '*57' or snv_def_alleles[1] == '*57':

            alt_allele_ind = 1 - snv_def_alleles.index('*57')
            alt_allele = snv_def_alleles[alt_allele_ind]
            sp_allele = tandem_57_10(in_list, alt_allele, cn)

            # NOTE(review): gene_alleles is not set here (unlike the *90
            # branch), so the activity score is reported as Indeterminate.
            print(sp_allele)

        elif snv_def_alleles[0] != snv_def_alleles[1]:

            phased_dup = dup_test_cn_3_4(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)

            if phased_dup == 'check':
                # FIX: the original wrote `phased_dup == 'No_call'`, a
                # comparison whose result was discarded.
                phased_dup = 'No_call'

            phased_dup1 = phased_dup.split("/")

            if '*4x2' in phased_dup1:
                count1 = phased_dup1.count('*4x2')
                a_ind1 = phased_dup1.index('*4x2')
                a_ind2 = 1 - a_ind1
                other_hap = phased_dup1[a_ind2]

                if count1 == 1:

                    test_68 = hybrid_test_68(sv_dup, cn, av_cov, cn_in1_3pr, in_list)

                    if test_68 == 'hyb_68':
                        if int(cn_in1_3pr) < int(cn):
                            phased_dup = phased_dup.replace('*4x2', '*68+*4')

                        elif int(cn_in1_3pr) == int(cn) and ('x' not in other_hap) and int(cn) == 4:
                            phased_dup = phased_dup.replace('*4x2', '*68+*4')
                            phased_dup = phased_dup.replace(other_hap, (other_hap + 'x2'))

            if '*4x3' in phased_dup1:
                count1 = phased_dup1.count('*4x3')
                a_ind1 = phased_dup1.index('*4x3')
                a_ind2 = 1 - a_ind1
                other_hap = phased_dup1[a_ind2]

                if count1 == 1:

                    test_68 = hybrid_test_68(sv_dup, cn, av_cov, cn_in1_3pr, in_list)

                    if test_68 == 'hyb_68':
                        if int(cn_in1_3pr) < int(cn):
                            phased_dup = phased_dup.replace('*4x3', '*68+*4')

                        elif int(cn_in1_3pr) == int(cn) and 'x' not in other_hap:
                            phased_dup = phased_dup.replace('*4x3', '*68+*4')

            if '*10x2' in phased_dup1:
                count2 = phased_dup1.count('*10x2')

                if count2 == 1:
                    # NOTE(review): called with 4 arguments here but with 6
                    # in the *90 branch above — confirm hybrid_test_36's
                    # signature accepts both.
                    test_36 = hybrid_test_36(sv_dup, cn, av_cov, cn_ex9_3pr)

                    if test_36 == 'hyb_36_10':
                        phased_dup = phased_dup.replace('*10x2', '*36+*10')

                    elif test_36 == 'hyb_36_36':
                        phased_dup = phased_dup.replace('*10x2', '*36x2')

            if '*10x3' in phased_dup1:
                count3 = phased_dup1.count('*10x3')

                if count3 == 1:
                    test_36 = hybrid_test_36_mod(sv_dup, cn, av_cov, cn_ex9_3pr)

                    if test_36 == 'hyb_36_10':
                        phased_dup = phased_dup.replace('*10x3', '*36+*10x2')

                    elif test_36 == 'hyb_36_36':
                        phased_dup = phased_dup.replace('*10x3', '*36x2+*10')

            if '*1x3' in phased_dup1:
                count2 = phased_dup1.count('*1x3')

                if count2 == 1:
                    test_83 = hybrid_test_83(sv_dup, cn, av_cov, cn_ex9_3pr)

                    if test_83 == 'hyb_83':
                        phased_dup = phased_dup.replace('*1x3', '*1x2+*83')

            if '*2' in phased_dup1:
                count2 = phased_dup1.count('*2')
                b_ind1 = phased_dup1.index('*2')
                b_ind2 = 1 - b_ind1

                if count2 == 1:
                    test_13_2_v1 = hybrid_13_2_v1(cov_in4_3pr, cov_5pr_in4)
                    test_13_2_v2 = hybrid_13_2_v2(cov_2d7_ex2_in8, cov_2d7_5pr_in1)

                    # Either test reporting 'norm_var' vetoes the hybrid call.
                    if test_13_2_v1 == 'norm_var':
                        pass

                    elif test_13_2_v2 == 'norm_var':
                        pass

                    elif test_13_2_v1 == 'hyb_13_2':
                        phased_dup = phased_dup1[b_ind2] + "/" + '*13+*2'

                    elif test_13_2_v2 == 'hyb_13_2_v2':
                        phased_dup = phased_dup1[b_ind2] + "/" + '*13'

            if '*2x2' in phased_dup1:
                count2 = phased_dup1.count('*2x2')
                b_ind1 = phased_dup1.index('*2x2')
                b_ind2 = 1 - b_ind1

                if count2 == 1:
                    test_13_2_v1 = hybrid_13_2_v1(cov_in4_3pr, cov_5pr_in4)
                    test_13_2_v2 = hybrid_13_2_v2(cov_2d7_ex2_in8, cov_2d7_5pr_in1)

                    if test_13_2_v1 == 'norm_var':
                        pass

                    elif test_13_2_v2 == 'norm_var':
                        pass

                    elif test_13_2_v1 == 'hyb_13_2':
                        phased_dup = phased_dup1[b_ind2] + "/" + '*13+*2'

                    elif test_13_2_v2 == 'hyb_13_2_v2':
                        phased_dup = phased_dup1[b_ind2] + "/" + '*13+*2'

            gene_alleles = phased_dup
            print(phased_dup)

        elif snv_def_alleles[0] == snv_def_alleles[1]:

            # Homozygous SNV call: put all extra copies on the second haplotype.
            rt_2 = int(cn) - 1

            phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))

            phased_dup1 = phased_dup.split("/")

            if '*4x2' in phased_dup1:
                count1 = phased_dup1.count('*4x2')

                if count1 == 1:
                    test_68 = hybrid_test_68(sv_dup, cn, av_cov, cn_in1_3pr, in_list)

                    if test_68 == 'hyb_68':
                        # FIX: the original called phased_dup.replace(...)
                        # without assigning the result, so the *68 hybrid
                        # was never reported (str.replace returns a copy).
                        phased_dup = phased_dup.replace('*4x2', '*68+*4')

            if '*10x2' in phased_dup1:
                count2 = phased_dup1.count('*10x2')

                if count2 == 1:
                    # NOTE(review): 4-argument call, see the note above.
                    test_36 = hybrid_test_36(sv_dup, cn, av_cov, cn_ex9_3pr)

                    if test_36 == 'hyb_36_10':
                        phased_dup = phased_dup.replace('*10x2', '*36+*10')

                    elif test_36 == 'hyb_36_36':
                        phased_dup = phased_dup.replace('*10x2', '*36x2')

            if '*10x3' in phased_dup1:
                count3 = phased_dup1.count('*10x3')

                if count3 == 1:
                    test_36 = hybrid_test_36_mod(sv_dup, cn, av_cov, cn_ex9_3pr)

                    if test_36 == 'hyb_36_10':
                        phased_dup = phased_dup.replace('*10x3', '*36+*10x2')

                    elif test_36 == 'hyb_36_36':
                        phased_dup = '*36+*10/*36+*10'

            if '*1x3' in phased_dup1:
                count2 = phased_dup1.count('*1x3')

                if count2 == 1:
                    test_83 = hybrid_test_83(sv_dup, cn, av_cov, cn_ex9_3pr)

                    if test_83 == 'hyb_83':
                        phased_dup = phased_dup.replace('*1x3', '*1x2+*83')

            if '*2x2' in phased_dup1:
                count2 = phased_dup1.count('*2x2')
                b_ind1 = phased_dup1.index('*2x2')
                b_ind2 = 1 - b_ind1

                if count2 == 1:
                    test_13_2_v1 = hybrid_13_2_v1(cov_in4_3pr, cov_5pr_in4)
                    test_13_2_v2 = hybrid_13_2_v2(cov_2d7_ex2_in8, cov_2d7_5pr_in1)

                    if test_13_2_v1 == 'norm_var':
                        pass

                    elif test_13_2_v2 == 'norm_var':
                        pass

                    elif test_13_2_v1 == 'hyb_13_2':
                        phased_dup = phased_dup1[b_ind2] + "/" + '*13+*2'

                    elif test_13_2_v2 == 'hyb_13_2_v2':
                        phased_dup = phased_dup1[b_ind2] + "/" + '*13+*2'

            gene_alleles = phased_dup
            print(phased_dup)


elif int(cn) > 4 and snv_def_alleles is not None:

    if "or" in snv_def_alleles:
        print(snv_def_alleles + "\t" + "Duplication present")

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] != snv_def_alleles[1]:

            phased_dup = dup_test_cn_n(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)

            if phased_dup == 'check':
                phased_dup = 'No_call'

            phased_dup1 = phased_dup.split("/")

            if '*10x4' in phased_dup1:
                count3 = phased_dup1.count('*10x4')

                if count3 == 1:
                    test_36 = hybrid_test_36_multi(sv_dup, cn, av_cov, cn_ex9_3pr)

                    if test_36 == 'norm_mt':
                        pass

                    elif test_36 == 'hyb_36_10':
                        phased_dup = phased_dup.replace('*10x4', '*36+*10x3')

                    elif test_36 == 'hyb_36_36':
                        phased_dup = phased_dup.replace('*10x4', '*36x2+*10x2')

                    elif test_36 == 'hyb_36_36_36':
                        phased_dup = phased_dup.replace('*10x4','*36x3+*10')

                    else:
                        phased_dup = "No_call"

            elif '*10x3' in phased_dup1:
                count3 = phased_dup1.count('*10x3')

                if count3 == 1:
                    test_36 = hybrid_test_36_multi(sv_dup, cn, av_cov, cn_ex9_3pr)

                    if test_36 == 'hyb_36_10':
                        phased_dup = phased_dup.replace('*10x3', '*36+*10x2')

                    elif test_36 == 'hyb_36_36':
                        phased_dup = phased_dup.replace('*10x3', '*36x2+*10')

                    elif test_36 == 'hyb_36_36_36':
                        phased_dup = phased_dup.replace('*10x3','*36x3')

            elif '*10x' in phased_dup1:
                phased_dup = 'No_call'

        elif snv_def_alleles[0] == snv_def_alleles[1]:
            rt_2 = int(cn) - 1
            phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))

            # (The original re-checked phased_dup == 'check' here; the
            # string built above always contains "/", so that was dead code.)

            phased_dup1 = phased_dup.split("/")

            if '*10x4' in phased_dup1:
                count3 = phased_dup1.count('*10x4')

                if count3 == 1:
                    test_36 = hybrid_test_36_multi(sv_dup, cn, av_cov, cn_ex9_3pr)

                    if test_36 == 'norm_mt':
                        pass

                    elif test_36 == 'hyb_36_10':
                        phased_dup = phased_dup.replace('*10x4', '*36+*10x3')

                    elif test_36 == 'hyb_36_36':
                        phased_dup = '*36+*10/*36+*10x2'

                    elif test_36 == 'hyb_36_36_36':
                        phased_dup = '*36+*10/*36x2+*10'

                    else:
                        phased_dup = "No_call"

            elif phased_dup1[0].startswith('*10x') or phased_dup1[1].startswith('*10x'):

                if phased_dup1[0].startswith('*10x'):
                    dup_10_hyb = phased_dup1[0]

                elif phased_dup1[1].startswith('*10x'):
                    dup_10_hyb = phased_dup1[1]

                # Copy count of the multiplied *10 haplotype, as a string.
                cn_star10 = dup_10_hyb[(dup_10_hyb.find('x') + 1 ):]

                test_36 = hybrid_test_36_multi_10(sv_dup, cn, av_cov, cn_ex9_3pr, cn_star10)

                if test_36 == 'norm_mt':
                    pass

                elif test_36 == 'check':
                    phased_dup = 'No_call'

                else:
                    # Any other return value is the replacement haplotype.
                    c_ind1 = phased_dup1.index(dup_10_hyb)
                    c_ind2 = 1 - c_ind1
                    phased_dup = str(phased_dup1[c_ind2]) + "/" + test_36

        gene_alleles = phased_dup
        print(phased_dup)


elif int(cn) > 2 and snv_def_alleles is None:

    print("Possible rare CYP2D6/2D7 hybrid present")
|
||||
|
||||
|
||||
|
||||
print("\nActivity score:")


def get_ac_score(act_score, star_alleles):
    """Return the total activity score of a diplotype string.

    Parameters
    ----------
    act_score : str
        Path to a whitespace-separated table; column 0 is a star allele
        (e.g. ``*4``) and column 1 its per-copy score, or ``n`` when the
        score is not defined.
    star_alleles : str
        Diplotype such as ``*1/*4``, ``*1x2/*10`` or ``*36+*10/*1``.
        ``/`` and ``+`` separate gene copies; ``xN`` is a copy multiplier.

    Returns
    -------
    float or str
        Sum of ``score * copies`` over all listed alleles, or the string
        ``"Indeterminate"`` when any allele has score ``n`` or is missing
        from the table.
    """
    # All state is local: the original appended to module-level lists, so a
    # second call would have summed the first call's alleles again.
    allele_scores = {}
    with open(act_score, "r") as handle:
        for line in handle:
            fields = line.strip().split()
            if len(fields) >= 2:
                allele_scores[fields[0]] = fields[1]

    per_allele = []
    for elem in star_alleles.replace("/", "+").split("+"):
        # "*10x3" -> allele "*10", 3 copies; "*10" -> 1 copy.
        m_allele, has_multiplier, n_allele = elem.partition("x")
        if not has_multiplier:
            n_allele = "1"

        if m_allele not in allele_scores:
            # Previously a KeyError (e.g. for the "*other" placeholder);
            # an allele without a tabulated score cannot be scored.
            return "Indeterminate"

        per_allele.append((allele_scores[m_allele], n_allele))

    if any(score == "n" for score, _ in per_allele):
        return "Indeterminate"

    return sum(float(score) * float(copies) for score, copies in per_allele)
|
||||
|
||||
|
||||
# Print the activity score for the final diplotype, then the metaboliser
# phenotype derived from it. No definitive diplotype -> "Indeterminate".
if gene_alleles in ("", 'No_call', 'check'):
    ac_score = "Indeterminate"
else:
    ac_score = get_ac_score(act_score, gene_alleles)

print(ac_score)


print("\nMetaboliser status:")

# Score bands used by this script: 0 -> PM, (0, 1.25) -> IM,
# [1.25, 2.25] -> NM, above 2.25 -> UM. Any other value prints nothing.
metaboliser_status = None

if ac_score == "Indeterminate":
    metaboliser_status = "Indeterminate"
elif ac_score == 0:
    metaboliser_status = "Poor metaboliser (PM)"
elif 0 < ac_score < 1.25:
    metaboliser_status = "Intermediate metaboliser (IM)"
elif 1.25 <= ac_score <= 2.25:
    metaboliser_status = "Normal metaboliser (NM)"
elif ac_score > 2.25:
    metaboliser_status = "Ultrarapid metaboliser (UM)"

if metaboliser_status is not None:
    print(metaboliser_status)
|
||||
|
||||
611
pgx-main/scripts/cyp2d6/b37/bin/sv_modules.py
Normal file
611
pgx-main/scripts/cyp2d6/b37/bin/sv_modules.py
Normal file
@@ -0,0 +1,611 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
def get_total_CN(cov_file):
    """Derive copy-number and coverage metrics from a region coverage file.

    Parameters
    ----------
    cov_file : str
        Path to a whitespace-separated file with at least 12 rows. In each
        row, column 1 is the region start, column 2 the region end and
        column 3 the summed depth over the region. Row order is fixed; the
        variable names below show which region each row is taken to be.

    Returns
    -------
    list
        Twelve elements, in this order:
        0  total copy number (str)
        1  rounded mean depth of the gene region, row 2 (int)
        2  copy number of the in1_3pr region (str)
        3  rounded mean depth of the control regions (int)
        4  unrounded copy-number ratio of the ex9_3pr region (str)
        5  rounded mean depth of the in1_3pr region (int)
        6-11  mean depths of in4_3pr, 5pr_in4, 2d7_ex9, 2d7_in4_in8,
              2d7_ex2_in8 and 2d7_5pr_in1 (str)

    Raises
    ------
    IndexError
        If the file has fewer than 12 rows or a row has fewer than 4 columns.
    ZeroDivisionError
        If a region has zero length or the control depth is zero.
    """
    with open(cov_file, "r") as handle:
        all_reg = [line.strip().split() for line in handle]

    def mean_depth(row):
        # Summed depth divided by region length.
        return float(row[3]) / (float(row[2]) - float(row[1]))

    (av_ex9_3pr, av_in1_3pr, av_2d6_cov, av_vdr_cov, _av_5pr_in1,
     av_in4_3pr, av_5pr_in4, av_2d7_ex9, av_2d7_in4_in8, av_egfr_cov,
     av_2d7_ex2_in8, av_2d7_5pr_in1) = [mean_depth(all_reg[i]) for i in range(12)]

    # Control depth is the mean of rows 3 and 9.
    av_ctrl_cov = (av_vdr_cov + av_egfr_cov)/2

    comp_av = av_2d6_cov/av_ctrl_cov
    # Diploid scaling; 0.15 is added before rounding (rationale not stated
    # in the code).
    temp_cn = 2 * comp_av + 0.15
    total_cn = round(temp_cn)

    in1_3pr = round(2 * av_in1_3pr/av_ctrl_cov)
    ex9_3pr = (2 * av_ex9_3pr/av_ctrl_cov)

    return [
        str(total_cn),
        round(av_2d6_cov),
        str(int(in1_3pr)),
        round(av_ctrl_cov),
        str(ex9_3pr),
        round(av_in1_3pr),
        str(av_in4_3pr),
        str(av_5pr_in4),
        str(av_2d7_ex9),
        str(av_2d7_in4_in8),
        str(av_2d7_ex2_in8),
        str(av_2d7_5pr_in1),
    ]
|
||||
|
||||
|
||||
samp_gt = ""
|
||||
samp_gt_hap1 = ""
|
||||
|
||||
def del_test(sv_del):
    """Classify the deletion evidence in a structural-variant call file.

    Parameters
    ----------
    sv_del : str
        Path to the deletion call file. Only lines containing the text
        ``COVERAGE`` are inspected; the last whitespace-separated field of
        such a line is read as a float.

    Returns
    -------
    str or None
        ``"None"`` (the string) when the file is empty, ``"*5/*5"`` when the
        last field equals 1.0, ``"*5"`` when it equals -1.0. If no line
        yields either value the function returns Python ``None``; callers
        compare against the string ``'None'``, so that case is treated as
        "deletion evidence present".
    """
    if os.stat(sv_del).st_size == 0:
        return "None"

    with open(sv_del, "r") as handle:
        for line in handle:
            if "COVERAGE" not in line:
                continue

            ab_hom = float(line.strip().split()[-1])

            if ab_hom == 1.0:
                return "*5/*5"
            if ab_hom == -1.0:
                return "*5"

    return None
|
||||
|
||||
|
||||
hap_adv_list = []
|
||||
hap_t1 = []
|
||||
|
||||
def del_adv_test(hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, core_vars):
    """Pick which of two candidate alleles carries the sample's first het variant.

    Parameters
    ----------
    hap_dbs : str
        Path to a whitespace-separated haplotype table; column 1 is the
        haplotype identifier and column 2 its ``;``-separated variants.
    cand_allele1, cand_allele2 : str
        Haplotype identifiers to look up in column 1 of ``hap_dbs``.
    test_allele1, test_allele2 : str
        Values returned when the variant belongs to candidate 1 / 2.
    core_vars : str
        ``;``-separated sample variants, each ending in a genotype such as
        ``~0/1``.

    Returns
    -------
    str or None
        ``test_allele1`` if the first variant whose genotype is ``0/1`` is
        listed for ``cand_allele1``, else ``test_allele2`` if it is listed
        for ``cand_allele2``, else ``None`` (also when there is no ``0/1``
        variant or a candidate is absent from the table).
    """
    # State is local. The original appended to the module-level lists
    # hap_adv_list / hap_t1, so on any later call hap_t1[0] was still the
    # variant from the first call.
    with open(hap_dbs, "r") as handle:
        hap_rows = [line.strip().split() for line in handle]

    # Variant without its 4-character "~0/1" suffix.
    het_vars = [var[:-4] for var in core_vars.split(";") if var[-3:] == "0/1"]
    if not het_vars:
        return None

    list_t1 = []
    list_t2 = []
    for elem in hap_rows:
        if len(elem) < 3:
            # Blank or truncated row: nothing to compare.
            continue
        if elem[1] == cand_allele1:
            list_t1 = elem[2].split(';')
        if elem[1] == cand_allele2:
            list_t2 = elem[2].split(';')

    if het_vars[0] in list_t1:
        return test_allele1
    if het_vars[0] in list_t2:
        return test_allele2
    return None
|
||||
|
||||
|
||||
het_hom_list = []
|
||||
het_hom_list_new = []
|
||||
|
||||
def dup_test_init(sv_dup, av_cov):
    """Read variant rows and append two derived values to each row.

    Lines containing "COVERAGE" or "AGGREGATED" and blank lines are skipped.
    Every remaining line is split on whitespace; column 2 (0-based) is read
    as an integer and the second-to-last column as a float.

    Args:
        sv_dup: Path to the whitespace-delimited input file.
        av_cov: Numeric coverage value averaged with the mean of column 2 to
            form the normalisation denominator.

    Returns:
        A list of rows (lists of strings) each extended with
        ``round(supp_reads / norm_cov, 3)`` and ``supp_reads``, where
        ``supp_reads = round(second_to_last * column_2)``. An empty list is
        returned when the file has no variant rows.
    """
    # Rows are collected locally so repeated calls do not pile up on (and
    # re-extend) rows left over from an earlier call.
    records = []
    with open(sv_dup, "r") as handle:
        for line in handle:
            if "COVERAGE" in line or "AGGREGATED" in line:
                continue
            fields = line.strip().split()
            if fields:
                records.append(fields)

    if not records:
        # Nothing to normalise; avoids dividing by len([]) == 0.
        return []

    av_read_cov = sum(int(rec[2]) for rec in records) / len(records)
    norm_cov = (av_cov + av_read_cov) / 2

    for rec in records:
        supp_reads = round(float(rec[-2]) * int(rec[2]))
        rec.append(round(supp_reads / norm_cov, 3))
        rec.append(supp_reads)

    return records
|
||||
|
||||
|
||||
|
||||
# Module-level lists, created empty at import time.
hap_def_list = []
allele_cn_list = []
|
||||
|
||||
def dup_test_cn_3_4(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
    """Assign copy numbers to two alleles and format the diplotype string.

    The heterozygous record (``rec[1] == "0/1"``) with the largest
    ``rec[-2]`` value is selected. Its variant name (``rec[0]``) is looked up
    in the ';'-separated variant lists (column 2) of the two candidate rows
    of ``hap_dbs`` to decide which allele carries it. That allele gets
    ``round(ratio * c_num)`` copies and the other allele gets the remainder.

    Args:
        sv_dup: Unused; kept for interface compatibility.
        hap_dbs: Path to a whitespace-delimited file; column 1 (0-based)
            holds the candidate name, column 2 the variant list.
        cand_allele1: Candidate name for the first allele.
        cand_allele2: Candidate name for the second allele.
        test_allele1: Label emitted for the first allele.
        test_allele2: Label emitted for the second allele.
        c_num: Total copy number (anything ``int()`` accepts).
        av_cov: Unused; kept for interface compatibility.
        in_list: Records as lists; ``rec[-2]`` is compared numerically and
            ``rec[-4]`` is converted with ``float``.

    Returns:
        A diplotype string such as "*1/*4x2", or 'check' when no rule
        applies, there is no heterozygous record, a candidate is absent
        from ``hap_dbs``, or the selected variant is in neither list.
    """
    # Call-local state: the previous module-level accumulators made every
    # call after the first reuse the first call's allele/copy-number pair.
    with open(hap_dbs, "r") as handle:
        hap_records = [line.strip().split() for line in handle]

    het_list = [rec for rec in in_list if rec[1] == "0/1"]
    if not het_list:
        return 'check'

    ratios = [rec[-2] for rec in het_list]
    max_het = max(ratios)
    max_het_pos = ratios.index(max_het)
    var = het_list[max_het_pos][0]
    alt_fraction = float(het_list[max_het_pos][-4])

    # Replace the ratio with the record's rec[-4] value when the ratio
    # exceeds 1 or exceeds that value.
    if max_het > 1 or max_het > alt_fraction:
        max_het = alt_fraction

    list_3t_2 = None
    list_4t_2 = None
    for elem in hap_records:
        if len(elem) < 3:
            continue
        if elem[1] == cand_allele1:
            list_3t_2 = elem[2].split(';')
        if elem[1] == cand_allele2:
            list_4t_2 = elem[2].split(';')

    if list_3t_2 is None or list_4t_2 is None:
        return 'check'

    hdb_list = list_3t_2 + list_4t_2
    if var not in hdb_list:
        return 'check'

    # A position inside the first candidate's list means allele 1 carries it.
    if hdb_list.index(var) < len(list_3t_2):
        carrier, other = test_allele1, test_allele2
    else:
        carrier, other = test_allele2, test_allele1

    carrier_cn = int(round(max_het * int(c_num)))
    other_cn = int(c_num) - carrier_cn

    if carrier_cn == 0:
        res_dip = carrier + "/" + other + "x" + str(other_cn - 1)
    elif other_cn == 0:
        res_dip = other + "/" + carrier + "x" + str(carrier_cn - 1)
    elif carrier_cn == 1:
        res_dip = carrier + "/" + other + "x" + str(other_cn)
    elif other_cn == 1:
        res_dip = other + "/" + carrier + "x" + str(carrier_cn)
    elif carrier_cn == 2:
        res_dip = carrier + "x2" + "/" + other + "x" + str(other_cn)
    elif other_cn == 2:
        res_dip = other + "x2" + "/" + carrier + "x" + str(carrier_cn)
    elif other_cn < -1:
        res_dip = other + "/" + carrier + "x" + str(3)
    elif carrier_cn < -1:
        res_dip = carrier + "/" + other + "x" + str(3)
    else:
        res_dip = 'check'

    return res_dip
|
||||
|
||||
|
||||
|
||||
def dup_test_cn_n(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
    """Assign copy numbers to two alleles for an arbitrary total copy number.

    The heterozygous record (``rec[1] == "0/1"``) with the largest
    ``rec[-2]`` value is selected. Its variant name (``rec[0]``) is located
    in the ';'-separated variant lists (column 2) of the two candidate rows
    of ``hap_dbs``. The carrying allele gets ``round(ratio * c_num - 0.15)``
    copies and the other allele gets the remainder.

    Args:
        sv_dup: Unused; kept for interface compatibility.
        hap_dbs: Path to a whitespace-delimited file; column 1 (0-based)
            holds the candidate name, column 2 the variant list.
        cand_allele1: Candidate name for the first allele.
        cand_allele2: Candidate name for the second allele.
        test_allele1: Label emitted for the first allele.
        test_allele2: Label emitted for the second allele.
        c_num: Total copy number (anything ``int()`` accepts).
        av_cov: Unused; kept for interface compatibility.
        in_list: Records as lists; ``rec[-2]`` is compared numerically.

    Returns:
        A diplotype string such as "*1x2/*4x3", or 'check' when no rule
        applies, there is no heterozygous record, a candidate is absent
        from ``hap_dbs``, or the selected variant is in neither list.
    """
    # Call-local state: the previous module-level accumulators made every
    # call after the first reuse the first call's allele/copy-number pair.
    with open(hap_dbs, "r") as handle:
        hap_records = [line.strip().split() for line in handle]

    het_list = [rec for rec in in_list if rec[1] == "0/1"]
    if not het_list:
        return 'check'

    ratios = [rec[-2] for rec in het_list]
    max_het = max(ratios)
    var = het_list[ratios.index(max_het)][0]

    list_3t_2 = None
    list_4t_2 = None
    for elem in hap_records:
        if len(elem) < 3:
            continue
        if elem[1] == cand_allele1:
            list_3t_2 = elem[2].split(';')
        if elem[1] == cand_allele2:
            list_4t_2 = elem[2].split(';')

    if list_3t_2 is None or list_4t_2 is None:
        return 'check'

    hdb_list = list_3t_2 + list_4t_2
    if var not in hdb_list:
        return 'check'

    # A position inside the first candidate's list means allele 1 carries it.
    if hdb_list.index(var) < len(list_3t_2):
        carrier, other = test_allele1, test_allele2
    else:
        carrier, other = test_allele2, test_allele1

    carrier_cn = int(round(max_het * int(c_num) - 0.15))
    other_cn = int(c_num) - carrier_cn

    if carrier_cn == 0:
        return carrier + "/" + other + "x" + str(other_cn - 1)
    if other_cn == 0:
        return other + "/" + carrier + "x" + str(carrier_cn - 1)

    # Copy numbers 1-4 are tested carrier-first, then other, in ascending
    # order; a single copy is written without an "x1" suffix.
    for copies in (1, 2, 3, 4):
        suffix = "" if copies == 1 else "x" + str(copies)
        if carrier_cn == copies:
            return carrier + suffix + "/" + other + "x" + str(other_cn)
        if other_cn == copies:
            return other + suffix + "/" + carrier + "x" + str(carrier_cn)

    if other_cn < 0:
        return other + "/" + carrier + "x" + str(carrier_cn + other_cn - 1)
    if carrier_cn < 0:
        return carrier + "/" + other + "x" + str(other_cn + carrier_cn - 1)

    return 'check'
|
||||
|
||||
|
||||
def hybrid_test_68(sv_dup, c_num, av_cov, cn_in1_3pr1, in_list):
    """Compare the last-column values of two fixed marker variants.

    Args:
        sv_dup: Unused; kept for interface compatibility.
        c_num: Unused; kept for interface compatibility.
        av_cov: Unused; kept for interface compatibility.
        cn_in1_3pr1: Unused; kept for interface compatibility.
        in_list: Records as lists; ``rec[0]`` is the variant name and
            ``rec[-1]`` a numeric value for it.

    Returns:
        'hyb_68' when value('42526694~G>A') / value('42524947~C>T') is
        greater than 1.4, otherwise 'norm_dup'. 'norm_dup' is also returned
        when either marker is missing or the denominator is zero.
    """
    # Keep the first occurrence of each name, matching list.index().
    support = {}
    for rec in in_list:
        support.setdefault(rec[0], rec[-1])

    val_68 = support.get('42526694~G>A')
    val_4 = support.get('42524947~C>T')

    # Without both markers (or with a zero denominator) the ratio cannot be
    # formed; fall back to the function's default answer.
    if val_68 is None or not val_4:
        return 'norm_dup'

    return 'hyb_68' if val_68 / val_4 > 1.4 else 'norm_dup'
|
||||
|
||||
|
||||
def hyb_test_5_68_4(sv_del, in1_3pr1_float, av_cov):
    """Combine a "COVERAGE" line check with a coverage ratio.

    Args:
        sv_del: Path to a text file scanned for lines containing "COVERAGE".
        in1_3pr1_float: Numeric coverage value (numerator).
        av_cov: Numeric coverage value (denominator).

    Returns:
        'norm_art' when the file has no "COVERAGE" line and
        ``1.6 < 2 * in1_3pr1_float / av_cov < 2.8``; 'del_hyb' when it has
        one and the ratio is below 1.6; ``None`` in every other case.
    """
    # The context manager closes the handle that was previously leaked.
    with open(sv_del, "r") as handle:
        has_coverage_line = any("COVERAGE" in line for line in handle)

    t1 = 2 * in1_3pr1_float / av_cov

    if not has_coverage_line and 1.6 < t1 < 2.8:
        return 'norm_art'
    if has_coverage_line and t1 < 1.6:
        return 'del_hyb'
    return None
|
||||
|
||||
|
||||
def hybrid_test_36(sv_dup, cn, av_cov, cn_ex9_3pr):
    """Classify a copy-number estimate relative to the total copy number.

    Args:
        sv_dup: Unused; kept for interface compatibility.
        cn: Total copy number (anything ``int()`` accepts).
        av_cov: Unused; kept for interface compatibility.
        cn_ex9_3pr: Copy-number estimate (anything ``float()`` accepts).

    Returns:
        'norm_dup' when the rounded estimate equals ``cn``; 'hyb_36_10' when
        ``cn - 1.35 < estimate < cn - 0.5``; 'hyb_36_36' when
        ``cn - 2 <= estimate < cn - 1.35``; otherwise ``None``.
    """
    total_cn = int(cn)
    ex9_cn = float(cn_ex9_3pr)

    if int(round(ex9_cn)) == total_cn:
        return 'norm_dup'
    if (total_cn - 1) - 0.35 < ex9_cn < (total_cn - 1) + 0.5:
        return 'hyb_36_10'
    if (total_cn - 2) <= ex9_cn < (total_cn - 2 + 0.65):
        return 'hyb_36_36'
    return None
|
||||
|
||||
|
||||
def hybrid_test_36_single(sv_dup, cn, av_cov, cn_ex9_3pr):
    """Flag a copy-number estimate that falls about one copy below ``cn``.

    Args:
        sv_dup: Unused; kept for interface compatibility.
        cn: Total copy number (anything ``int()`` accepts).
        av_cov: Unused; kept for interface compatibility.
        cn_ex9_3pr: Copy-number estimate (anything ``float()`` accepts).

    Returns:
        'hyb_36_single' when the rounded estimate differs from ``cn`` and
        ``cn - 1.35 < estimate < cn - 0.5``; otherwise 'norm_star10'.
    """
    total_cn = int(cn)
    ex9_cn = float(cn_ex9_3pr)

    if int(round(ex9_cn)) == total_cn:
        return 'norm_star10'
    if (total_cn - 1) - 0.35 < ex9_cn < (total_cn - 1) + 0.5:
        return 'hyb_36_single'
    return 'norm_star10'
|
||||
|
||||
|
||||
|
||||
|
||||
def hybrid_test_36_mod(sv_dup, cn, av_cov, cn_ex9_3pr):
    """Classify a copy-number estimate relative to the total copy number.

    Args:
        sv_dup: Unused; kept for interface compatibility.
        cn: Total copy number (anything ``int()`` accepts).
        av_cov: Unused; kept for interface compatibility.
        cn_ex9_3pr: Copy-number estimate (anything ``float()`` accepts).

    Returns:
        'norm_mt' when the rounded estimate equals ``cn``; 'hyb_36_10' when
        ``cn - 1.3 < estimate < cn - 0.5``; 'hyb_36_36' when
        ``cn - 2.3 <= estimate <= cn - 1.3``; otherwise ``None``.
    """
    total_cn = int(cn)
    ex9_cn = float(cn_ex9_3pr)

    if int(round(ex9_cn)) == total_cn:
        return 'norm_mt'
    if (total_cn - 1) - 0.3 < ex9_cn < (total_cn - 1) + 0.5:
        return 'hyb_36_10'
    if (total_cn - 2) - 0.3 <= ex9_cn < (total_cn - 2 + 0.7):
        return 'hyb_36_36'
    return None
|
||||
|
||||
|
||||
def hybrid_test_36_multi(sv_dup, cn, av_cov, cn_ex9_3pr):
    """Classify a copy-number estimate into zero to three missing copies.

    Args:
        sv_dup: Unused; kept for interface compatibility.
        cn: Total copy number (anything ``int()`` accepts).
        av_cov: Unused; kept for interface compatibility.
        cn_ex9_3pr: Copy-number estimate (anything ``float()`` accepts).

    Returns:
        'norm_mt' when the rounded estimate equals ``cn``; 'hyb_36_10' when
        ``cn - 1.05 < estimate < cn - 0.5``; 'hyb_36_36' when
        ``cn - 2 <= estimate < cn - 1.05``; 'hyb_36_36_36' when
        ``cn - 3 <= estimate < cn - 2.05``; otherwise 'check'.
    """
    total_cn = int(cn)
    ex9_cn = float(cn_ex9_3pr)

    if int(round(ex9_cn)) == total_cn:
        return 'norm_mt'
    if (total_cn - 1) - 0.05 < ex9_cn < (total_cn - 1) + 0.5:
        return 'hyb_36_10'
    if (total_cn - 2) <= ex9_cn < (total_cn - 2 + 0.95):
        return 'hyb_36_36'
    if (total_cn - 3) <= ex9_cn < (total_cn - 3 + 0.95):
        return 'hyb_36_36_36'
    return 'check'
|
||||
|
||||
|
||||
def hybrid_test_36_multi_10(sv_dup, cn, av_cov, cn_ex9_3pr, cn_star10):
    """Split a copy count into "*36" and "*10" parts from a CN estimate.

    Args:
        sv_dup: Unused; kept for interface compatibility.
        cn: Total copy number (anything ``int()`` accepts).
        av_cov: Unused; kept for interface compatibility.
        cn_ex9_3pr: Copy-number estimate (anything ``float()`` accepts).
        cn_star10: Copy count that the "*36" count is subtracted from
            (anything ``int()`` accepts).

    Returns:
        'norm_mt' when the rounded estimate equals ``cn``. When the estimate
        is below ``cn - 0.5``, a string "*36+*10x<k>" (one "*36" copy) or
        "*36x<n>+*10x<k>", where ``n = cn - round(estimate)`` and
        ``k = cn_star10 - n``. Otherwise 'check'.
    """
    total_cn = int(cn)
    ex9_cn = float(cn_ex9_3pr)
    rounded_ex9 = int(round(ex9_cn))

    if rounded_ex9 == total_cn:
        return 'norm_mt'

    if ex9_cn < (total_cn - 1) + 0.5:
        cn_star36 = total_cn - rounded_ex9
        adj_cn_star10 = int(cn_star10) - cn_star36

        if cn_star36 == 1:
            return '*36+*10x' + str(adj_cn_star10)
        return '*36x' + str(cn_star36) + '+*10x' + str(adj_cn_star10)

    return 'check'
|
||||
|
||||
|
||||
def hybrid_13_2_v1(cov_in4_3pr, cov_5pr_in4):
    """Classify the ratio of two coverage values.

    Args:
        cov_in4_3pr: Numerator (anything ``float()`` accepts).
        cov_5pr_in4: Denominator (anything ``float()`` accepts).

    Returns:
        'hyb_13_2' when ``0.45 < numerator / denominator < 0.75``, otherwise
        'norm_var'. A zero denominator also yields 'norm_var'.
    """
    numerator = float(cov_in4_3pr)
    denominator = float(cov_5pr_in4)

    # Every out-of-band ratio already maps to 'norm_var'; treat an undefined
    # ratio the same way instead of raising ZeroDivisionError.
    if denominator == 0:
        return 'norm_var'

    ratio = numerator / denominator
    return 'hyb_13_2' if 0.45 < ratio < 0.75 else 'norm_var'
|
||||
|
||||
|
||||
def hybrid_13_2_v2(cov_2d7_ex2_in8, cov_2d7_5pr_in1):
    """Classify the ratio of two coverage values.

    Args:
        cov_2d7_ex2_in8: Numerator (anything ``float()`` accepts).
        cov_2d7_5pr_in1: Denominator (anything ``float()`` accepts).

    Returns:
        'hyb_13_2_v2' when ``0.45 < numerator / denominator < 0.75``,
        otherwise 'norm_var'. A zero denominator also yields 'norm_var'.
    """
    numerator = float(cov_2d7_ex2_in8)
    denominator = float(cov_2d7_5pr_in1)

    # Every out-of-band ratio already maps to 'norm_var'; treat an undefined
    # ratio the same way instead of raising ZeroDivisionError.
    if denominator == 0:
        return 'norm_var'

    ratio = numerator / denominator
    return 'hyb_13_2_v2' if 0.45 < ratio < 0.75 else 'norm_var'
|
||||
|
||||
|
||||
|
||||
def tandem_90_1(in_list, alt_allele, cn):
    """Format a diplotype involving "*90" from per-variant values.

    With more than one record, the last-column value of the marker
    '42525100~T>C' is compared with the largest last-column value among the
    remaining records. With a single record, its absolute ``rec[-2]`` value
    times ``cn`` is used instead.

    Args:
        in_list: Records as lists; ``rec[0]`` is the variant name,
            ``rec[-2]`` is converted with ``float`` and ``rec[-1]`` is
            numeric.
        alt_allele: Label of the other allele.
        cn: Total copy number (anything ``int()`` accepts).

    Returns:
        A diplotype string, or 'check' when no rule applies (this used to
        surface as ``UnboundLocalError``), when the marker is missing, when
        its value is zero, or when ``in_list`` is empty.
    """
    names = [rec[0] for rec in in_list]
    ratios = [abs(float(rec[-2])) for rec in in_list]
    support = [rec[-1] for rec in in_list]
    copy_number = int(cn)

    # Sentinel for "could not resolve"; the same string other callers in
    # this module already use for unresolved calls.
    res = 'check'

    if len(names) > 1:
        if '42525100~T>C' not in names:
            return res

        a = support.pop(names.index('42525100~T>C'))
        if not a:
            return res

        c = round(max(support) / a)

        if copy_number == 3 and c == 1:
            res = alt_allele + "/" + "*90+*1"
        elif copy_number == 3 and c > 1:
            res = alt_allele + "x2" + "/" + "*90"
        elif copy_number == 4 and c == 2:
            res = alt_allele + "x2" + "/" + "*90+*1"
        elif copy_number == 4 and c >= 3:
            res = alt_allele + "x3" + "/" + "*90"

    elif names:
        val2 = round(ratios[0] * copy_number)

        if copy_number == 3 and val2 == 1:
            res = '*1/*90+*1'

    return res
|
||||
|
||||
|
||||
def tandem_57_10(in_list, alt_allele, cn):
    """Format a diplotype involving "*57" from per-variant values.

    The last-column values of the markers '42525908~G>A' (``a``) and
    '42526694~G>A' (``m``) are read, and ``b`` is the largest last-column
    value among all other records. The decision uses ``c = round(b / a)``
    and ``p = round(m / a)`` together with ``cn``.

    Args:
        in_list: Records as lists; ``rec[0]`` is the variant name and
            ``rec[-1]`` a numeric value for it.
        alt_allele: Label of the other allele.
        cn: Total copy number (anything ``int()`` accepts).

    Returns:
        A diplotype string. ``alt_allele + "/*57+*10"`` is the fallback when
        no specific rule matches or fewer than two records are supplied.
        'check' is returned when a marker is missing, ``a`` is zero, or
        there is no record besides the two markers.
    """
    fallback = alt_allele + "/" + "*57+*10"

    names = [rec[0] for rec in in_list]
    support = [rec[-1] for rec in in_list]

    if len(names) <= 1:
        return fallback

    if '42525908~G>A' not in names or '42526694~G>A' not in names:
        return 'check'

    index1 = names.index('42525908~G>A')
    index2 = names.index('42526694~G>A')

    # Read both marker values before removing anything: popping the first
    # marker and then indexing with a position taken from the un-popped
    # name list shifted the second lookup onto the wrong record.
    a = support[index1]
    m = support[index2]
    others = [val for pos, val in enumerate(support) if pos not in (index1, index2)]

    if not a or not others:
        return 'check'

    copy_number = int(cn)
    c = round(max(others) / a)
    p = round(m / a)

    if copy_number == 3 and c == 1 and p > 1:
        res = alt_allele + "/" + "*57+*10"
    elif copy_number == 3 and c > 1 and p == 1:
        res = alt_allele + "x2" + "/" + "*57"
    elif copy_number == 4 and c == 2 and p > 1:
        res = alt_allele + "x2" + "/" + "*57+*10"
    elif copy_number == 4 and c >= 3 and p == 1:
        res = alt_allele + "x3" + "/" + "*57"
    elif copy_number == 4 and p == 1 and alt_allele == '*10':
        res = "*57+*10" + "/" + "*57+*10"
    elif copy_number == 4 and p > 1 and alt_allele == '*10':
        res = "*10x2" + "/" + "*57+*10"
    else:
        res = fallback

    return res
|
||||
|
||||
|
||||
def hybrid_test_83_single(sv_dup, cn, av_cov, cn_ex9_3pr):
    """Flag a copy-number estimate that falls about one copy below ``cn``.

    Args:
        sv_dup: Unused; kept for interface compatibility.
        cn: Total copy number (anything ``int()`` accepts).
        av_cov: Unused; kept for interface compatibility.
        cn_ex9_3pr: Copy-number estimate (anything ``float()`` accepts).

    Returns:
        'hyb_83_single' when the rounded estimate differs from ``cn`` and
        ``cn - 1.35 < estimate < cn - 0.5``; otherwise 'norm_star39'.
    """
    total_cn = int(cn)
    ex9_cn = float(cn_ex9_3pr)

    if int(round(ex9_cn)) == total_cn:
        return 'norm_star39'
    if (total_cn - 1) - 0.35 < ex9_cn < (total_cn - 1) + 0.5:
        return 'hyb_83_single'
    return 'norm_star39'
|
||||
|
||||
|
||||
def hybrid_test_83(sv_dup, cn, av_cov, cn_ex9_3pr):
    """Flag a copy-number estimate that falls about one copy below ``cn``.

    Args:
        sv_dup: Unused; kept for interface compatibility.
        cn: Total copy number (anything ``int()`` accepts).
        av_cov: Unused; kept for interface compatibility.
        cn_ex9_3pr: Copy-number estimate (anything ``float()`` accepts).

    Returns:
        'hyb_83' when the rounded estimate differs from ``cn`` and
        ``cn - 1.35 < estimate < cn - 0.5``; otherwise 'norm_star39'.
    """
    total_cn = int(cn)
    ex9_cn = float(cn_ex9_3pr)

    if int(round(ex9_cn)) == total_cn:
        return 'norm_star39'
    if (total_cn - 1) - 0.35 < ex9_cn < (total_cn - 1) + 0.5:
        return 'hyb_83'
    return 'norm_star39'
|
||||
|
||||
77
pgx-main/scripts/cyp2d6/hg38/bin/bkg_modules.py
Normal file
77
pgx-main/scripts/cyp2d6/hg38/bin/bkg_modules.py
Normal file
@@ -0,0 +1,77 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Pick the database diplotype that best matches a sample's core variants.

    Database rows that contain both the first and the last core variant
    (genotype suffix stripped) are scored per database variant: +3 for an
    exact match including genotype, +1 when only the variant (last four
    characters removed) occurs in ``core_vars``, -2 otherwise. Among rows
    scoring the maximum or one below it, the first row with the fewest
    variants is chosen.

    Args:
        database: Path to a tab-separated file; column 0 is a diplotype key
            shaped like "1.v1_4.v1" and column 1 a ';'-separated variant
            list.
        core_vars: ';'-separated sample variants, each ending in a
            four-character genotype suffix such as "~0/1".

    Returns:
        ``[allele_res, diplo1]``: ``diplo1`` is the chosen key and
        ``allele_res`` the "*A/*B" string derived from it. When no row
        qualifies the result is ``['*1/*1', '1.v1_1.v1']``.
    """
    core_vars_list = core_vars.split(";")
    core_temp1 = core_vars_list[-1][:-4]
    core_temp2 = core_vars_list[0][:-4]

    with open(database, "r") as handle:
        dbs = [line.strip().split("\t") for line in handle]

    # `a and b in s` parses as `a and (b in s)`, so the last core variant
    # was never actually required to be present. Test both explicitly; the
    # leading truthiness check keeps "no core variants -> no candidates".
    dbs_temp = [
        record for record in dbs
        if core_temp1 and core_temp1 in record[1] and core_temp2 in record[1]
    ]

    scores = []
    candidates = []
    cand_vars = []

    for elem in dbs_temp:
        record_core_var = elem[1].split(";")
        candidates.append(elem[0])
        cand_vars.append(record_core_var)

        counter = 0
        for variant in record_core_var:
            if variant in core_vars_list:
                counter += 3
            elif variant[:-4] in core_vars:
                counter += 1
            else:
                counter -= 2
        scores.append(counter)

    if not scores:
        return ['*1/*1', '1.v1_1.v1']

    max_score = max(scores)
    shortlisted = [i for i, x in enumerate(scores) if x == max_score or x == max_score - 1]

    # min() keeps the first shortlisted row among ties on variant count.
    best = min(shortlisted, key=lambda i: len(cand_vars[i]))
    diplo1 = candidates[best]

    # Key layout "<hap1>.<sub>_<hap2>.<sub>": hap1 runs up to the first '.',
    # hap2 from just after the first '_' up to the second '.'.
    res1 = [i for i, ch in enumerate(diplo1) if ch == "_"]
    res2 = [i for i, ch in enumerate(diplo1) if ch == "."]
    hap1 = "*" + str(diplo1[:res2[0]])
    hap2 = "*" + str(diplo1[res1[0] + 1:res2[1]])
    allele_res = hap1 + "/" + hap2

    return [allele_res, diplo1]
|
||||
1204
pgx-main/scripts/cyp2d6/hg38/bin/snv_def_modules.py
Normal file
1204
pgx-main/scripts/cyp2d6/hg38/bin/snv_def_modules.py
Normal file
File diff suppressed because it is too large
Load Diff
810
pgx-main/scripts/cyp2d6/hg38/bin/stellarpgx.py
Normal file
810
pgx-main/scripts/cyp2d6/hg38/bin/stellarpgx.py
Normal file
@@ -0,0 +1,810 @@
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from sv_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
print("--------------------------------------------\n")
|
||||
|
||||
print("CYP2D6 Star Allele Calling with StellarPGx\n")
|
||||
|
||||
print("--------------------------------------------\n")
|
||||
|
||||
|
||||
|
||||
database = sys.argv[1]
|
||||
infile = sys.argv[2]
|
||||
infile_full = sys.argv[3]
|
||||
infile_full_gt = sys.argv[4]
|
||||
infile_spec = sys.argv[5]
|
||||
sv_del = sys.argv[6]
|
||||
sv_dup = sys.argv[7]
|
||||
cov_file = sys.argv[8]
|
||||
hap_dbs = sys.argv[9]
|
||||
act_score = sys.argv[10]
|
||||
|
||||
|
||||
|
||||
cn = get_total_CN(cov_file)[0]
|
||||
|
||||
print("Initially computed CN = {}".format(cn))
|
||||
|
||||
supp_core_vars = get_core_variants(infile, cn)
|
||||
|
||||
print("\nSample core variants:")
|
||||
print(supp_core_vars)
|
||||
|
||||
|
||||
snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)
|
||||
|
||||
|
||||
if snv_def_calls == None:
|
||||
|
||||
bac_alleles = get_backgroud_alleles(database, supp_core_vars)
|
||||
|
||||
if int(cn) == 0:
|
||||
print("\nResult:")
|
||||
print("*5/*5")
|
||||
|
||||
elif bac_alleles == None:
|
||||
print("\nResult:")
|
||||
print("Possible novel allele or suballele present: interpret with caution")
|
||||
|
||||
elif bac_alleles != None and int(cn) < 2:
|
||||
bac_alleles = bac_alleles[0].split("/")
|
||||
bac_alleles1 = bac_alleles[0] + "/" + "*5"
|
||||
print("\nResult:")
|
||||
print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
|
||||
print("\nLikely background alleles:")
|
||||
print("[" + bac_alleles1 + "]")
|
||||
|
||||
else:
|
||||
print("\nCandidate alleles:")
|
||||
print("[" + bac_alleles[-1] + "]")
|
||||
|
||||
print("\nResult:")
|
||||
print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
|
||||
print("\nLikely background alleles:")
|
||||
print("[" + bac_alleles[0] + "]")
|
||||
|
||||
print("\nActivity score:")
|
||||
print("Indeterminate")
|
||||
|
||||
print("\nMetaboliser status:")
|
||||
print("Indeterminate")
|
||||
|
||||
|
||||
sys.exit()
|
||||
|
||||
best_diplos = snv_def_calls[0]
|
||||
|
||||
print("\nCandidate alleles:")
|
||||
print(best_diplos)
|
||||
|
||||
|
||||
snv_def_alleles = snv_def_calls[-1]
|
||||
|
||||
if "or" in snv_def_alleles:
|
||||
pass
|
||||
else:
|
||||
snv_cand_alleles = snv_def_calls[1]
|
||||
|
||||
|
||||
dip_variants = get_all_vars_gt(infile_full_gt)
|
||||
|
||||
|
||||
print("\nResult:")
|
||||
|
||||
|
||||
|
||||
av_cov = get_total_CN(cov_file)[3]
|
||||
cn_in1_3pr = get_total_CN(cov_file)[2]
|
||||
cn_ex9_3pr = get_total_CN(cov_file)[4]
|
||||
in1_3pr_float = get_total_CN(cov_file)[5]
|
||||
cov_in4_3pr = get_total_CN(cov_file)[6]
|
||||
cov_5pr_in4 = get_total_CN(cov_file)[7]
|
||||
cn_2d7_ex9 = get_total_CN(cov_file)[8]
|
||||
cn_2d7_in4_in8 = get_total_CN(cov_file)[9]
|
||||
cov_2d7_ex2_in8 = get_total_CN(cov_file)[10]
|
||||
cov_2d7_5pr_in1 = get_total_CN(cov_file)[11]
|
||||
|
||||
|
||||
gene_alleles = ""
|
||||
|
||||
|
||||
if snv_def_alleles != '*1/*1':
|
||||
in_list = dup_test_init(sv_dup, av_cov)
|
||||
|
||||
if cn == '2' and snv_def_alleles == '*4/*4':
|
||||
|
||||
test_68 = hyb_test_5_68_4(sv_del, in1_3pr_float, av_cov)
|
||||
|
||||
if test_68 == 'norm_art':
|
||||
pass
|
||||
elif test_68 == 'del_hyb':
|
||||
snv_def_alleles = (snv_def_alleles.replace('*4', '*5', 1)).replace('*4', '*68+*4')
|
||||
|
||||
gene_alleles = snv_def_alleles
|
||||
print(gene_alleles)
|
||||
|
||||
|
||||
elif cn == '2':
|
||||
# print(snv_def_alleles)
|
||||
if 'or' in snv_def_alleles:
|
||||
# print ("\n")
|
||||
print (snv_def_alleles)
|
||||
else:
|
||||
snv_def_alleles = snv_def_alleles.split("/")
|
||||
|
||||
|
||||
if snv_def_alleles[0] == '*2' or snv_def_alleles[1] == '*2':
|
||||
ind_star2 = snv_def_alleles.index('*2')
|
||||
ind_other = 1 - ind_star2
|
||||
|
||||
test_13_2_v1 = hybrid_13_2_v1(cov_in4_3pr, cov_5pr_in4)
|
||||
test_13_2_v2 = hybrid_13_2_v2(cov_2d7_ex2_in8, cov_2d7_5pr_in1)
|
||||
|
||||
if test_13_2_v1 == 'norm_var':
|
||||
gene_alleles = "/".join(snv_def_alleles)
|
||||
print(gene_alleles)
|
||||
|
||||
elif test_13_2_v1 == 'hyb_13_2':
|
||||
gene_alleles = snv_def_alleles[ind_other] + "/" + "*13+*2"
|
||||
print(gene_alleles)
|
||||
|
||||
elif test_13_2_v2 == 'hyb_13_2_v2':
|
||||
gene_alleles = snv_def_alleles[ind_other] + "/" + "*13"
|
||||
print(gene_alleles)
|
||||
|
||||
|
||||
elif snv_def_alleles[0] == '*39' or snv_def_alleles[1] == '*39':
|
||||
ind_star2 = snv_def_alleles.index('*39')
|
||||
ind_other = 1 - ind_star2
|
||||
|
||||
test_83_single = hybrid_test_83_single(sv_dup, cn, av_cov, cn_ex9_3pr)
|
||||
|
||||
if test_83_single == 'norm_star39':
|
||||
gene_alleles = "/".join(snv_def_alleles)
|
||||
print(gene_alleles)
|
||||
|
||||
elif test_83_single == 'hyb_83_single':
|
||||
gene_alleles = snv_def_alleles[ind_other] + "/" + "*83"
|
||||
print(gene_alleles)
|
||||
|
||||
|
||||
elif snv_def_alleles[0] == '*10' or snv_def_alleles[1] == '*10':
|
||||
ind_star2 = snv_def_alleles.index('*10')
|
||||
ind_other = 1 - ind_star2
|
||||
|
||||
test_36_single = hybrid_test_36_single(sv_dup, cn, av_cov, cn_ex9_3pr)
|
||||
|
||||
if test_36_single == 'norm_star10':
|
||||
gene_alleles = "/".join(snv_def_alleles)
|
||||
print(gene_alleles)
|
||||
|
||||
elif test_36_single == 'hyb_36_single':
|
||||
gene_alleles = snv_def_alleles[ind_other] + "/" + "*36"
|
||||
print(gene_alleles)
|
||||
|
||||
|
||||
else:
|
||||
# print("\n")
|
||||
gene_alleles = "/".join(snv_def_alleles)
|
||||
print(gene_alleles)
|
||||
|
||||
|
||||
elif cn == '0':
|
||||
del_confirm = del_test(sv_del)
|
||||
if del_confirm == '*5/*5':
|
||||
gene_alleles = del_confirm
|
||||
print(gene_alleles)
|
||||
|
||||
elif del_confirm == '*5':
|
||||
samp_dip = del_confirm + "/" + "*other"
|
||||
print(samp_dip)
|
||||
|
||||
else:
|
||||
gene_alleles = "*5/*5"
|
||||
print (gene_alleles)
|
||||
|
||||
elif cn == '1':
|
||||
del_confirm = del_test(sv_del)
|
||||
|
||||
if "or" in snv_def_alleles and del_confirm == 'None':
|
||||
print (snv_def_alleles + "\t" + "Possible CYP2D6 gene deletion (*5) present")
|
||||
|
||||
elif "or" not in snv_def_alleles and del_confirm == 'None':
|
||||
snv_def_alleles = snv_def_alleles.split("/")
|
||||
snv_cand_alleles = "".join(snv_cand_alleles)
|
||||
snv_cand_alleles = snv_cand_alleles.split("_")
|
||||
|
||||
if snv_def_alleles[0] == snv_def_alleles[1]:
|
||||
gene_alleles = snv_def_alleles[0] + "/" + "*5"
|
||||
print(gene_alleles)
|
||||
|
||||
elif snv_def_alleles[0] != snv_def_alleles[1]:
|
||||
samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)
|
||||
gene_alleles = samp_allele1 + "/" + "*5"
|
||||
print(gene_alleles)
|
||||
|
||||
else:
|
||||
snv_def_alleles = snv_def_alleles.split("/")
|
||||
snv_cand_alleles = "".join(snv_cand_alleles)
|
||||
snv_cand_alleles = snv_cand_alleles.split("_")
|
||||
|
||||
if snv_def_alleles[0] == snv_def_alleles[1]:
|
||||
|
||||
if del_confirm == "*5/*5":
|
||||
del_confirm = "*5"
|
||||
else:
|
||||
del_confirm = "*5"
|
||||
|
||||
gene_alleles = (del_confirm + "/" + snv_def_alleles[0])
|
||||
print(gene_alleles)
|
||||
|
||||
elif snv_def_alleles[0] != snv_def_alleles[1]:
|
||||
samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)
|
||||
|
||||
if del_confirm == "*5/*5":
|
||||
del_confirm = "*5"
|
||||
else:
|
||||
del_confirm = "*5"
|
||||
|
||||
gene_alleles = (del_confirm + "/" + samp_allele1)
|
||||
print(gene_alleles)
|
||||
|
||||
elif (int(cn) == 3 or int(cn) == 4) and snv_def_alleles != None:
|
||||
|
||||
orig = snv_def_alleles
|
||||
if "or" in snv_def_alleles:
|
||||
print (snv_def_alleles + "\t" + "Duplication present")
|
||||
|
||||
else:
|
||||
snv_def_alleles = snv_def_alleles.split("/")
|
||||
snv_cand_alleles = "".join(snv_cand_alleles)
|
||||
snv_cand_alleles = snv_cand_alleles.split("_")
|
||||
|
||||
|
||||
if snv_def_alleles[0] == '*90' or snv_def_alleles[1] == '*90':
|
||||
|
||||
alt_allele_ind = 1 - snv_def_alleles.index('*90')
|
||||
alt_allele = snv_def_alleles[alt_allele_ind]
|
||||
sp_allele = tandem_90_1(in_list, alt_allele, cn)
|
||||
|
||||
|
||||
sp_allele1 = sp_allele.split("/")
|
||||
|
||||
if "*10x2" in sp_allele1:
|
||||
|
||||
test_36 = hybrid_test_36(sv_dup, cn, av_cov, cn_ex9_3pr, cn_2d7_ex9, cn_2d7_in4_in8)
|
||||
|
||||
if test_36 == 'norm_dup':
|
||||
pass
|
||||
|
||||
elif test_36 == 'hyb_36_10':
|
||||
sp_allele = sp_allele.replace('*10x2', '*36+*10')
|
||||
|
||||
elif test_36 == 'hyb_36_36':
|
||||
sp_allele = sp_allele.replace('*10x2', '*36x2')
|
||||
|
||||
gene_alleles = sp_allele
|
||||
print(gene_alleles)
|
||||
|
||||
|
||||
elif snv_def_alleles[0] == '*57' or snv_def_alleles[1] == '*57':
|
||||
|
||||
alt_allele_ind = 1 - snv_def_alleles.index('*57')
|
||||
alt_allele = snv_def_alleles[alt_allele_ind]
|
||||
sp_allele = tandem_57_10(in_list, alt_allele, cn)
|
||||
|
||||
|
||||
print(sp_allele)
|
||||
|
||||
|
||||
|
||||
elif snv_def_alleles[0] != snv_def_alleles[1]:
|
||||
phased_dup = dup_test_cn_3_4(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)
|
||||
|
||||
if phased_dup == 'check':
|
||||
phased_dup == 'No_call'
|
||||
|
||||
else:
|
||||
pass
|
||||
|
||||
phased_dup1 = phased_dup.split("/")
|
||||
|
||||
|
||||
if '*4x2' in phased_dup1:
|
||||
count1 = phased_dup1.count('*4x2')
|
||||
a_ind1 = phased_dup1.index('*4x2')
|
||||
a_ind2 = 1 - a_ind1
|
||||
other_hap = phased_dup1[a_ind2]
|
||||
|
||||
if count1 == 1:
|
||||
|
||||
test_68 = hybrid_test_68(sv_dup, cn, av_cov, cn_in1_3pr, in_list)
|
||||
|
||||
if test_68 == 'norm_dup':
|
||||
pass
|
||||
elif test_68 == 'hyb_68':
|
||||
if int(cn_in1_3pr) < int(cn):
|
||||
phased_dup = phased_dup.replace('*4x2', '*68+*4')
|
||||
|
||||
elif int(cn_in1_3pr) == int(cn) and ('x' not in other_hap) and int(cn) == 4:
|
||||
phased_dup = phased_dup.replace('*4x2', '*68+*4')
|
||||
phased_dup = phased_dup.replace(other_hap, (other_hap + 'x2'))
|
||||
|
||||
else:
|
||||
phased_dup = phased_dup.replace('*4x2', '*68+*4')
|
||||
|
||||
elif count1 == 2:
|
||||
pass
|
||||
|
||||
if '*4x3' in phased_dup1:
|
||||
count1 = phased_dup1.count('*4x3')
|
||||
a_ind1 = phased_dup1.index('*4x3')
|
||||
a_ind2 = 1 - a_ind1
|
||||
other_hap = phased_dup1[a_ind2]
|
||||
|
||||
if count1 == 1:
|
||||
|
||||
test_68 = hybrid_test_68(sv_dup, cn, av_cov, cn_in1_3pr, in_list)
|
||||
|
||||
if test_68 == 'norm_dup':
|
||||
pass
|
||||
elif test_68 == 'hyb_68':
|
||||
if int(cn_in1_3pr) < int(cn):
|
||||
phased_dup = phased_dup.replace('*4x3', '*68+*4')
|
||||
|
||||
elif int(cn_in1_3pr) == int(cn) and 'x' not in other_hap:
|
||||
phased_dup = phased_dup.replace('*4x3', '*68+*4')
|
||||
# phased_dup = phased_dup.replace(other_hap, (other_hap + 'x2'))
|
||||
|
||||
elif count1 == 2:
|
||||
pass
|
||||
|
||||
|
||||
if '*10x2' in phased_dup1:
|
||||
count2 = phased_dup1.count('*10x2')
|
||||
b_ind1 = phased_dup1.index('*10x2')
|
||||
b_ind2 = 1 - b_ind1
|
||||
|
||||
|
||||
if count2 == 1:
|
||||
test_36 = hybrid_test_36(sv_dup, cn, av_cov, cn_ex9_3pr, cn_2d7_ex9, cn_2d7_in4_in8)
|
||||
|
||||
|
||||
if test_36 == 'norm_dup':
|
||||
pass
|
||||
|
||||
elif test_36 == 'hyb_36_10':
|
||||
phased_dup = phased_dup.replace('*10x2', '*36+*10')
|
||||
|
||||
elif test_36 == 'hyb_36_36':
|
||||
phased_dup = phased_dup.replace('*10x2', '*36x2')
|
||||
|
||||
|
||||
if '*10x3' in phased_dup1:
|
||||
count3 = phased_dup1.count('*10x3')
|
||||
c_ind1 = phased_dup1.index('*10x3')
|
||||
c_ind2 = 1 - c_ind1
|
||||
|
||||
if count3 == 1:
|
||||
test_36 = hybrid_test_36_mod(sv_dup, cn, av_cov, cn_ex9_3pr)
|
||||
|
||||
|
||||
if test_36 == 'norm_mt':
|
||||
pass
|
||||
|
||||
elif test_36 == 'hyb_36_10':
|
||||
phased_dup = phased_dup.replace('*10x3', '*36+*10x2')
|
||||
|
||||
|
||||
elif test_36 == 'hyb_36_36':
|
||||
phased_dup = phased_dup.replace('*10x3', '*36x2+*10')
|
||||
|
||||
|
||||
if '*1x3' in phased_dup1:
|
||||
count2 = phased_dup1.count('*1x3')
|
||||
b_ind1 = phased_dup1.index('*1x3')
|
||||
b_ind2 = 1 - b_ind1
|
||||
|
||||
|
||||
if count2 == 1:
|
||||
test_83 = hybrid_test_83(sv_dup, cn, av_cov, cn_ex9_3pr)
|
||||
|
||||
|
||||
if test_83 == 'norm_star39':
|
||||
pass
|
||||
|
||||
elif test_83 == 'hyb_83':
|
||||
phased_dup = phased_dup.replace('*1x3', '*1x2+*83')
|
||||
|
||||
|
||||
|
||||
if '*2' in phased_dup1:
|
||||
count2 = phased_dup1.count('*2')
|
||||
b_ind1 = phased_dup1.index('*2')
|
||||
b_ind2 = 1 - b_ind1
|
||||
|
||||
if count2 == 1:
|
||||
test_13_2_v1 = hybrid_13_2_v1(cov_in4_3pr, cov_5pr_in4)
|
||||
test_13_2_v2 = hybrid_13_2_v2(cov_2d7_ex2_in8, cov_2d7_5pr_in1)
|
||||
|
||||
if test_13_2_v1 == 'norm_var':
|
||||
pass
|
||||
|
||||
elif test_13_2_v2 == 'norm_var':
|
||||
pass
|
||||
|
||||
elif test_13_2_v1 == 'hyb_13_2':
|
||||
phased_dup = phased_dup1[b_ind2] + "/" + '*13+*2'
|
||||
|
||||
elif test_13_2_v2 == 'hyb_13_2_v2':
|
||||
phased_dup = phased_dup1[b_ind2] + "/" + '*13'
|
||||
|
||||
|
||||
if '*2x2' in phased_dup1:
|
||||
count2 = phased_dup1.count('*2x2')
|
||||
b_ind1 = phased_dup1.index('*2x2')
|
||||
b_ind2 = 1 - b_ind1
|
||||
|
||||
if count2 == 1:
|
||||
test_13_2_v1 = hybrid_13_2_v1(cov_in4_3pr, cov_5pr_in4)
|
||||
test_13_2_v2 = hybrid_13_2_v2(cov_2d7_ex2_in8, cov_2d7_5pr_in1)
|
||||
|
||||
if test_13_2_v1 == 'norm_var':
|
||||
pass
|
||||
|
||||
elif test_13_2_v2 == 'norm_var':
|
||||
pass
|
||||
|
||||
elif test_13_2_v1 == 'hyb_13_2':
|
||||
phased_dup = phased_dup1[b_ind2] + "/" + '*13+*2'
|
||||
|
||||
elif test_13_2_v2 == 'hyb_13_2_v2':
|
||||
phased_dup = phased_dup1[b_ind2] + "/" + '*13+*2'
|
||||
|
||||
|
||||
|
||||
gene_alleles = phased_dup
|
||||
print(gene_alleles)
|
||||
|
||||
|
||||
elif snv_def_alleles[0] == snv_def_alleles[1]:
|
||||
|
||||
rt_2 = int(cn) - 1
|
||||
|
||||
phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))
|
||||
|
||||
phased_dup1 = phased_dup.split("/")
|
||||
|
||||
if '*4x2' in phased_dup1:
|
||||
count1 = phased_dup1.count('*4x2')
|
||||
a_ind1 = phased_dup1.index('*4x2')
|
||||
a_ind2 = 1 - a_ind1
|
||||
|
||||
|
||||
if count1 == 1:
|
||||
test_68 = hybrid_test_68(sv_dup, cn, av_cov, cn_in1_3pr, in_list)
|
||||
|
||||
if test_68 == 'norm_dup':
|
||||
pass
|
||||
|
||||
elif test_68 == 'hyb_68':
|
||||
phased_dup.replace('*4x2', '*68+*4')
|
||||
|
||||
|
||||
if '*10x2' in phased_dup1:
|
||||
count2 = phased_dup1.count('*10x2')
|
||||
b_ind1 = phased_dup1.index('*10x2')
|
||||
b_ind2 = 1 - b_ind1
|
||||
|
||||
if count2 == 1:
|
||||
test_36 = hybrid_test_36(sv_dup, cn, av_cov, cn_ex9_3pr, cn_2d7_ex9, cn_2d7_in4_in8)
|
||||
# print (test_36)
|
||||
|
||||
if test_36 == 'norm_dup':
|
||||
pass
|
||||
|
||||
elif test_36 == 'hyb_36_10':
|
||||
phased_dup = phased_dup.replace('*10x2', '*36+*10')
|
||||
|
||||
elif test_36 == 'hyb_36_36':
|
||||
phased_dup = phased_dup.replace('*10x2', '*36x2')
|
||||
|
||||
if '*10x3' in phased_dup1:
|
||||
count3 = phased_dup1.count('*10x3')
|
||||
c_ind1 = phased_dup1.index('*10x3')
|
||||
c_ind2 = 1 - c_ind1
|
||||
|
||||
if count3 == 1:
|
||||
test_36 = hybrid_test_36_mod(sv_dup, cn, av_cov, cn_ex9_3pr)
|
||||
|
||||
|
||||
if test_36 == 'norm_mt':
|
||||
pass
|
||||
|
||||
elif test_36 == 'hyb_36_10':
|
||||
phased_dup = phased_dup.replace('*10x3', '*36+*10x2')
|
||||
|
||||
|
||||
elif test_36 == 'hyb_36_36':
|
||||
phased_dup = '*36+*10/*36+*10'
|
||||
|
||||
|
||||
if '*1x3' in phased_dup1:
|
||||
count2 = phased_dup1.count('*1x3')
|
||||
b_ind1 = phased_dup1.index('*1x3')
|
||||
b_ind2 = 1 - b_ind1
|
||||
|
||||
|
||||
if count2 == 1:
|
||||
test_83 = hybrid_test_83(sv_dup, cn, av_cov, cn_ex9_3pr)
|
||||
|
||||
|
||||
if test_83 == 'norm_star39':
|
||||
pass
|
||||
|
||||
elif test_83 == 'hyb_83':
|
||||
phased_dup = phased_dup.replace('*1x3', '*1x2+*83')
|
||||
|
||||
|
||||
|
||||
if '*2x2' in phased_dup1:
|
||||
count2 = phased_dup1.count('*2x2')
|
||||
b_ind1 = phased_dup1.index('*2x2')
|
||||
b_ind2 = 1 - b_ind1
|
||||
|
||||
if count2 == 1:
|
||||
test_13_2_v1 = hybrid_13_2_v1(cov_in4_3pr, cov_5pr_in4)
|
||||
test_13_2_v2 = hybrid_13_2_v2(cov_2d7_ex2_in8, cov_2d7_5pr_in1)
|
||||
|
||||
if test_13_2_v1 == 'norm_var':
|
||||
pass
|
||||
|
||||
elif test_13_2_v2 == 'norm_var':
|
||||
pass
|
||||
|
||||
elif test_13_2_v1 == 'hyb_13_2':
|
||||
phased_dup = phased_dup1[b_ind2] + "/" + '*13+*2'
|
||||
|
||||
elif test_13_2_v2 == 'hyb_13_2_v2':
|
||||
phased_dup = phased_dup1[b_ind2] + "/" + '*13+*2'
|
||||
|
||||
|
||||
|
||||
gene_alleles = phased_dup
|
||||
print(gene_alleles)
|
||||
|
||||
|
||||
elif int(cn) > 4 and snv_def_alleles != None:
|
||||
|
||||
if "or" in snv_def_alleles:
|
||||
print (snv_def_alleles + "\t" + "Duplication present")
|
||||
|
||||
else:
|
||||
snv_def_alleles = snv_def_alleles.split("/")
|
||||
snv_cand_alleles = "".join(snv_cand_alleles)
|
||||
snv_cand_alleles = snv_cand_alleles.split("_")
|
||||
|
||||
|
||||
if snv_def_alleles[0] != snv_def_alleles[1]:
|
||||
|
||||
phased_dup = dup_test_cn_n(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)
|
||||
|
||||
if phased_dup == 'check':
|
||||
phased_dup = 'No_call'
|
||||
|
||||
else:
|
||||
pass
|
||||
|
||||
phased_dup1 = phased_dup.split("/")
|
||||
|
||||
if '*10x4' in phased_dup1:
|
||||
count3 = phased_dup1.count('*10x4')
|
||||
c_ind1 = phased_dup1.index('*10x4')
|
||||
c_ind2 = 1 - c_ind1
|
||||
|
||||
if count3 == 1:
|
||||
test_36 = hybrid_test_36_multi(sv_dup, cn, av_cov, cn_ex9_3pr)
|
||||
|
||||
|
||||
if test_36 == 'norm_mt':
|
||||
pass
|
||||
|
||||
elif test_36 == 'hyb_36_10':
|
||||
phased_dup = phased_dup.replace('*10x4', '*36+*10x3')
|
||||
|
||||
|
||||
elif test_36 == 'hyb_36_36':
|
||||
phased_dup = phased_dup.replace('*10x4', '*36x2+*10x2')
|
||||
|
||||
elif test_36 == 'hyb_36_36_36':
|
||||
phased_dup = phased_dup.replace('*10x4','*36x3+*10')
|
||||
|
||||
else:
|
||||
phased_dup = "No_call"
|
||||
|
||||
|
||||
elif '*10x3' in phased_dup1:
|
||||
count3 = phased_dup1.count('*10x3')
|
||||
c_ind1 = phased_dup1.index('*10x3')
|
||||
c_ind2 = 1 - c_ind1
|
||||
|
||||
if count3 == 1:
|
||||
test_36 = hybrid_test_36_multi(sv_dup, cn, av_cov, cn_ex9_3pr)
|
||||
|
||||
if test_36 == 'norm_mt':
|
||||
pass
|
||||
|
||||
elif test_36 == 'hyb_36_10':
|
||||
phased_dup = phased_dup.replace('*10x3', '*36+*10x2')
|
||||
|
||||
elif test_36 == 'hyb_36_36':
|
||||
phased_dup = phased_dup.replace('*10x3', '*36x2+*10')
|
||||
|
||||
elif test_36 == 'hyb_36_36_36':
|
||||
phased_dup = phased_dup.replace('*10x3','*36x3')
|
||||
|
||||
else:
|
||||
phased_dup = "No_call"
|
||||
|
||||
|
||||
elif phased_dup1[0].startswith('*10x') or phased_dup1[1].startswith('*10x'):
|
||||
|
||||
if phased_dup1[0].startswith('*10x'):
|
||||
dup_10_hyb = phased_dup1[0]
|
||||
|
||||
elif phased_dup1[1].startswith('*10x'):
|
||||
dup_10_hyb = phased_dup1[1]
|
||||
|
||||
cn_star10 = dup_10_hyb[(dup_10_hyb.find('x') + 1 ):]
|
||||
|
||||
test_36 = hybrid_test_36_multi_10(sv_dup, cn, av_cov, cn_ex9_3pr, cn_star10)
|
||||
|
||||
if test_36 == 'norm_mt':
|
||||
pass
|
||||
|
||||
elif test_36 == 'check':
|
||||
phased_dup = 'No_call'
|
||||
|
||||
else:
|
||||
c_ind1 = phased_dup1.index(dup_10_hyb)
|
||||
c_ind2 = 1 - c_ind1
|
||||
phased_dup = str(phased_dup1[c_ind2]) + "/" + test_36
|
||||
|
||||
|
||||
gene_alleles = phased_dup
|
||||
print(gene_alleles)
|
||||
|
||||
|
||||
elif snv_def_alleles[0] == snv_def_alleles[1]:
|
||||
rt_2 = int(cn) - 1
|
||||
|
||||
phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))
|
||||
|
||||
if phased_dup == 'check':
|
||||
phased_dup = 'No_call'
|
||||
|
||||
else:
|
||||
pass
|
||||
|
||||
phased_dup1 = phased_dup.split("/")
|
||||
|
||||
|
||||
if '*10x4' in phased_dup1:
|
||||
count3 = phased_dup1.count('*10x4')
|
||||
c_ind1 = phased_dup1.index('*10x4')
|
||||
c_ind2 = 1 - c_ind1
|
||||
|
||||
if count3 == 1:
|
||||
test_36 = hybrid_test_36_multi(sv_dup, cn, av_cov, cn_ex9_3pr)
|
||||
|
||||
|
||||
if test_36 == 'norm_mt':
|
||||
pass
|
||||
|
||||
elif test_36 == 'hyb_36_10':
|
||||
phased_dup = phased_dup.replace('*10x4', '*36+*10x3')
|
||||
|
||||
|
||||
elif test_36 == 'hyb_36_36':
|
||||
phased_dup = '*36+*10/*36+*10x2'
|
||||
|
||||
elif test_36 == 'hyb_36_36_36':
|
||||
phased_dup = '*36+*10/*36x2+*10'
|
||||
|
||||
else:
|
||||
phased_dup = "No_call"
|
||||
|
||||
|
||||
elif '*10x' in phased_dup1:
|
||||
phased_dup = "No_call"
|
||||
|
||||
|
||||
gene_alleles = phased_dup
|
||||
print(gene_alleles)
|
||||
|
||||
|
||||
elif int(cn) > 2 and snv_def_alleles == None:
|
||||
print("Possible rare CYP2D6/2D7 hybrid present")
|
||||
|
||||
|
||||
|
||||
print("\nActivity score:")
|
||||
|
||||
score_list = []
|
||||
|
||||
score_list1 = []
|
||||
score_list2 = []
|
||||
score_list3 = []
|
||||
|
||||
allele_dict = {}
|
||||
|
||||
def get_ac_score(act_score, star_alleles):
    """Return the summed activity value of every allele copy in a diplotype.

    Parameters
    ----------
    act_score : str
        Path to a whitespace-separated table whose first column is a star
        allele name and whose second column is its activity value (a number,
        or the literal ``n``).
    star_alleles : str
        Diplotype call such as ``*1/*2x2`` or ``*36+*10/*1``.  ``/`` and ``+``
        both separate alleles; an ``x<N>`` suffix is a copy multiplier.

    Returns
    -------
    float or str
        Sum of ``value * copies`` over all alleles, or the string
        ``"Indeterminate"`` when any allele's value is ``n``.

    Raises
    ------
    KeyError
        If an allele in ``star_alleles`` is missing from the table.
    """
    # All working state is local: earlier revisions appended to module-level
    # lists, so a second call also summed the alleles of the first call.
    with open(act_score, "r") as handle:
        rows = [line.strip().split() for line in handle]

    # Rows with fewer than two fields (e.g. blank lines) carry no value.
    # Later rows override earlier ones for the same allele name.
    allele_values = {row[0]: row[1] for row in rows if len(row) > 1}

    weighted = []
    for elem in star_alleles.replace("/", "+").split("+"):
        if "x" in elem:
            cut = elem.find("x")
            allele, copies = elem[:cut], elem[cut + 1:]
        else:
            allele, copies = elem, "1"
        weighted.append((allele_values[allele], copies))

    if any(value == "n" for value, _ in weighted):
        return "Indeterminate"

    return sum(float(value) * float(copies) for value, copies in weighted)
|
||||
|
||||
|
||||
|
||||
# Unresolved calls cannot be scored; everything else goes through the table.
if gene_alleles in ("", 'No_call', 'check'):
    ac_score = "Indeterminate"
else:
    ac_score = get_ac_score(act_score, gene_alleles)

print(ac_score)

print("\nMetaboliser status:")

# Map the activity score onto a phenotype label.  A score that matches none
# of the ranges (e.g. a negative value) prints nothing, as before.
phenotype = None

if ac_score == "Indeterminate":
    phenotype = "Indeterminate"
elif ac_score == 0:
    phenotype = "Poor metaboliser (PM)"
elif 0 < ac_score < 1.25:
    phenotype = "Intermediate metaboliser (IM)"
elif 1.25 <= ac_score <= 2.25:
    phenotype = "Normal metaboliser (NM)"
elif ac_score > 2.25:
    phenotype = "Ultrarapid metaboliser (UM)"

if phenotype is not None:
    print(phenotype)
|
||||
611
pgx-main/scripts/cyp2d6/hg38/bin/sv_modules.py
Normal file
611
pgx-main/scripts/cyp2d6/hg38/bin/sv_modules.py
Normal file
@@ -0,0 +1,611 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
def get_total_CN(cov_file):
    """Derive copy-number and coverage figures from a region coverage file.

    Every line of ``cov_file`` is split on whitespace; for a row, field 3
    divided by (field 2 - field 1) is used as that region's mean coverage.
    Rows are consumed by fixed position (0-10), under the names used below.

    Returns
    -------
    list
        ``[total_cn (str), 2d6 coverage (int), in1_3pr copy number (str),
        control coverage (int), ex9_3pr copy number (str, unrounded),
        in1_3pr coverage (int), in4_3pr, 5pr_in4, 2d7_ex9, 2d7_in4_in8,
        2d7_ex2_in8, 2d7_5pr_in1 coverages (all str)]``

    Raises
    ------
    IndexError
        If the file has fewer than 11 rows or a row has fewer than 4 fields.
    ZeroDivisionError
        If a region has equal start and end, or the control coverage is 0.
    """
    # Context manager so the handle is closed even if parsing fails.
    with open(cov_file, "r") as handle:
        all_reg = [line.strip().split() for line in handle]

    def mean_cov(row_index):
        # Summed coverage divided by region length.
        row = all_reg[row_index]
        return float(row[3]) / (float(row[2]) - float(row[1]))

    av_2d6_cov = mean_cov(2)
    av_vdr_cov = mean_cov(3)
    av_in1_3pr = mean_cov(1)
    av_ex9_3pr = mean_cov(0)
    av_in4_3pr = mean_cov(4)
    av_5pr_in4 = mean_cov(5)
    av_2d7_ex9 = mean_cov(6)
    av_2d7_in4_in8 = mean_cov(7)
    av_egfr_cov = mean_cov(8)
    av_2d7_ex2_in8 = mean_cov(9)
    av_2d7_5pr_in1 = mean_cov(10)

    # The two control regions are averaged and treated as two copies.
    av_ctrl_cov = (av_vdr_cov + av_egfr_cov) / 2

    total_cn = round(2 * (av_2d6_cov / av_ctrl_cov))
    in1_3pr = round(2 * av_in1_3pr / av_ctrl_cov)
    # Deliberately left unrounded: callers compare it against fractional
    # thresholds.
    ex9_3pr = (2 * av_ex9_3pr / av_ctrl_cov)

    return [
        str(int(total_cn)),
        round(av_2d6_cov),
        str(int(in1_3pr)),
        round(av_ctrl_cov),
        str(ex9_3pr),
        round(av_in1_3pr),
        str(av_in4_3pr),
        str(av_5pr_in4),
        str(av_2d7_ex9),
        str(av_2d7_in4_in8),
        str(av_2d7_ex2_in8),
        str(av_2d7_5pr_in1),
    ]
|
||||
|
||||
|
||||
samp_gt = ""
|
||||
samp_gt_hap1 = ""
|
||||
|
||||
def del_test(sv_del):
    """Classify a whole-gene deletion call from the ``sv_del`` file.

    Returns
    -------
    str or None
        * ``"None"`` (the string) when the file is empty.
        * ``"*5/*5"`` when the first decisive ``COVERAGE`` line ends in a
          field equal to ``1.0``.
        * ``"*5"`` when that last field equals ``-1.0``.
        * ``None`` when no ``COVERAGE`` line is decisive.
    """
    if os.stat(sv_del).st_size == 0:
        return "None"

    # Context manager so the handle is released on the early returns too.
    with open(sv_del, "r") as handle:
        for line in handle:
            if "COVERAGE" not in line:
                continue

            # Only the last field (called ABHom in the original code) drives
            # the decision; the other fields were parsed but never used.
            ab_hom = float(line.strip().split()[-1])

            if ab_hom == 1.0:
                return "*5/*5"
            if ab_hom == -1.0:
                return "*5"

    return None
|
||||
|
||||
|
||||
hap_adv_list = []
|
||||
hap_t1 = []
|
||||
|
||||
|
||||
def del_adv_test(hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, core_vars):
    """Pick which of two alleles carries the sample's first heterozygous variant.

    Parameters
    ----------
    hap_dbs : str
        Path to a whitespace-separated haplotype table; field 1 is the
        candidate allele id and field 2 its ``;``-joined variant list.
    cand_allele1, cand_allele2 : str
        Candidate ids to look up in field 1 of ``hap_dbs``.
    test_allele1, test_allele2 : str
        Values returned when the variant belongs to candidate 1 / 2.
    core_vars : str
        ``;``-joined sample variants, each ending in ``~<genotype>``.

    Returns
    -------
    str or None
        ``test_allele1`` or ``test_allele2``; ``None`` when the sample has no
        ``0/1`` variant or the variant is in neither candidate's list.
    """
    # State is local on purpose: with the former module-level lists the
    # "first het variant" of an earlier call was reused by every later call.
    with open(hap_dbs, "r") as handle:
        hap_rows = [line.strip().split() for line in handle]

    # Strip the 4-character "~0/1" suffix to recover the bare variant name.
    het_vars = [item[:-4] for item in core_vars.split(";") if item[-3:] == "0/1"]
    if not het_vars:
        return None

    vars_1 = []
    vars_2 = []
    for row in hap_rows:
        # The last matching row wins, as in the original loop.
        if row[1] == cand_allele1:
            vars_1 = row[2].split(';')
        if row[1] == cand_allele2:
            vars_2 = row[2].split(';')

    if het_vars[0] in vars_1:
        return test_allele1
    if het_vars[0] in vars_2:
        return test_allele2
    return None
|
||||
|
||||
|
||||
het_hom_list = []
|
||||
het_hom_list_new = []
|
||||
|
||||
def dup_test_init(sv_dup, av_cov):
    """Load per-variant rows from ``sv_dup`` and annotate them with read support.

    Lines containing ``COVERAGE`` or ``AGGREGATED`` are ignored.  Every other
    non-blank line is split on whitespace; field 2 is parsed as an integer
    depth and the second-to-last field as a fraction.

    Parameters
    ----------
    sv_dup : str
        Path of the file to read.
    av_cov :
        Unused; kept for interface compatibility.

    Returns
    -------
    list of list
        Each input row with two values appended: supporting reads divided by
        the mean depth over all rows (rounded to 4 places), then the
        supporting read count ``round(fraction * depth)``.  Empty when the
        file has no variant rows.
    """
    # Rows are collected locally: with the former module-level lists a second
    # call re-annotated and re-returned the rows of the first call.
    with open(sv_dup, "r") as handle:
        rows = [
            line.strip().split()
            for line in handle
            if "COVERAGE" not in line and "AGGREGATED" not in line
        ]
    rows = [row for row in rows if row]

    if not rows:
        return []

    depths = [int(row[2]) for row in rows]
    av_read_cov = sum(depths) / len(depths)

    for row in rows:
        # row[-2] is read before anything is appended, so it is still the
        # fraction field of the input line.
        supp_reads = round(float(row[-2]) * int(row[2]))
        row.append(round(supp_reads / av_read_cov, 4))
        row.append(supp_reads)

    return rows
|
||||
|
||||
|
||||
|
||||
hap_def_list = []
|
||||
allele_cn_list = []
|
||||
|
||||
def _assign_allele_copies(hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, in_list, offset):
    """Split ``c_num`` copies between two alleles using the strongest het variant.

    Returns ``(allele_a, copies_a, allele_b, copies_b)`` where ``allele_a`` is
    the allele whose candidate definition lists that variant.
    """
    with open(hap_dbs, "r") as handle:
        hap_rows = [line.strip().split() for line in handle]

    het_rows = [row for row in in_list if row[1] == "0/1"]
    het_names = [row[0] for row in het_rows]
    het_fracs = [row[-2] for row in het_rows]

    # Heterozygous variant with the largest second-to-last field
    # (first occurrence wins on ties).
    max_het = max(het_fracs)
    var = het_names[het_fracs.index(max_het)]

    vars_1 = None
    vars_2 = None
    for row in hap_rows:
        # The last matching row wins.
        if row[1] == cand_allele1:
            vars_1 = row[2].split(';')
        if row[1] == cand_allele2:
            vars_2 = row[2].split(';')
    if vars_1 is None or vars_2 is None:
        raise ValueError("candidate allele not found in {}".format(hap_dbs))

    # Position in the concatenated list decides the owner; candidate 1 wins
    # when both definitions list the variant.
    index_var = (vars_1 + vars_2).index(var)
    copies_a = int(round(max_het * int(c_num) - offset))

    if index_var < len(vars_1):
        allele_a, allele_b = test_allele1, test_allele2
    else:
        allele_a, allele_b = test_allele2, test_allele1

    return allele_a, copies_a, allele_b, int(c_num) - copies_a


def _common_dup_call(a, n1, b, n2, max_multi):
    """Format the diplotype for the cases both callers share, else ``None``.

    Checks run in the original order: copy counts 0, 1, then 2..``max_multi``,
    then -1, each time testing ``n1`` before ``n2``.
    """
    if n1 == 0:
        return a + "/" + b + "x" + str(n2 - 1)
    if n2 == 0:
        return b + "/" + a + "x" + str(n1 - 1)
    if n1 == 1:
        return a + "/" + b + "x" + str(n2)
    if n2 == 1:
        return b + "/" + a + "x" + str(n1)
    for k in range(2, max_multi + 1):
        if n1 == k:
            return a + "x" + str(k) + "/" + b + "x" + str(n2)
        if n2 == k:
            return b + "x" + str(k) + "/" + a + "x" + str(n1)
    if n1 == -1:
        return a + "/" + b + "x" + str(n2 - 2)
    if n2 == -1:
        return b + "/" + a + "x" + str(n1 - 2)
    return None


def dup_test_cn_3_4(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
    """Phase a duplication between two different alleles.

    In the visible caller this is paired with ``dup_test_cn_n``, which is
    used when the total copy number is above 4.

    Parameters
    ----------
    sv_dup, av_cov :
        Unused; kept for interface compatibility.
    hap_dbs : str
        Whitespace-separated haplotype table (field 1 = candidate id,
        field 2 = ``;``-joined variants).
    cand_allele1, cand_allele2 : str
        Candidate ids looked up in ``hap_dbs``.
    test_allele1, test_allele2 : str
        Star-allele labels used in the returned call.
    c_num : str or int
        Total copy number.
    in_list : list of list
        Variant rows; ``row[0]`` is the variant, ``row[1]`` the genotype and
        ``row[-2]`` a numeric ratio (NOTE(review): presumably the output of
        ``dup_test_init`` -- confirm against the caller).

    Returns
    -------
    str
        A diplotype such as ``*1/*2x2``, or ``'check'`` when no rule applies.

    Raises
    ------
    ValueError
        If there is no ``0/1`` row, a candidate id is missing from
        ``hap_dbs``, or the chosen variant is in neither candidate definition.
    """
    # State is local: the former module-level result list kept the alleles of
    # the first call at indices 0..3, so every later call returned that
    # first answer.
    a, n1, b, n2 = _assign_allele_copies(
        hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, in_list, 0.0)

    res_dip = _common_dup_call(a, n1, b, n2, 2)
    if res_dip is not None:
        return res_dip

    if n2 < -1:
        return b + "/" + a + "x" + str(3)
    if n1 < -1:
        return a + "/" + b + "x" + str(3)
    return 'check'


def dup_test_cn_n(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
    """Phase a duplication between two different alleles (larger copy numbers).

    Same inputs and errors as ``dup_test_cn_3_4``.  Differences: 0.15 is
    subtracted from the scaled ratio before rounding, explicit ``x3``/``x4``
    cases exist, and negative copy estimates are folded back into the other
    allele's count.

    Returns
    -------
    str
        A diplotype such as ``*1x3/*2x3``, or ``'check'`` when no rule applies.
    """
    a, n1, b, n2 = _assign_allele_copies(
        hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, in_list, 0.15)

    res_dip = _common_dup_call(a, n1, b, n2, 4)
    if res_dip is not None:
        return res_dip

    if n2 < 0:
        return b + "/" + a + "x" + str(n1 + n2 - 1)
    if n1 < 0:
        return a + "/" + b + "x" + str(n2 + n1 - 1)
    return 'check'
|
||||
|
||||
|
||||
|
||||
def hybrid_test_68(sv_dup, c_num, av_cov, cn_in1_3pr1, in_list):
    """Compare read support of two marker variants to flag a hybrid duplication.

    Parameters
    ----------
    sv_dup, c_num, av_cov, cn_in1_3pr1 :
        Unused; kept for interface compatibility.
    in_list : list of list
        Variant rows; ``row[0]`` is the variant name and ``row[-1]`` its
        supporting read count.

    Returns
    -------
    str
        ``'hyb_68'`` when reads(``42130692~G>A``) / reads(``42128945~C>T``)
        exceeds 1.4, otherwise ``'norm_dup'``.

    Raises
    ------
    ValueError
        If either marker variant is absent from ``in_list``.
    ZeroDivisionError
        If the ``42128945~C>T`` read count is 0.
    """
    marker_68 = '42130692~G>A'  # read count was named val_68 originally
    marker_4 = '42128945~C>T'   # read count was named val_4 originally

    # First occurrence wins, matching list.index().  The former per-row
    # float() of row[-2] fed a list that was never read, so it is gone.
    reads = {}
    for row in in_list:
        reads.setdefault(row[0], row[-1])

    for marker in (marker_68, marker_4):
        if marker not in reads:
            raise ValueError("'{}' is not in list".format(marker))

    rt = reads[marker_68] / reads[marker_4]

    if rt > 1.4:
        return 'hyb_68'
    return 'norm_dup'
|
||||
|
||||
|
||||
def hyb_test_5_68_4(sv_del, in1_3pr1_float, av_cov):
    """Combine deletion evidence with a coverage ratio to classify the sample.

    Parameters
    ----------
    sv_del : str
        Path of a file scanned for lines containing ``COVERAGE``.
    in1_3pr1_float, av_cov : number
        Numerator and denominator of ``t1 = 2 * in1_3pr1_float / av_cov``.

    Returns
    -------
    str or None
        ``'norm_art'`` when there is no ``COVERAGE`` line and
        ``1.6 < t1 < 2.8``; ``'del_hyb'`` when there is at least one
        ``COVERAGE`` line and ``t1 < 1.6``; ``None`` otherwise.
    """
    # Context manager so the handle is not left open.
    with open(sv_del, "r") as handle:
        has_del_evidence = any("COVERAGE" in line for line in handle)

    t1 = 2 * in1_3pr1_float / av_cov

    if not has_del_evidence and (1.6 < t1 < 2.8):
        return 'norm_art'
    if has_del_evidence and t1 < 1.6:
        return 'del_hyb'
    return None
|
||||
|
||||
|
||||
def hybrid_test_36(sv_dup, cn, av_cov, cn_ex9_3pr, cn_2d7_ex9, cn_2d7_in4_in8):
    """Classify a duplication from the ``cn_ex9_3pr`` copy estimate.

    ``sv_dup`` and ``av_cov`` are unused.  With ``total = int(cn)``:

    * ``'hyb_36_10'`` if ``total-1-0.3 < cn_ex9_3pr < total-1+0.5`` or if
      ``2 * cn_2d7_ex9 / cn_2d7_in4_in8`` lies strictly between 2.5 and 3.5;
    * ``'hyb_36_36'`` if ``total-2 <= cn_ex9_3pr < total-2+0.7``;
    * ``'norm_dup'`` otherwise.
    """
    total = int(cn)
    ex9 = float(cn_ex9_3pr)

    one_copy_short = (total - 1) - 0.3 < ex9 < (total - 1) + 0.5
    if not one_copy_short:
        # The ratio is only evaluated when the first window fails.
        ratio_2d7 = 2 * float(cn_2d7_ex9) / float(cn_2d7_in4_in8)
        one_copy_short = 2.5 < ratio_2d7 < 3.5

    if one_copy_short:
        return 'hyb_36_10'
    if (total - 2) <= ex9 < (total - 2 + 0.7):
        return 'hyb_36_36'
    return 'norm_dup'
|
||||
|
||||
|
||||
def hybrid_test_36_single(sv_dup, cn, av_cov, cn_ex9_3pr):
    """Return ``'hyb_36_single'`` when ``cn_ex9_3pr`` is about one copy below ``cn``.

    ``sv_dup`` and ``av_cov`` are unused.  The hybrid label needs the rounded
    estimate to differ from ``int(cn)`` and the raw estimate to lie strictly
    between ``cn-1-0.35`` and ``cn-1+0.5``; every other case gives
    ``'norm_star10'``.
    """
    ex9 = float(cn_ex9_3pr)
    agrees_with_total = int(round(ex9)) == int(cn)

    if not agrees_with_total:
        expected = int(cn) - 1
        if expected - 0.35 < ex9 < expected + 0.5:
            return 'hyb_36_single'

    return 'norm_star10'
|
||||
|
||||
|
||||
|
||||
def hybrid_test_36_mod(sv_dup, cn, av_cov, cn_ex9_3pr):
    """Classify how many copies short ``cn_ex9_3pr`` is relative to ``cn``.

    ``sv_dup`` and ``av_cov`` are unused.  Returns ``'norm_mt'`` when the
    rounded estimate equals ``int(cn)``, ``'hyb_36_10'`` for
    ``cn-1-0.05 < estimate < cn-1+0.5``, ``'hyb_36_36'`` for
    ``cn-2 <= estimate < cn-2+0.95``, and ``None`` when nothing matches.
    """
    ex9 = float(cn_ex9_3pr)
    total = int(cn)

    if int(round(ex9)) == total:
        return 'norm_mt'
    if (total - 1) - 0.05 < ex9 < (total - 1) + 0.5:
        return 'hyb_36_10'
    if (total - 2) <= ex9 < (total - 2 + 0.95):
        return 'hyb_36_36'
    # No window matched: fall through with no classification.
    return None
|
||||
|
||||
|
||||
def hybrid_test_36_multi(sv_dup, cn, av_cov, cn_ex9_3pr):
    """Classify how many copies short ``cn_ex9_3pr`` is, up to three.

    ``sv_dup`` and ``av_cov`` are unused.  Returns ``'norm_mt'`` when the
    rounded estimate equals ``int(cn)``; ``'hyb_36_10'`` for
    ``cn-1-0.05 < estimate < cn-1+0.5``; ``'hyb_36_36'`` and
    ``'hyb_36_36_36'`` for ``cn-k <= estimate < cn-k+0.95`` with k = 2 and 3;
    ``'check'`` otherwise.
    """
    ex9 = float(cn_ex9_3pr)
    total = int(cn)

    if int(round(ex9)) == total:
        return 'norm_mt'
    if (total - 1) - 0.05 < ex9 < (total - 1) + 0.5:
        return 'hyb_36_10'

    # Two and three missing copies share the same half-open window shape.
    for missing, label in ((2, 'hyb_36_36'), (3, 'hyb_36_36_36')):
        lower = total - missing
        if lower <= ex9 < lower + 0.95:
            return label

    return 'check'
|
||||
|
||||
|
||||
def hybrid_test_36_multi_10(sv_dup, cn, av_cov, cn_ex9_3pr, cn_star10):
    """Build the ``*36``/``*10`` tandem label for a multi-copy ``*10`` call.

    ``sv_dup`` and ``av_cov`` are unused.

    Returns ``'norm_mt'`` when the rounded ``cn_ex9_3pr`` equals ``int(cn)``
    and ``'check'`` when the estimate is not below ``cn-1+0.5``.  Otherwise
    the shortfall ``int(cn) - round(cn_ex9_3pr)`` is taken as the ``*36``
    count, subtracted from ``cn_star10``, and formatted as
    ``*36+*10x<N>`` (one ``*36``) or ``*36x<K>+*10x<N>``.
    """
    ex9 = float(cn_ex9_3pr)
    rounded_ex9 = int(round(ex9))
    total = int(cn)

    if rounded_ex9 == total:
        return 'norm_mt'
    if not ex9 < (total - 1) + 0.5:
        return 'check'

    star36_copies = total - rounded_ex9
    star10_copies = int(cn_star10) - star36_copies

    prefix = '*36' if star36_copies == 1 else '*36x' + str(star36_copies)
    return prefix + '+*10x' + str(star10_copies)
|
||||
|
||||
|
||||
def hybrid_13_2_v1(cov_in4_3pr, cov_5pr_in4):
    """Return ``'hyb_13_2'`` when the coverage ratio lies in (0.45, 0.75).

    The ratio is ``float(cov_in4_3pr) / float(cov_5pr_in4)``.  Every other
    value, including the (0.85, 1.2) window, yields ``'norm_var'``.
    """
    ratio = float(cov_in4_3pr) / float(cov_5pr_in4)
    return 'hyb_13_2' if 0.45 < ratio < 0.75 else 'norm_var'
|
||||
|
||||
|
||||
def hybrid_13_2_v2(cov_2d7_ex2_in8, cov_2d7_5pr_in1):
    """Return ``'hyb_13_2_v2'`` when the coverage ratio lies in (0.45, 0.75).

    The ratio is ``float(cov_2d7_ex2_in8) / float(cov_2d7_5pr_in1)``.  Every
    other value, including the (0.85, 1.2) window, yields ``'norm_var'``.
    """
    ratio = float(cov_2d7_ex2_in8) / float(cov_2d7_5pr_in1)
    return 'hyb_13_2_v2' if 0.45 < ratio < 0.75 else 'norm_var'
|
||||
|
||||
|
||||
|
||||
def tandem_90_1(in_list, alt_allele, cn):
    """Choose between ``*90`` and ``*90+*1`` arrangements for copy number 3 or 4.

    Parameters
    ----------
    in_list : list of list
        Variant rows; ``row[0]`` is the variant, ``row[-2]`` a numeric ratio
        and ``row[-1]`` a supporting read count.
    alt_allele : str
        Label of the other allele, used when more than one variant is given.
    cn : str or int
        Total copy number.

    Returns
    -------
    str
        The diplotype label.

    NOTE(review): combinations not covered by the rules below (for example a
    copy number other than 3 or 4) leave the result unassigned, so the final
    ``return`` raises UnboundLocalError -- confirm the intended fallback.
    """
    names = []
    ratios = []
    reads = []
    for row in in_list:
        names.append(row[0])
        ratios.append(abs(float(row[-2])))
        reads.append(row[-1])

    if len(names) > 1:
        # Compare the best-supported other variant with the marker variant.
        marker_reads = reads.pop(names.index('42129098~T>C'))
        fold = round(max(reads) / marker_reads)
        copies = int(cn)

        if copies == 3:
            if fold == 1:
                res = alt_allele + "/" + "*90+*1"
            elif fold > 1:
                res = alt_allele + "x2" + "/" + "*90"
        elif copies == 4:
            if fold == 2:
                res = alt_allele + "x2" + "/" + "*90+*1"
            elif fold >= 3:
                res = alt_allele + "x3" + "/" + "*90"

    else:
        # Single variant: scale its ratio by the copy number and look up the
        # arrangement for (copy number, scaled count).
        scaled = round(ratios[0] * int(cn))
        single_variant_calls = {
            (3, 1): '*1/*90+*1',
            (3, 2): '*90/*90+*1',
            (4, 1): '*1x2/*90+*1',
            (4, 2): '*90+*1/*90+*1',
            (4, 3): '*90x2/*90+*1',
            (3, 3): '*90/*90x2',
            (4, 4): '*90/*90x3',
        }
        key = (int(cn), scaled)
        if key in single_variant_calls:
            res = single_variant_calls[key]

    return res
|
||||
|
||||
|
||||
def tandem_57_10(in_list, alt_allele, cn):
    """Choose between ``*57`` and ``*57+*10`` arrangements for copy number 3 or 4.

    Parameters
    ----------
    in_list : list of list
        Variant rows; ``row[0]`` is the variant name and ``row[-1]`` its
        supporting read count.
    alt_allele : str
        Label of the other allele.
    cn : str or int
        Total copy number.

    Returns
    -------
    str
        The diplotype label; ``alt_allele + "/*57+*10"`` is the fallback and
        is also returned when fewer than two variants are supplied.

    Raises
    ------
    ValueError
        If either marker variant is missing, or no variant other than the two
        markers is present.
    """
    default_call = alt_allele + "/" + "*57+*10"

    names = [row[0] for row in in_list]
    reads = [row[-1] for row in in_list]

    if len(names) <= 1:
        # NOTE(review): this path used to hit undefined ratios for copy
        # number 3 or 4; the function's own fallback is returned instead.
        return default_call

    idx_a = names.index('42129906~G>A')
    idx_m = names.index('42130692~G>A')

    # Read both counts BEFORE removing anything.  The previous code popped
    # the first marker and then reused the second marker's original index,
    # which pointed at the wrong row whenever it came after the first.
    a = reads[idx_a]
    m = reads[idx_m]
    other_reads = [count for pos, count in enumerate(reads) if pos not in (idx_a, idx_m)]
    b = max(other_reads)

    c = round(b / a)
    p = round(m / a)
    copies = int(cn)

    if copies == 3 and c == 1 and p > 1:
        return alt_allele + "/" + "*57+*10"
    if copies == 3 and c > 1 and p == 1:
        return alt_allele + "x2" + "/" + "*57"
    if copies == 4 and c == 2 and p > 1:
        return alt_allele + "x2" + "/" + "*57+*10"
    if copies == 4 and c >= 3 and p == 1:
        return alt_allele + "x3" + "/" + "*57"
    if copies == 4 and p == 1 and alt_allele == '*10':
        return "*57+*10" + "/" + "*57+*10"
    if copies == 4 and p > 1 and alt_allele == '*10':
        return "*10x2" + "/" + "*57+*10"
    return default_call
|
||||
|
||||
|
||||
|
||||
def hybrid_test_83_single(sv_dup, cn, av_cov, cn_ex9_3pr):
    """Return ``'hyb_83_single'`` when ``cn_ex9_3pr`` is about one copy below ``cn``.

    ``sv_dup`` and ``av_cov`` are unused.  The hybrid label needs the rounded
    estimate to differ from ``int(cn)`` and the raw estimate to lie strictly
    between ``cn-1-0.35`` and ``cn-1+0.5``; every other case gives
    ``'norm_star39'``.
    """
    ex9 = float(cn_ex9_3pr)
    agrees_with_total = int(round(ex9)) == int(cn)

    if not agrees_with_total:
        expected = int(cn) - 1
        if expected - 0.35 < ex9 < expected + 0.5:
            return 'hyb_83_single'

    return 'norm_star39'
|
||||
|
||||
|
||||
def hybrid_test_83(sv_dup, cn, av_cov, cn_ex9_3pr):
    """Return ``'hyb_83'`` when ``cn_ex9_3pr`` is about one copy below ``cn``.

    ``sv_dup`` and ``av_cov`` are unused.  The hybrid label needs the rounded
    estimate to differ from ``int(cn)`` and the raw estimate to lie strictly
    between ``cn-1-0.35`` and ``cn-1+0.5``; every other case gives
    ``'norm_star39'``.
    """
    ex9 = float(cn_ex9_3pr)
    agrees_with_total = int(round(ex9)) == int(cn)

    if not agrees_with_total:
        expected = int(cn) - 1
        if expected - 0.35 < ex9 < expected + 0.5:
            return 'hyb_83'

    return 'norm_star39'
|
||||
78
pgx-main/scripts/cyp2e1/b37/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/cyp2e1/b37/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Find the best-scoring background diplotype for a set of core variants.

    Parameters
    ----------
    database : str
        Path to a tab-separated table; column 0 is a diplotype id of the form
        ``<hap1>.<ver>_<hap2>.<ver>`` and column 1 its ``;``-joined core
        variants (each ending in ``~<genotype>``).
    core_vars : str
        ``;``-joined core variants of the sample.

    Returns
    -------
    list
        ``[allele_res, diplo1]``, e.g. ``['*1/*7', '1.v1_7.v1']``.  When no
        record qualifies the result is ``['*ref/*ref', 'ref.v1_ref.v1']``.

    Notes
    -----
    A record qualifies only when its variant string contains BOTH the first
    and the last sample variant (genotype suffix stripped).  Each record
    variant scores +3 for an exact match, +1 when only the variant name
    appears in ``core_vars``, and -2 otherwise.  Among records within one
    point of the top score, the one with the fewest variants is chosen.
    """
    core_vars_list = core_vars.split(";")
    # Drop the 4-character "~g/g" genotype suffix.
    last_core = core_vars_list[-1][:-4]
    first_core = core_vars_list[0][:-4]

    with open(database, "r") as handle:
        dbs = [line.strip().split("\t") for line in handle]

    candidates = []
    cand_vars = []
    scores = []

    for record in dbs:
        temp_rec = record[1]

        # Both boundary variants must occur in the record.  The old test
        # `core_temp1 and core_temp2 in temp_rec` only checked that
        # core_temp1 was non-empty; that non-empty guard is kept.
        if not (last_core and last_core in temp_rec and first_core in temp_rec):
            continue

        record_core_var = temp_rec.split(";")
        counter = 0
        for variant in record_core_var:
            if variant in core_vars_list:
                counter += 3
            elif variant[:-4] in core_vars:
                counter += 1
            else:
                counter -= 2

        candidates.append(record[0])
        cand_vars.append(record_core_var)
        scores.append(counter)

    if not scores:
        return ['*ref/*ref', 'ref.v1_ref.v1']

    max_score = max(scores)
    near_best = [pos for pos, score in enumerate(scores)
                 if score == max_score or score == max_score - 1]

    # Fewest variants wins; the earliest record wins ties.
    best = min(near_best, key=lambda pos: len(cand_vars[pos]))
    diplo1 = candidates[best]

    underscores = [pos for pos, char in enumerate(diplo1) if char == "_"]
    dots = [pos for pos, char in enumerate(diplo1) if char == "."]
    hap1 = "*" + diplo1[:dots[0]]
    hap2 = "*" + diplo1[underscores[0] + 1:dots[1]]

    return [hap1 + "/" + hap2, diplo1]
|
||||
461
pgx-main/scripts/cyp2e1/b37/bin/snv_def_modules.py
Normal file
461
pgx-main/scripts/cyp2e1/b37/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,461 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the core variants in ``infile`` as one sorted, ``;``-joined string.

    Parameters
    ----------
    infile : str
        Path to a file with one variant per line.
    cn : str or int
        Copy number.  When it equals 1, every ``~0/1`` genotype is rewritten
        to ``~1/1``.

    Returns
    -------
    str
        The joined variants; an empty string for an empty file.
    """
    # Context manager so the handle is closed once the lines are read.
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars
|
||||
|
||||
def get_all_vars_gt(infile_full_gt):
    """Return every line of ``infile_full_gt`` as one sorted, ``;``-joined string.

    Lines are stripped of surrounding whitespace before sorting.  An empty
    file yields an empty string.
    """
    # Context manager so the handle is closed once the lines are read.
    with open(infile_full_gt, "r") as handle:
        return ";".join(sorted(line.strip() for line in handle))
|
||||
|
||||
def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
|
||||
|
||||
|
||||
f = open(infile_spec, "r")
|
||||
|
||||
all_variants = []
|
||||
|
||||
for line in open(infile_full, "r"):
|
||||
line.strip()
|
||||
all_variants.append(line)
|
||||
# all_variants = line.strip().split(";")
|
||||
# print(all_variants)
|
||||
|
||||
if os.stat(infile).st_size == 0:
|
||||
cand_res = ['ref.v1_ref.v1']
|
||||
allele_res = "*ref/*ref"
|
||||
return ["".join(cand_res), allele_res];
|
||||
#print("\nSupporting variants")
|
||||
#print("\n" + "".join(all_variants))
|
||||
sys.exit()
|
||||
|
||||
# core_variants = []
|
||||
|
||||
# for line in open(infile, "r"):
|
||||
# line = line.strip()
|
||||
# core_variants.append(line)
|
||||
|
||||
# core_variants = ";".join(sorted(core_variants))
|
||||
|
||||
core_variants = get_core_variants(infile, cn)
|
||||
|
||||
# if int(cn) == 1:
|
||||
# core_variants = core_variants.replace("~0/1", "~1/1")
|
||||
|
||||
# else:
|
||||
# pass
|
||||
|
||||
all_var_gt = []
|
||||
for line in open(infile_full_gt, "r"):
|
||||
line = line.strip()
|
||||
all_var_gt.append(line)
|
||||
|
||||
|
||||
dbs = []
|
||||
|
||||
for line in open(database, "r"):
|
||||
line = line.strip().split("\t")
|
||||
dbs.append(line)
|
||||
|
||||
soln_list1 = []
|
||||
soln_list2 = []
|
||||
|
||||
for record in dbs:
|
||||
record_core_var = record[1].split(";")
|
||||
record_core_var = ";".join(sorted(record_core_var))
|
||||
if record_core_var == core_variants:
|
||||
diplo = record[0]
|
||||
full_dip = record[2]
|
||||
soln_list1.append(record[0])
|
||||
soln_list2.append(record[2])
|
||||
else:
|
||||
pass
|
||||
|
||||
#return soln_list1
|
||||
|
||||
#print("\nResult:")
|
||||
|
||||
diff_alleles_check = False
|
||||
|
||||
def chkList(lst):
|
||||
if len(lst) < 0 :
|
||||
diff_alleles_check = True
|
||||
diff_alleles_check = all(ele == lst[0] for ele in lst)
|
||||
|
||||
if(diff_alleles_check):
|
||||
return("Equal")
|
||||
else:
|
||||
return("Not equal")
|
||||
|
||||
|
||||
if len(soln_list1) == 1:
|
||||
diplo = "".join(soln_list1)
|
||||
res1 = [i for i in range(len(diplo)) if diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo)) if diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo[:res2[0]])
|
||||
hap2 = "*" + str (diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo, allele_res];
|
||||
#print ("\nSupporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
elif len(soln_list1) == 2:
|
||||
print(soln_list1)
|
||||
diplo1 = soln_list1[0]
|
||||
diplo2 = soln_list1[1]
|
||||
diplo1_supp_var = soln_list2[0].split(";")
|
||||
diplo2_supp_var = soln_list2[1].split(";")
|
||||
uniq_diplo1 = []
|
||||
uniq_diplo2 = []
|
||||
for i in all_variants:
|
||||
if i not in diplo1_supp_var:
|
||||
uniq_diplo1.append(i)
|
||||
|
||||
if i not in diplo2_supp_var:
|
||||
uniq_diplo2.append(i)
|
||||
|
||||
#print("\nUnique variants in soln 1: {}".format(len(uniq_diplo1)))
|
||||
#print("\nUnique variants in soln 2: {}".format(len(uniq_diplo2)))
|
||||
|
||||
if len(uniq_diplo1) < len(uniq_diplo2):
|
||||
res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo1[:res2[0]])
|
||||
hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo1, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2):
|
||||
res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo2[:res2[0]])
|
||||
hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo2, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
# elif len(uniq_diplo1) == len(uniq_diplo2) and (diplo1 == "4.v11_74.v1" and diplo2 == "4.v12_1.v1"):
|
||||
# res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
# hap1 = "*" + str (diplo2[:res2[0]])
|
||||
# hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
# allele_res = hap1 + "/" + hap2
|
||||
# return [soln_list1, diplo2, allele_res];
|
||||
|
||||
# elif len(uniq_diplo1) == len(uniq_diplo2) and diplo2 == "41.v1_65.v1":
|
||||
# res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
# hap1 = "*" + str (diplo2[:res2[0]])
|
||||
# hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
# allele_res = hap1 + "/" + hap2
|
||||
# return [soln_list1, diplo2, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
# elif len(uniq_diplo1) == len(uniq_diplo2) and (diplo1 == "4.v1_6.v1" and diplo2 == "4.v4_6.v2") :
|
||||
# res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
|
||||
# hap1 = "*" + str (diplo1[:res2[0]])
|
||||
# hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
|
||||
# allele_res = hap1 + "/" + hap2
|
||||
# return [soln_list1, diplo1, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
else:
|
||||
tiebreak1 = []
|
||||
tiebreak2 = []
|
||||
tiebreak3 = []
|
||||
score = []
|
||||
for line in f:
|
||||
line = line.strip().split()
|
||||
#print(line)
|
||||
if line[2] == core_variants:
|
||||
tiebreak1.append(line[1])
|
||||
tiebreak2.append(line[3])
|
||||
tiebreak3.append(line[0])
|
||||
for full_dip in tiebreak2:
|
||||
diplo_supp_gt = full_dip.split(";")
|
||||
uniq_gt = []
|
||||
for i in all_var_gt:
|
||||
if i not in diplo_supp_gt:
|
||||
uniq_gt.append(i)
|
||||
score_dip = len(uniq_gt)
|
||||
score.append(score_dip)
|
||||
|
||||
min_score = min(score)
|
||||
#print(score)
|
||||
|
||||
|
||||
# if chkList(score) == "Equal" and soln_list1[0] == "17.v1_4.v1":
|
||||
# elem = "17.v1_4.v1"
|
||||
# res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
# hap1 = "*" + str (elem[:res2[0]])
|
||||
# hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
# result_dip = hap1 + "/" + hap2
|
||||
# return [soln_list1, elem, result_dip];
|
||||
|
||||
|
||||
if chkList(score) == "Equal":
|
||||
amb_soln_set = []
|
||||
for elem in soln_list1:
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
amb_soln_set.append(result_dip)
|
||||
#elem_pos = tiebreak1.index(elem)
|
||||
#print ("Solution " + str(elem_pos) + ": " + result_dip)
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, allele_res];
|
||||
|
||||
|
||||
# elif score.count(min_score) > 1 and soln_list1[0] == "11.v1_2.v2":
|
||||
# elem = "11.v1_2.v2"
|
||||
# res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
# hap1 = "*" + str (elem[:res2[0]])
|
||||
# hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
# result_dip = hap1 + "/" + hap2
|
||||
# return [soln_list1, elem, result_dip];
|
||||
|
||||
|
||||
|
||||
elif score.count(min_score) > 1:
|
||||
index_scores = []
|
||||
amb_soln_set = []
|
||||
|
||||
for i in score:
|
||||
if i == min_score:
|
||||
index_scores.append(score.index(i))
|
||||
|
||||
alt_solns = []
|
||||
for j in index_scores:
|
||||
elem = tiebreak1[j]
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
alt_solns.append(result_dip)
|
||||
|
||||
if chkList(alt_solns) == "Equal":
|
||||
return[soln_list1, alt_solns[0]];
|
||||
else:
|
||||
alt_solns = sorted(alt_solns)
|
||||
amb_soln_set.append(alt_solns[0])
|
||||
amb_soln_set.append(alt_solns[-1])
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, allele_res];
|
||||
|
||||
# amb_soln_set = []
|
||||
# temp_set = []
|
||||
# temp_set.append(tiebreak1[0])
|
||||
# temp_set.append(tiebreak1[-1])
|
||||
|
||||
# for elem in temp_set:
|
||||
# res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
# hap1 = "*" + str (elem[:res2[0]])
|
||||
# hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
# result_dip = hap1 + "/" + hap2
|
||||
# amb_soln_set.append(result_dip)
|
||||
#elem_pos = tiebreak1.index(elem)
|
||||
#print ("Solution " + str(elem_pos) + ": " + result_dip)
|
||||
# allele_res = " or ".join(amb_soln_set)
|
||||
# return [soln_list1, allele_res];
|
||||
#print ("\nSupporting core variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
else:
|
||||
minpos = score.index(min_score)
|
||||
best_diplo = tiebreak1[minpos]
|
||||
best_cand_haps = tiebreak3[minpos]
|
||||
res1 = [i for i in range(len(best_diplo)) if best_diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(best_diplo)) if best_diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (best_diplo[:res2[0]])
|
||||
hap2 = "*" + str (best_diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, best_cand_haps, allele_res];
|
||||
#print ("Supporting core variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
elif len(soln_list1) == 3:
|
||||
diplo1 = soln_list1[0]
|
||||
diplo2 = soln_list1[1]
|
||||
diplo3 = soln_list1[2]
|
||||
diplo1_supp_var = soln_list2[0].split(";")
|
||||
diplo2_supp_var = soln_list2[1].split(";")
|
||||
diplo3_supp_var = soln_list2[2].split(";")
|
||||
uniq_diplo1 = []
|
||||
uniq_diplo2 = []
|
||||
uniq_diplo3 = []
|
||||
|
||||
for i in all_variants:
|
||||
if i not in diplo1_supp_var:
|
||||
uniq_diplo1.append(i)
|
||||
|
||||
if i not in diplo2_supp_var:
|
||||
uniq_diplo2.append(i)
|
||||
|
||||
if i not in diplo3_supp_var:
|
||||
uniq_diplo3.append(i)
|
||||
|
||||
|
||||
if len(uniq_diplo1) < len(uniq_diplo2) and len(uniq_diplo1) < len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo1[:res2[0]])
|
||||
hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo1, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2) and len(uniq_diplo2) < len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo2[:res2[0]])
|
||||
hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo2, allele_res]
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2) and len(uniq_diplo2) > len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo3)) if diplo3.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo3)) if diplo3.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo3[:res2[0]])
|
||||
hap2 = "*" + str (diplo3[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo3, allele_res]
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
# elif len(uniq_diplo1) == len(uniq_diplo2) == len(uniq_diplo3) and diplo3 == "39.v1_4.v4":
|
||||
# res1 = [i for i in range(len(diplo3)) if diplo3.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(diplo3)) if diplo3.startswith(".", i)]
|
||||
# hap1 = "*" + str (diplo3[:res2[0]])
|
||||
# hap2 = "*" + str (diplo3[res1[0]+1:res2[1]])
|
||||
# allele_res = hap1 + "/" + hap2
|
||||
# return [soln_list1, diplo3, allele_res]
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
elif len(uniq_diplo1) == len(uniq_diplo2) == len(uniq_diplo3) or (len(uniq_diplo1) != len(uniq_diplo2) == len(uniq_diplo3)) or (len(uniq_diplo1) == len(uniq_diplo2) != len(uniq_diplo3)):
|
||||
|
||||
tiebreak1 = []
|
||||
tiebreak2 = []
|
||||
tiebreak3 = []
|
||||
score = []
|
||||
for line in f:
|
||||
line = line.strip().split()
|
||||
#print(line)
|
||||
if line[2] == core_variants:
|
||||
tiebreak1.append(line[1])
|
||||
tiebreak2.append(line[3])
|
||||
tiebreak3.append(line[0])
|
||||
for full_dip in tiebreak2:
|
||||
diplo_supp_gt = full_dip.split(";")
|
||||
uniq_gt = []
|
||||
for i in all_var_gt:
|
||||
if i not in diplo_supp_gt:
|
||||
uniq_gt.append(i)
|
||||
score_dip = len(uniq_gt)
|
||||
score.append(score_dip)
|
||||
|
||||
min_score = min(score)
|
||||
# print(score)
|
||||
|
||||
if chkList(score) == "Equal":
|
||||
amb_soln_set = []
|
||||
for elem in tiebreak1:
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
amb_soln_set.append(result_dip)
|
||||
#elem_pos = tiebreak1.index(elem)
|
||||
#print ("Solution " + str(elem_pos) + ": " + result_dip)
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, tiebreak1, allele_res];
|
||||
#print ("\nSupporting core variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
elif score.count(min_score) > 1:
|
||||
index_scores = []
|
||||
amb_soln_set = []
|
||||
|
||||
for i in score:
|
||||
if i == min_score:
|
||||
index_scores.append(score.index(i))
|
||||
|
||||
alt_solns = []
|
||||
for j in index_scores:
|
||||
elem = tiebreak1[j]
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
alt_solns.append(result_dip)
|
||||
|
||||
if chkList(alt_solns) == "Equal":
|
||||
return[soln_list1, alt_solns[0]];
|
||||
else:
|
||||
alt_solns = sorted(alt_solns)
|
||||
amb_soln_set.append(alt_solns[0])
|
||||
amb_soln_set.append(alt_solns[-1])
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, allele_res];
|
||||
|
||||
|
||||
|
||||
else:
|
||||
minpos = score.index(min_score)
|
||||
best_diplo = tiebreak1[minpos]
|
||||
best_cand_haps = tiebreak3[minpos]
|
||||
res1 = [i for i in range(len(best_diplo)) if best_diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(best_diplo)) if best_diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (best_diplo[:res2[0]])
|
||||
hap2 = "*" + str (best_diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, best_cand_haps, allele_res];
|
||||
#print ("Supporting core variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
|
||||
#print("\nFull diplotype variants:")
|
||||
#print("\n" + ";".join(all_var_gt))
|
||||
218
pgx-main/scripts/cyp2e1/b37/bin/stellarpgx.py
Normal file
218
pgx-main/scripts/cyp2e1/b37/bin/stellarpgx.py
Normal file
@@ -0,0 +1,218 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from sv_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
|
||||
# CYP2E1 (b37) star-allele calling driver.
#
# Flat script: reads ten positional command-line arguments, estimates the
# gene copy number from the coverage file, asks the SNV caller for candidate
# diplotypes and then adjusts the printed call according to the copy number.

print("--------------------------------------------\n")

print("CYP2E1 Star Allele Calling with StellarPGx\n")

print("--------------------------------------------\n")


database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]
sv_del = sys.argv[6]
sv_dup = sys.argv[7]
cov_file = sys.argv[8]
hap_dbs = sys.argv[9]
act_score = sys.argv[10]  # read (so a missing argument still fails) but not used below


# Parse the coverage file once; element 0 is the copy number (a string) and
# element 1 the rounded mean gene coverage.
cn_summary = get_total_CN(cov_file)
cn = cn_summary[0]

print("Initially computed CN = {}".format(cn))

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)


snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)

if snv_def_calls is None:
    # No database diplotype matched: fall back to background alleles and stop.
    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if bac_alleles is None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")

    else:
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    sys.exit()


best_diplos = snv_def_calls[0]

print("\nCandidate alleles:")
print(best_diplos)


snv_def_alleles = snv_def_calls[-1]

# Ambiguous calls ("A or B") carry no single candidate diplotype, so
# snv_cand_alleles is only bound for unambiguous calls.
if "or" not in snv_def_alleles:
    snv_cand_alleles = snv_def_calls[1]


# Result is not used below; the call is kept because get_all_vars_gt is
# defined outside this script — TODO confirm it can be dropped.
dip_variants = get_all_vars_gt(infile_full_gt)


print("\nResult:")


av_cov = cn_summary[1]


gene_alleles = ""


if snv_def_alleles != '*ref/*ref' and cn != '0':
    in_list = dup_test_init(sv_dup, av_cov)


if cn == '2':

    if 'or' in snv_def_alleles:
        print (snv_def_alleles)

    else:
        gene_alleles = snv_def_alleles
        print(gene_alleles)


elif cn == '0':
    del_confirm = del_test(sv_del)

    if del_confirm == '*(full_gene_del)/*(full_gene_del)':
        gene_alleles = del_confirm
        print (gene_alleles)

    elif del_confirm == '*(full_gene_del)':
        gene_alleles = del_confirm + "/" + "*other"
        print(gene_alleles)

    else:
        gene_alleles = "*(full_gene_del)/*(full_gene_del)"
        print(gene_alleles)


elif cn == '1':
    # del_test returns the *string* 'None' when the deletion file is empty.
    del_confirm = del_test(sv_del)

    if "or" in snv_def_alleles and del_confirm == 'None':
        print (snv_def_alleles + "\t" + "Possible CYP2E1 gene deletion present")

    elif "or" not in snv_def_alleles and del_confirm == 'None':
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] == snv_def_alleles[1]:
            gene_alleles = snv_def_alleles[0] + "/" + "*(full_gene_del)"
            print(gene_alleles)

        elif snv_def_alleles[0] != snv_def_alleles[1]:
            samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)

            gene_alleles = samp_allele1 + "/" + "*(full_gene_del)"
            print(gene_alleles)

    else:
        # NOTE(review): an ambiguous ("or") call reaching this branch hits an
        # unbound snv_cand_alleles, and a del_confirm of Python None (no
        # decisive line in the deletion file) fails on the string
        # concatenation below — behaviour left unchanged; confirm the
        # intended output for both cases.
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] == snv_def_alleles[1]:

            # With a single remaining copy only one deletion allele is reported.
            if del_confirm == "*(full_gene_del)/*(full_gene_del)":
                del_confirm = "*(full_gene_del)"

            gene_alleles = del_confirm + "/" + snv_def_alleles[0]
            print(gene_alleles)

        elif snv_def_alleles[0] != snv_def_alleles[1]:
            samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)

            if del_confirm == "*(full_gene_del)/*(full_gene_del)":
                del_confirm = "*(full_gene_del)"

            gene_alleles = del_confirm + "/" + samp_allele1
            print(gene_alleles)


elif int(cn) in (3, 4) and snv_def_alleles is not None:

    if "or" in snv_def_alleles:
        print (snv_def_alleles + "\t" + "Duplication present")

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] != snv_def_alleles[1]:
            phased_dup = dup_test_cn_3_4(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)

        elif snv_def_alleles[0] == snv_def_alleles[1]:
            # Homozygous call: every extra copy is assigned to the second allele.
            rt_2 = int(cn) - 1
            phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))

        gene_alleles = phased_dup

        print(gene_alleles)


elif int(cn) > 4 and snv_def_alleles is not None:

    if "or" in snv_def_alleles:
        print (snv_def_alleles + "\t" + "Duplication present")

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] != snv_def_alleles[1]:
            phased_dup = dup_test_cn_n(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)

        elif snv_def_alleles[0] == snv_def_alleles[1]:
            rt_2 = int(cn) - 1
            phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))

        gene_alleles = phased_dup
        print(phased_dup)


elif int(cn) > 2 and snv_def_alleles is None:

    print("Possible rare CYP2E1 structural variant present")
|
||||
331
pgx-main/scripts/cyp2e1/b37/bin/sv_modules.py
Normal file
331
pgx-main/scripts/cyp2e1/b37/bin/sv_modules.py
Normal file
@@ -0,0 +1,331 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
def get_total_CN(cov_file):
    """Estimate the total gene copy number from a per-region coverage file.

    Every line of ``cov_file`` is split on whitespace. For the first three
    rows, field 3 is divided by (field 2 - field 1) to obtain a mean
    coverage (presumably BED-style start/end plus summed depth — TODO
    confirm against the file producer). Row 0 is treated as the gene and
    rows 1 and 2 as the two control regions whose means are averaged.

    Returns a three-element list: the copy number as a string
    (``round(2 * gene / control)``), the rounded mean gene coverage and the
    rounded mean control coverage.
    """
    # Context manager so the handle is closed even if parsing fails.
    with open(cov_file, "r") as handle:
        all_reg = [line.strip().split() for line in handle]

    def mean_cov(region):
        return float(region[3]) / (float(region[2]) - float(region[1]))

    av_2e1_cov = mean_cov(all_reg[0])
    av_vdr_cov = mean_cov(all_reg[1])
    av_egfr_cov = mean_cov(all_reg[2])

    av_ctrl_cov = (av_vdr_cov + av_egfr_cov) / 2

    # A diploid sample is expected to show gene/control coverage of ~1.
    total_cn = round(2 * (av_2e1_cov / av_ctrl_cov))

    return [str(int(total_cn)), round(av_2e1_cov), round(av_ctrl_cov)]
|
||||
|
||||
|
||||
def del_test(sv_del):
    """Classify a gene deletion from the deletion-evidence file ``sv_del``.

    Returns:
        * the string ``"None"`` when the file is empty;
        * ``"*(full_gene_del)/*(full_gene_del)"`` when the first line
          containing ``COVERAGE`` whose last field is decisive has a last
          field equal to 1.0;
        * ``"*(full_gene_del)"`` when that last field equals -1.0;
        * Python ``None`` when no ``COVERAGE`` line carries 1.0 or -1.0.
    """
    if os.stat(sv_del).st_size == 0:
        return "None"

    # Context manager so the handle is released on every return path.
    with open(sv_del, "r") as handle:
        for line in handle:
            if "COVERAGE" not in line:
                continue

            # Last whitespace-separated field (named ABHom in the original code).
            ab_hom = float(line.strip().split()[-1])

            if ab_hom == 1.0:
                return "*(full_gene_del)/*(full_gene_del)"
            if ab_hom == -1.0:
                return "*(full_gene_del)"

    return None
|
||||
|
||||
|
||||
# Kept so code that references these module-level names keeps working;
# del_adv_test no longer appends to them (see its docstring).
hap_adv_list = []
hap_t1 = []


def del_adv_test(hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, core_vars):
    """Pick which of two alleles carries the sample's first heterozygous variant.

    ``hap_dbs`` is read as whitespace-separated rows where field 1 is a
    haplotype identifier and field 2 a ``;``-joined variant list.
    ``core_vars`` is a ``;``-joined string; entries ending in ``0/1`` are
    treated as heterozygous and their last four characters are stripped to
    obtain the variant name.

    Returns ``test_allele1`` if the first heterozygous variant is listed for
    ``cand_allele1``, ``test_allele2`` if it is listed for ``cand_allele2``,
    and ``None`` otherwise.

    Raises:
        IndexError: ``core_vars`` has no entry ending in ``0/1``.
        KeyError: a candidate haplotype that has to be consulted is absent
            from ``hap_dbs``.

    State is now local to each call. Previously the rows and heterozygous
    variants were appended to module-level lists, so a second call still
    looked at the first call's variant.
    """
    with open(hap_dbs, "r") as handle:
        # Later rows win when an identifier repeats, matching the old loop.
        rows_by_hap = {row[1]: row for row in (line.strip().split() for line in handle)}

    het_variants = [entry[:-4] for entry in core_vars.split(";") if entry[-3:] == "0/1"]
    first_het = het_variants[0]

    if first_het in rows_by_hap[cand_allele1][2].split(";"):
        return test_allele1
    if first_het in rows_by_hap[cand_allele2][2].split(";"):
        return test_allele2
    return None
|
||||
|
||||
|
||||
# Kept so code that references these module-level names keeps working;
# dup_test_init no longer appends to them (see its docstring).
het_hom_list = []
het_hom_list_new = []


def dup_test_init(sv_dup, av_cov):
    """Annotate each variant row of ``sv_dup`` with supporting-read statistics.

    Lines containing ``COVERAGE`` or ``AGGREGATED`` are ignored, as are blank
    lines. Every remaining line is split on whitespace; field 2 is parsed as
    an integer depth and the second-to-last field as a float fraction.

    For each row two values are appended, in this order:
    ``round(supp_reads / norm_cov, 3)`` and ``supp_reads``, where
    ``supp_reads = round(fraction * depth)`` and ``norm_cov`` is the mean of
    ``av_cov`` and the average depth over all rows.

    Returns the list of annotated rows, or an empty list when the file holds
    no variant rows (previously a ZeroDivisionError).

    Rows are built in a list local to the call. Previously they were appended
    to module-level lists, so a second call returned duplicated rows whose
    second-to-last field was already the appended ratio.
    """
    with open(sv_dup, "r") as handle:
        rows = [
            line.strip().split()
            for line in handle
            if "COVERAGE" not in line and "AGGREGATED" not in line
        ]
    rows = [row for row in rows if row]

    if not rows:
        return []

    depths = [int(row[2]) for row in rows]
    av_read_cov = sum(depths) / len(depths)
    norm_cov = (av_cov + av_read_cov) / 2

    annotated = []
    for row, depth in zip(rows, depths):
        # row[-2] is read before anything is appended to the row.
        supp_reads = round(float(row[-2]) * depth)
        annotated.append(row + [round(supp_reads / norm_cov, 3), supp_reads])

    return annotated
|
||||
|
||||
|
||||
# Kept so code that references these module-level names keeps working; the
# duplication tests below no longer append to them.
hap_def_list = []
allele_cn_list = []


def _phase_duplication(hap_dbs, cand_allele1, cand_allele2, test_allele1,
                       test_allele2, c_num, in_list, rounding_offset,
                       max_named_copies):
    """Shared implementation of dup_test_cn_3_4 and dup_test_cn_n.

    Picks the heterozygous row of ``in_list`` (field 1 == "0/1") with the
    largest second-to-last value, finds which candidate haplotype in
    ``hap_dbs`` lists that variant, and splits ``c_num`` copies between the
    two alleles: the allele carrying the variant gets
    ``round(value * c_num - rounding_offset)`` copies, the other allele the
    rest. ``max_named_copies`` is the highest per-allele copy count that gets
    its own "xN" label before the result falls back to "check".
    """
    with open(hap_dbs, "r") as handle:
        hap_rows = [line.strip().split() for line in handle]

    het_rows = [row for row in in_list if row[1] == "0/1"]
    ratios = [row[-2] for row in het_rows]
    max_het = max(ratios)  # ValueError when there is no heterozygous row
    var = het_rows[ratios.index(max_het)][0]

    # Later rows win when an identifier repeats, matching the old loop.
    rows_by_hap = {row[1]: row for row in hap_rows}
    first_vars = rows_by_hap[cand_allele1][2].split(";")
    second_vars = rows_by_hap[cand_allele2][2].split(";")

    total = int(c_num)
    phased_cn = int(round(max_het * total - rounding_offset))

    # Positions before len(first_vars) belong to the first candidate.
    if (first_vars + second_vars).index(var) < len(first_vars):
        phased, other = test_allele1, test_allele2
    else:
        phased, other = test_allele2, test_allele1
    other_cn = total - phased_cn

    if phased_cn == 0:
        return phased + "/" + other + "x" + str(other_cn - 1)
    if other_cn == 0:
        return other + "/" + phased + "x" + str(phased_cn - 1)
    if phased_cn == 1:
        return phased + "/" + other + "x" + str(other_cn)
    if other_cn == 1:
        return other + "/" + phased + "x" + str(phased_cn)

    for copies in range(2, max_named_copies + 1):
        if phased_cn == copies:
            return phased + "x" + str(copies) + "/" + other + "x" + str(other_cn)
        if other_cn == copies:
            return other + "x" + str(copies) + "/" + phased + "x" + str(phased_cn)

    return 'check'


def dup_test_cn_3_4(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
    """Phase a duplication between two different alleles (used for CN 3 or 4).

    ``in_list`` is the output of dup_test_init; ``hap_dbs`` is read as
    whitespace-separated rows with a haplotype identifier in field 1 and a
    ``;``-joined variant list in field 2. ``sv_dup`` and ``av_cov`` are
    accepted for signature compatibility and are not used.

    Returns a diplotype string such as ``"*2/*1x2"``, or ``"check"`` when no
    labelled copy-number split (0, 1 or 2 copies on one allele) applies.

    Raises ValueError when ``in_list`` has no ``0/1`` row or the chosen
    variant is in neither haplotype, and KeyError when a candidate haplotype
    is missing from ``hap_dbs``.

    All working state is local to the call. Previously the copy-number
    assignment was appended to a module-level list shared with
    dup_test_cn_n, so any second call returned the first call's answer.
    """
    return _phase_duplication(hap_dbs, cand_allele1, cand_allele2, test_allele1,
                              test_allele2, c_num, in_list,
                              rounding_offset=0, max_named_copies=2)


def dup_test_cn_n(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
    """Phase a duplication between two different alleles (used for CN above 4).

    Same contract as dup_test_cn_3_4 with two differences taken from the
    original code: 0.15 is subtracted before rounding the copy count of the
    variant-carrying allele, and per-allele counts up to 4 get their own
    "xN" label before the result falls back to ``"check"``.
    """
    return _phase_duplication(hap_dbs, cand_allele1, cand_allele2, test_allele1,
                              test_allele2, c_num, in_list,
                              rounding_offset=0.15, max_named_copies=4)
|
||||
|
||||
|
||||
# def hybrid_29_test1(cov_e1_int4, cov_int4_e9):
|
||||
|
||||
# if 0.85 < float(cov_e1_int4)/float(cov_int4_e9) < 1.2:
|
||||
# return 'norm_var'
|
||||
|
||||
# elif 0.45 < float(cov_e1_int4)/float(cov_int4_e9) < 0.75:
|
||||
# return 'hyb_29'
|
||||
|
||||
# elif float(cov_e1_int4)/float(cov_int4_e9) < 0.15:
|
||||
# return 'hyb_29_2'
|
||||
|
||||
# else:
|
||||
# return 'norm_var'
|
||||
|
||||
|
||||
# def hybrid_30_test1(cov_e1_int4, cov_int4_e9):
|
||||
|
||||
# if 0.85 < float(cov_e1_int4)/float(cov_int4_e9) < 1.2:
|
||||
# return 'norm_var'
|
||||
|
||||
# elif 0.45 < float(cov_int4_e9)/float(cov_e1_int4) < 0.75:
|
||||
# return 'hyb_30'
|
||||
|
||||
# elif float(cov_int4_e9)/float(cov_e1_int4) < 0.15:
|
||||
# return 'hyb_30_2'
|
||||
|
||||
# else:
|
||||
# return 'norm_var'
|
||||
|
||||
56
pgx-main/scripts/cyp2e1/hg38/bin/stellarpgx.py
Normal file
56
pgx-main/scripts/cyp2e1/hg38/bin/stellarpgx.py
Normal file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import math
|
||||
from sv_modules import *
|
||||
|
||||
|
||||
# Start-up banner. Each string carries its own trailing "\n", so print()
# leaves a blank line after every banner line.
for banner_line in (
    "--------------------------------------------\n",
    "CYP2E1 Variant Analysis with StellarPGx\n",
    "--------------------------------------------\n",
):
    print(banner_line)


# Positional arguments: the core-variant file handed to get_core_variants
# and the coverage file handed to get_total_CN.
infile, cov_file = sys.argv[1], sys.argv[2]
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ``;``-joined string.

    Each line of ``infile`` is stripped and the lines are sorted before
    joining. When ``int(cn)`` is 1, every ``~0/1`` in the joined string is
    rewritten to ``~1/1``. An empty file yields a fixed message saying no
    core SNVs were detected.
    """
    # Context manager so the handle is closed; the old loop never closed it.
    with open(infile, "r") as handle:
        lines = [line.strip() for line in handle]

    core_vars = ";".join(sorted(lines))

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    # No lines read is equivalent to the former os.stat(...).st_size == 0
    # test, without a second trip to the filesystem.
    if not lines:
        core_vars = "No core SNVs detected; haplotypes equivalent to GRCh38 content"

    return core_vars
|
||||
|
||||
|
||||
|
||||
# Parse the coverage file once instead of once per field: element 0 is the
# copy number (string), element 2 the rounded mean control coverage and
# element 3 the list of per-exon mean coverages.
cn_summary = get_total_CN(cov_file)
cn = cn_summary[0]
av_ctrl_cov = cn_summary[2]
exon_cov = cn_summary[3]

print("Initially computed Copy Number = {}".format(cn))

# Flag exons whose coverage is below 45% of the control coverage.
# NOTE(review): as in the original loop (range(1, len(exon_cov)) indexed with
# i-1), the final entry of exon_cov is never examined — confirm whether that
# is intended before changing it.
for exon_number, exon_mean_cov in enumerate(exon_cov[:-1], start=1):
    if exon_mean_cov/av_ctrl_cov < 0.45:
        print ('Check exon {} for potential deletion if using high coverage WGS'.format(str(exon_number)))

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)
|
||||
|
||||
|
||||
37
pgx-main/scripts/cyp2e1/hg38/bin/sv_modules.py
Normal file
37
pgx-main/scripts/cyp2e1/hg38/bin/sv_modules.py
Normal file
@@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
def get_total_CN(cov_file):
    """Estimate the gene copy number from a per-region coverage file.

    Every line of ``cov_file`` is split on whitespace. For each region the
    average depth is field 3 divided by (field 2 - field 1); presumably
    these are summed depth and BED-style start/end - confirm against the
    producer of the file. Row 0 is the target gene, rows 1 and 2 are the
    two control regions, and the remaining rows are per-exon regions.

    Args:
        cov_file: path to the coverage file (at least three rows).

    Returns:
        ``[copy_number_str, rounded_gene_depth, rounded_control_depth,
        exon_depths]`` where the copy number is
        ``round(2 * gene_depth / mean_control_depth)`` rendered as a string
        and ``exon_depths`` is a list of unrounded floats.

    Raises:
        OSError: if ``cov_file`` cannot be opened.
        IndexError / ValueError: on missing rows or malformed fields.
        ZeroDivisionError: on a zero-length region or zero control depth.
    """

    def _mean_depth(region):
        # Same arithmetic the original repeated inline for every region.
        return float(region[3]) / (float(region[2]) - float(region[1]))

    # The context manager closes the handle; the original leaked it.
    with open(cov_file, "r") as handle:
        all_reg = [line.strip().split() for line in handle]

    av_cyp2e1_cov = _mean_depth(all_reg[0])
    av_vdr_cov = _mean_depth(all_reg[1])
    av_egfr_cov = _mean_depth(all_reg[2])

    # NOTE(review): the original loop ran over range(1, len(regions)) and
    # therefore never visited the final row. That is kept as-is with [:-1];
    # confirm whether the last row is meant to be excluded.
    exon_regions = all_reg[3:]
    exon_cov_list = [_mean_depth(region) for region in exon_regions[:-1]]

    av_ctrl_cov = (av_vdr_cov + av_egfr_cov) / 2

    # The factor of two scales the gene/control depth ratio to a copy number.
    total_cn = round(2 * (av_cyp2e1_cov / av_ctrl_cov))

    return [str(int(total_cn)), round(av_cyp2e1_cov), round(av_ctrl_cov), exon_cov_list]
|
||||
|
||||
78
pgx-main/scripts/cyp3a4/b37/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/cyp3a4/b37/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Pick the database diplotype that best matches the sample's core variants.

    Args:
        database: path to a tab-separated file; column 0 is a diplotype ID
            such as ``1.v1_22.v1`` and column 1 its ';'-joined core variants.
        core_vars: the sample's ';'-joined core variants.

    Returns:
        ``[star_alleles, diplotype_id]``, e.g. ``['*1/*22', '1.v1_22.v1']``.
        When no record qualifies the default ``['*1/*1', '1.v1_1.v1']`` is
        returned.

    Raises:
        OSError: if ``database`` cannot be opened.
        IndexError: on a database row without a second column, or a
            diplotype ID that lacks the expected "." / "_" separators.
    """
    core_vars_list = core_vars.split(";")
    # [:-4] drops the last four characters of the last/first sample variant,
    # presumably the "~0/1"-style genotype suffix.
    core_temp1 = core_vars_list[-1][:-4]
    core_temp2 = core_vars_list[0][:-4]

    candidates = []
    cand_sizes = []
    scores = []

    # The context manager closes the handle; the original leaked it.
    with open(database, "r") as handle:
        for line in handle:
            record = line.strip().split("\t")
            temp_rec = record[1]

            # Bug fix: the original `core_temp1 and core_temp2 in temp_rec`
            # only tested membership of core_temp2. Both sample variants
            # must occur in the record. An empty core_temp1 still rejects
            # every record, as before.
            if not (core_temp1 and core_temp1 in temp_rec and core_temp2 in temp_rec):
                continue

            record_core_var = temp_rec.split(";")
            counter = 0
            for var in record_core_var:
                if var in core_vars_list:
                    counter += 3      # same variant, same genotype
                elif var[:-4] in core_vars:
                    counter += 1      # variant text occurs in the sample string
                else:
                    counter -= 2      # record variant absent from the sample

            candidates.append(record[0])
            cand_sizes.append(len(record_core_var))
            scores.append(counter)

    if not scores:
        return ['*1/*1', '1.v1_1.v1']

    # Shortlist the best score and scores one point below it, then prefer
    # the shortlisted record with the fewest core variants (first on ties).
    max_score = max(scores)
    shortlist = [idx for idx, value in enumerate(scores)
                 if value == max_score or value == max_score - 1]
    chosen = min(shortlist, key=lambda idx: cand_sizes[idx])
    diplo1 = candidates[chosen]

    # "1.v1_22.v1" -> "*1/*22": text before the first ".", and text between
    # the first "_" and the second ".".
    underscores = [pos for pos, char in enumerate(diplo1) if char == "_"]
    dots = [pos for pos, char in enumerate(diplo1) if char == "."]
    hap1 = "*" + diplo1[:dots[0]]
    hap2 = "*" + diplo1[underscores[0] + 1:dots[1]]
    allele_res = hap1 + "/" + hap2

    return [allele_res, diplo1]
|
||||
310
pgx-main/scripts/cyp3a4/b37/bin/snv_def_modules.py
Normal file
310
pgx-main/scripts/cyp3a4/b37/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,310 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ';'-joined string.

    Args:
        infile: path to a text file holding one variant call per line.
        cn: copy number; any value accepted by ``int()``.

    Returns:
        The stripped lines of ``infile``, sorted and joined with ";"
        (an empty string for an empty file). When ``cn`` is 1 every
        "~0/1" genotype tag is rewritten to "~1/1".

    Raises:
        OSError: if ``infile`` cannot be opened.
        ValueError: if ``cn`` cannot be converted to an integer.
    """
    # The context manager closes the handle; the original left it open.
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars
|
||||
|
||||
def get_all_vars_gt(infile_full_gt):
    """Return every line of ``infile_full_gt`` sorted and joined with ";".

    Args:
        infile_full_gt: path to a text file with one genotype call per line.

    Returns:
        The stripped lines, sorted, as a single ';'-separated string
        (an empty string for an empty file).

    Raises:
        OSError: if the file cannot be opened.
    """
    # The context manager closes the handle; the original left it open.
    with open(infile_full_gt, "r") as handle:
        return ";".join(sorted(line.strip() for line in handle))
|
||||
|
||||
def _star_diplotype(diplo):
    """Convert a database diplotype ID such as ``1.v1_22.v1`` to ``*1/*22``."""
    dots = [pos for pos, char in enumerate(diplo) if char == "."]
    underscores = [pos for pos, char in enumerate(diplo) if char == "_"]
    hap1 = "*" + diplo[:dots[0]]
    hap2 = "*" + diplo[underscores[0] + 1:dots[1]]
    return hap1 + "/" + hap2


def _all_equal(items):
    """Return True when every element equals the first (vacuously for [])."""
    return all(item == items[0] for item in items)


def _score_spec_candidates(infile_spec, core_variants, all_var_gt):
    """Collect and score tie-break rows whose core variants match the sample.

    Rows of ``infile_spec`` are split on whitespace. A row is kept when its
    field 2 equals ``core_variants``. Its score is the number of entries of
    ``all_var_gt`` that are missing from its ';'-separated field 3.

    Returns:
        ``(diplotype_ids, field0_values, scores)`` as parallel lists, taken
        from fields 1 and 0 of the kept rows.
    """
    diplos = []
    hap_ids = []
    scores = []
    with open(infile_spec, "r") as handle:
        for line in handle:
            fields = line.strip().split()
            if fields[2] == core_variants:
                diplos.append(fields[1])
                supp_gt = fields[3].split(";")
                hap_ids.append(fields[0])
                scores.append(sum(1 for gt in all_var_gt if gt not in supp_gt))
    return diplos, hap_ids, scores


def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
    """Call the star-allele diplotype from the sample's core variants.

    Database rows (tab-separated: diplotype ID, core variants, supporting
    variants) whose sorted core variants equal the sample's are candidate
    solutions. One candidate is returned directly. Two or three candidates
    are first compared on how many ``infile_full`` lines they leave
    unexplained, then on the genotype tie-break scores from ``infile_spec``.

    Args:
        database: path to the diplotype database.
        infile: path to the sample's core variants (one per line).
        infile_full: path to the sample's full variant list.
        infile_full_gt: path to the sample's genotype calls (one per line).
        infile_spec: path to the tie-break table (see
            ``_score_spec_candidates``); only opened when a tie-break is
            needed.
        cn: copy number, forwarded to ``get_core_variants``.

    Returns:
        * ``['1.v1_1.v1', '*1/*1']`` when ``infile`` is empty;
        * ``[candidates, chosen, star_alleles]`` for an unambiguous call
          (``chosen`` is a diplotype ID, field 0 of the tie-break row, or
          the list of tied diplotype IDs in the three-candidate case);
        * ``[candidates, star_alleles]`` for the two-candidate tie paths,
          where ``star_alleles`` may be "X or Y";
        * ``None`` when there are zero or more than three candidates, or
          no rule applies. Callers read only ``[0]`` and ``[-1]``.

    Raises:
        OSError: if an input file cannot be opened.
        ValueError: if a tie-break is needed but no ``infile_spec`` row
            matches the sample (``min()`` of an empty score list).
        IndexError: on malformed rows or diplotype IDs.
    """
    # NOTE(review): the original called line.strip() and discarded the
    # result, so these entries keep their trailing newline and are compared
    # verbatim against single variants below. Stripping would change which
    # branch is taken, so the behavior is preserved - confirm the intent.
    with open(infile_full, "r") as handle:
        all_variants = list(handle)

    if os.stat(infile).st_size == 0:
        # No core variants at all: report the default diplotype. The
        # original had an unreachable sys.exit() after this return.
        return ["1.v1_1.v1", "*1/*1"]

    core_variants = get_core_variants(infile, cn)

    with open(infile_full_gt, "r") as handle:
        all_var_gt = [line.strip() for line in handle]

    soln_list1 = []   # matching diplotype IDs
    soln_list2 = []   # their ';'-joined supporting variants
    with open(database, "r") as handle:
        for line in handle:
            record = line.strip().split("\t")
            if ";".join(sorted(record[1].split(";"))) == core_variants:
                soln_list1.append(record[0])
                soln_list2.append(record[2])

    def unexplained(supp_field):
        # Number of infile_full lines not listed among a candidate's
        # supporting variants.
        supp_vars = supp_field.split(";")
        return sum(1 for var in all_variants if var not in supp_vars)

    if len(soln_list1) == 1:
        diplo = "".join(soln_list1)
        return [soln_list1, diplo, _star_diplotype(diplo)]

    if len(soln_list1) == 2:
        print(soln_list1)
        diplo1, diplo2 = soln_list1
        miss1 = unexplained(soln_list2[0])
        miss2 = unexplained(soln_list2[1])

        if miss1 < miss2:
            return [soln_list1, diplo1, _star_diplotype(diplo1)]
        if miss1 > miss2:
            return [soln_list1, diplo2, _star_diplotype(diplo2)]

        tiebreak1, tiebreak3, score = _score_spec_candidates(
            infile_spec, core_variants, all_var_gt)
        min_score = min(score)

        if _all_equal(score):
            # Genotypes cannot separate the candidates: report both,
            # collapsed to one when they name the same star alleles.
            amb_soln_set = [_star_diplotype(elem) for elem in soln_list1]
            if amb_soln_set[0] != amb_soln_set[1]:
                allele_res = " or ".join(amb_soln_set)
            else:
                allele_res = amb_soln_set[0]
            return [soln_list1, allele_res]

        if score.count(min_score) > 1:
            # Bug fix: the original used score.index(i), which always
            # returns the first minimum, so tied rows were never compared.
            alt_solns = [_star_diplotype(tiebreak1[idx])
                         for idx, value in enumerate(score) if value == min_score]
            if _all_equal(alt_solns):
                return [soln_list1, alt_solns[0]]
            alt_solns = sorted(alt_solns)
            return [soln_list1, " or ".join([alt_solns[0], alt_solns[-1]])]

        minpos = score.index(min_score)
        return [soln_list1, tiebreak3[minpos], _star_diplotype(tiebreak1[minpos])]

    if len(soln_list1) == 3:
        diplo1, diplo2, diplo3 = soln_list1
        miss1 = unexplained(soln_list2[0])
        miss2 = unexplained(soln_list2[1])
        miss3 = unexplained(soln_list2[2])

        if miss1 < miss2 and miss1 < miss3:
            return [soln_list1, diplo1, _star_diplotype(diplo1)]
        if miss1 > miss2 and miss2 < miss3:
            return [soln_list1, diplo2, _star_diplotype(diplo2)]
        if miss1 > miss2 and miss2 > miss3:
            return [soln_list1, diplo3, _star_diplotype(diplo3)]

        if (miss1 == miss2 == miss3) or (miss1 != miss2 == miss3) or (miss1 == miss2 != miss3):
            tiebreak1, tiebreak3, score = _score_spec_candidates(
                infile_spec, core_variants, all_var_gt)
            min_score = min(score)

            if _all_equal(score):
                amb_soln_set = [_star_diplotype(elem) for elem in tiebreak1]
                return [soln_list1, tiebreak1, " or ".join(amb_soln_set)]

            minpos = score.index(min_score)
            return [soln_list1, tiebreak3[minpos], _star_diplotype(tiebreak1[minpos])]

    # Zero or more than three candidates, or an ordering not covered above:
    # the caller treats None as "possible novel allele".
    return None
|
||||
73
pgx-main/scripts/cyp3a4/b37/bin/stellarpgx.py
Normal file
73
pgx-main/scripts/cyp3a4/b37/bin/stellarpgx.py
Normal file
@@ -0,0 +1,73 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
|
||||
print("--------------------------------------------\n")
print("CYP3A4 Star Allele Calling with StellarPGx\n")
print("--------------------------------------------\n")

# Positional command-line arguments, in the order the callers below use them.
database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]

# Copy number is hard-coded to 2 in this script.
cn = 2

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)

snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)

# None means no database diplotype explained the core variants: report the
# closest background alleles and stop.
if snv_def_calls is None:
    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if bac_alleles is None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")
    else:
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    sys.exit()

# First element: candidate diplotype(s); last element: star-allele result.
best_diplos = snv_def_calls[0]

print("\nCandidate alleles:")
print(best_diplos)

snv_def_alleles = snv_def_calls[-1]

# The original also computed an unused get_all_vars_gt(infile_full_gt)
# here; that dead re-read of the file has been dropped.
print("\nResult:")
print(snv_def_alleles)
|
||||
0
pgx-main/scripts/cyp3a4/b37/bin/sv_modules.py
Normal file
0
pgx-main/scripts/cyp3a4/b37/bin/sv_modules.py
Normal file
78
pgx-main/scripts/cyp3a4/hg38/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/cyp3a4/hg38/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Pick the database diplotype that best matches the sample's core variants.

    Args:
        database: path to a tab-separated file; column 0 is a diplotype ID
            such as ``1.v1_22.v1`` and column 1 its ';'-joined core variants.
        core_vars: the sample's ';'-joined core variants.

    Returns:
        ``[star_alleles, diplotype_id]``, e.g. ``['*1/*22', '1.v1_22.v1']``.
        When no record qualifies the default ``['*1/*1', '1.v1_1.v1']`` is
        returned.

    Raises:
        OSError: if ``database`` cannot be opened.
        IndexError: on a database row without a second column, or a
            diplotype ID that lacks the expected "." / "_" separators.
    """
    core_vars_list = core_vars.split(";")
    # [:-4] drops the last four characters of the last/first sample variant,
    # presumably the "~0/1"-style genotype suffix.
    core_temp1 = core_vars_list[-1][:-4]
    core_temp2 = core_vars_list[0][:-4]

    candidates = []
    cand_sizes = []
    scores = []

    # The context manager closes the handle; the original leaked it.
    with open(database, "r") as handle:
        for line in handle:
            record = line.strip().split("\t")
            temp_rec = record[1]

            # Bug fix: the original `core_temp1 and core_temp2 in temp_rec`
            # only tested membership of core_temp2. Both sample variants
            # must occur in the record. An empty core_temp1 still rejects
            # every record, as before.
            if not (core_temp1 and core_temp1 in temp_rec and core_temp2 in temp_rec):
                continue

            record_core_var = temp_rec.split(";")
            counter = 0
            for var in record_core_var:
                if var in core_vars_list:
                    counter += 3      # same variant, same genotype
                elif var[:-4] in core_vars:
                    counter += 1      # variant text occurs in the sample string
                else:
                    counter -= 2      # record variant absent from the sample

            candidates.append(record[0])
            cand_sizes.append(len(record_core_var))
            scores.append(counter)

    if not scores:
        return ['*1/*1', '1.v1_1.v1']

    # Shortlist the best score and scores one point below it, then prefer
    # the shortlisted record with the fewest core variants (first on ties).
    max_score = max(scores)
    shortlist = [idx for idx, value in enumerate(scores)
                 if value == max_score or value == max_score - 1]
    chosen = min(shortlist, key=lambda idx: cand_sizes[idx])
    diplo1 = candidates[chosen]

    # "1.v1_22.v1" -> "*1/*22": text before the first ".", and text between
    # the first "_" and the second ".".
    underscores = [pos for pos, char in enumerate(diplo1) if char == "_"]
    dots = [pos for pos, char in enumerate(diplo1) if char == "."]
    hap1 = "*" + diplo1[:dots[0]]
    hap2 = "*" + diplo1[underscores[0] + 1:dots[1]]
    allele_res = hap1 + "/" + hap2

    return [allele_res, diplo1]
|
||||
301
pgx-main/scripts/cyp3a4/hg38/bin/snv_def_modules.py
Normal file
301
pgx-main/scripts/cyp3a4/hg38/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,301 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ';'-joined string.

    Args:
        infile: path to a text file holding one variant call per line.
        cn: copy number; any value accepted by ``int()``.

    Returns:
        The stripped lines of ``infile``, sorted and joined with ";"
        (an empty string for an empty file). When ``cn`` is 1 every
        "~0/1" genotype tag is rewritten to "~1/1".

    Raises:
        OSError: if ``infile`` cannot be opened.
        ValueError: if ``cn`` cannot be converted to an integer.
    """
    # The context manager closes the handle; the original left it open.
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars
|
||||
|
||||
def get_all_vars_gt(infile_full_gt):
    """Return every line of ``infile_full_gt`` sorted and joined with ";".

    Args:
        infile_full_gt: path to a text file with one genotype call per line.

    Returns:
        The stripped lines, sorted, as a single ';'-separated string
        (an empty string for an empty file).

    Raises:
        OSError: if the file cannot be opened.
    """
    # The context manager closes the handle; the original left it open.
    with open(infile_full_gt, "r") as handle:
        return ";".join(sorted(line.strip() for line in handle))
|
||||
|
||||
def _star_diplotype(diplo):
    """Convert a database diplotype ID such as ``1.v1_22.v1`` to ``*1/*22``."""
    dots = [pos for pos, char in enumerate(diplo) if char == "."]
    underscores = [pos for pos, char in enumerate(diplo) if char == "_"]
    hap1 = "*" + diplo[:dots[0]]
    hap2 = "*" + diplo[underscores[0] + 1:dots[1]]
    return hap1 + "/" + hap2


def _all_equal(items):
    """Return True when every element equals the first (vacuously for [])."""
    return all(item == items[0] for item in items)


def _score_spec_candidates(infile_spec, core_variants, all_var_gt):
    """Collect and score tie-break rows whose core variants match the sample.

    Rows of ``infile_spec`` are split on whitespace. A row is kept when its
    field 2 equals ``core_variants``. Its score is the number of entries of
    ``all_var_gt`` that are missing from its ';'-separated field 3.

    Returns:
        ``(diplotype_ids, field0_values, scores)`` as parallel lists, taken
        from fields 1 and 0 of the kept rows.
    """
    diplos = []
    hap_ids = []
    scores = []
    with open(infile_spec, "r") as handle:
        for line in handle:
            fields = line.strip().split()
            if fields[2] == core_variants:
                diplos.append(fields[1])
                supp_gt = fields[3].split(";")
                hap_ids.append(fields[0])
                scores.append(sum(1 for gt in all_var_gt if gt not in supp_gt))
    return diplos, hap_ids, scores


def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
    """Call the star-allele diplotype from the sample's core variants.

    Database rows (tab-separated: diplotype ID, core variants, supporting
    variants) whose sorted core variants equal the sample's are candidate
    solutions. One candidate is returned directly. Two or three candidates
    are first compared on how many ``infile_full`` lines they leave
    unexplained, then on the genotype tie-break scores from ``infile_spec``.

    Args:
        database: path to the diplotype database.
        infile: path to the sample's core variants (one per line).
        infile_full: path to the sample's full variant list.
        infile_full_gt: path to the sample's genotype calls (one per line).
        infile_spec: path to the tie-break table (see
            ``_score_spec_candidates``); only opened when a tie-break is
            needed.
        cn: copy number, forwarded to ``get_core_variants``.

    Returns:
        * ``['1.v1_1.v1', '*1/*1']`` when ``infile`` is empty;
        * ``[candidates, chosen, star_alleles]`` for an unambiguous call
          (``chosen`` is a diplotype ID, field 0 of the tie-break row, or
          the list of tied diplotype IDs in the three-candidate case);
        * ``[candidates, star_alleles]`` for the two-candidate tie paths,
          where ``star_alleles`` may be "X or Y";
        * ``None`` when there are zero or more than three candidates, or
          no rule applies. Callers read only ``[0]`` and ``[-1]``.

    Raises:
        OSError: if an input file cannot be opened.
        ValueError: if a tie-break is needed but no ``infile_spec`` row
            matches the sample (``min()`` of an empty score list).
        IndexError: on malformed rows or diplotype IDs.
    """
    # NOTE(review): the original called line.strip() and discarded the
    # result, so these entries keep their trailing newline and are compared
    # verbatim against single variants below. Stripping would change which
    # branch is taken, so the behavior is preserved - confirm the intent.
    with open(infile_full, "r") as handle:
        all_variants = list(handle)

    if os.stat(infile).st_size == 0:
        # No core variants at all: report the default diplotype. The
        # original had an unreachable sys.exit() after this return.
        return ["1.v1_1.v1", "*1/*1"]

    core_variants = get_core_variants(infile, cn)

    with open(infile_full_gt, "r") as handle:
        all_var_gt = [line.strip() for line in handle]

    soln_list1 = []   # matching diplotype IDs
    soln_list2 = []   # their ';'-joined supporting variants
    with open(database, "r") as handle:
        for line in handle:
            record = line.strip().split("\t")
            if ";".join(sorted(record[1].split(";"))) == core_variants:
                soln_list1.append(record[0])
                soln_list2.append(record[2])

    def unexplained(supp_field):
        # Number of infile_full lines not listed among a candidate's
        # supporting variants.
        supp_vars = supp_field.split(";")
        return sum(1 for var in all_variants if var not in supp_vars)

    if len(soln_list1) == 1:
        diplo = "".join(soln_list1)
        return [soln_list1, diplo, _star_diplotype(diplo)]

    if len(soln_list1) == 2:
        print(soln_list1)
        diplo1, diplo2 = soln_list1
        miss1 = unexplained(soln_list2[0])
        miss2 = unexplained(soln_list2[1])

        if miss1 < miss2:
            return [soln_list1, diplo1, _star_diplotype(diplo1)]
        if miss1 > miss2:
            return [soln_list1, diplo2, _star_diplotype(diplo2)]

        tiebreak1, tiebreak3, score = _score_spec_candidates(
            infile_spec, core_variants, all_var_gt)
        min_score = min(score)

        if _all_equal(score):
            # Genotypes cannot separate the candidates: report both.
            # Identical star-allele results are collapsed to one rather
            # than printed as "X or X" (previously joined unconditionally).
            amb_soln_set = [_star_diplotype(elem) for elem in soln_list1]
            if amb_soln_set[0] != amb_soln_set[1]:
                allele_res = " or ".join(amb_soln_set)
            else:
                allele_res = amb_soln_set[0]
            return [soln_list1, allele_res]

        if score.count(min_score) > 1:
            # Bug fix: the original used score.index(i), which always
            # returns the first minimum, so tied rows were never compared.
            alt_solns = [_star_diplotype(tiebreak1[idx])
                         for idx, value in enumerate(score) if value == min_score]
            if _all_equal(alt_solns):
                return [soln_list1, alt_solns[0]]
            alt_solns = sorted(alt_solns)
            return [soln_list1, " or ".join([alt_solns[0], alt_solns[-1]])]

        minpos = score.index(min_score)
        return [soln_list1, tiebreak3[minpos], _star_diplotype(tiebreak1[minpos])]

    if len(soln_list1) == 3:
        diplo1, diplo2, diplo3 = soln_list1
        miss1 = unexplained(soln_list2[0])
        miss2 = unexplained(soln_list2[1])
        miss3 = unexplained(soln_list2[2])

        if miss1 < miss2 and miss1 < miss3:
            return [soln_list1, diplo1, _star_diplotype(diplo1)]
        if miss1 > miss2 and miss2 < miss3:
            return [soln_list1, diplo2, _star_diplotype(diplo2)]
        if miss1 > miss2 and miss2 > miss3:
            return [soln_list1, diplo3, _star_diplotype(diplo3)]

        if (miss1 == miss2 == miss3) or (miss1 != miss2 == miss3) or (miss1 == miss2 != miss3):
            tiebreak1, tiebreak3, score = _score_spec_candidates(
                infile_spec, core_variants, all_var_gt)
            min_score = min(score)

            if _all_equal(score):
                amb_soln_set = [_star_diplotype(elem) for elem in tiebreak1]
                return [soln_list1, tiebreak1, " or ".join(amb_soln_set)]

            minpos = score.index(min_score)
            return [soln_list1, tiebreak3[minpos], _star_diplotype(tiebreak1[minpos])]

    # Zero or more than three candidates, or an ordering not covered above:
    # the caller treats None as "possible novel allele".
    return None
|
||||
73
pgx-main/scripts/cyp3a4/hg38/bin/stellarpgx.py
Normal file
73
pgx-main/scripts/cyp3a4/hg38/bin/stellarpgx.py
Normal file
@@ -0,0 +1,73 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
|
||||
print("--------------------------------------------\n")
print("CYP3A4 Star Allele Calling with StellarPGx\n")
print("--------------------------------------------\n")

# Positional command-line arguments, in the order the callers below use them.
database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]

# Copy number is hard-coded to 2 in this script.
cn = 2

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)

snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)

# None means no database diplotype explained the core variants: report the
# closest background alleles and stop.
if snv_def_calls is None:
    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if bac_alleles is None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")
    else:
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    sys.exit()

# First element: candidate diplotype(s); last element: star-allele result.
best_diplos = snv_def_calls[0]

print("\nCandidate alleles:")
print(best_diplos)

snv_def_alleles = snv_def_calls[-1]

# The original also computed an unused get_all_vars_gt(infile_full_gt)
# here; that dead re-read of the file has been dropped.
print("\nResult:")
print(snv_def_alleles)
|
||||
0
pgx-main/scripts/cyp3a4/hg38/bin/sv_modules.py
Normal file
0
pgx-main/scripts/cyp3a4/hg38/bin/sv_modules.py
Normal file
78
pgx-main/scripts/cyp3a5/b37/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/cyp3a5/b37/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Pick the database diplotype that best matches the sample's core variants.

    Args:
        database: path to a tab-separated file; column 0 is a diplotype ID
            such as ``1.v1_3.v1`` and column 1 its ';'-joined core variants.
        core_vars: the sample's ';'-joined core variants.

    Returns:
        ``[star_alleles, diplotype_id]``, e.g. ``['*1/*3', '1.v1_3.v1']``.
        When no record qualifies the default ``['*3/*3', '3.v1_3.v1']`` is
        returned.

    Raises:
        OSError: if ``database`` cannot be opened.
        IndexError: on a database row without a second column, or a
            diplotype ID that lacks the expected "." / "_" separators.
    """
    core_vars_list = core_vars.split(";")
    # [:-4] drops the last four characters of the last/first sample variant,
    # presumably the "~0/1"-style genotype suffix.
    core_temp1 = core_vars_list[-1][:-4]
    core_temp2 = core_vars_list[0][:-4]

    candidates = []
    cand_sizes = []
    scores = []

    # The context manager closes the handle; the original leaked it.
    with open(database, "r") as handle:
        for line in handle:
            record = line.strip().split("\t")
            temp_rec = record[1]

            # Bug fix: the original `core_temp1 and core_temp2 in temp_rec`
            # only tested membership of core_temp2. Both sample variants
            # must occur in the record. An empty core_temp1 still rejects
            # every record, as before.
            if not (core_temp1 and core_temp1 in temp_rec and core_temp2 in temp_rec):
                continue

            record_core_var = temp_rec.split(";")
            counter = 0
            for var in record_core_var:
                if var in core_vars_list:
                    counter += 3      # same variant, same genotype
                elif var[:-4] in core_vars:
                    counter += 1      # variant text occurs in the sample string
                else:
                    counter -= 2      # record variant absent from the sample

            candidates.append(record[0])
            cand_sizes.append(len(record_core_var))
            scores.append(counter)

    if not scores:
        return ['*3/*3', '3.v1_3.v1']

    # Shortlist the best score and scores one point below it, then prefer
    # the shortlisted record with the fewest core variants (first on ties).
    max_score = max(scores)
    shortlist = [idx for idx, value in enumerate(scores)
                 if value == max_score or value == max_score - 1]
    chosen = min(shortlist, key=lambda idx: cand_sizes[idx])
    diplo1 = candidates[chosen]

    # "1.v1_3.v1" -> "*1/*3": text before the first ".", and text between
    # the first "_" and the second ".".
    underscores = [pos for pos, char in enumerate(diplo1) if char == "_"]
    dots = [pos for pos, char in enumerate(diplo1) if char == "."]
    hap1 = "*" + diplo1[:dots[0]]
    hap2 = "*" + diplo1[underscores[0] + 1:dots[1]]
    allele_res = hap1 + "/" + hap2

    return [allele_res, diplo1]
|
||||
560
pgx-main/scripts/cyp3a5/b37/bin/snv_def_modules.py
Normal file
560
pgx-main/scripts/cyp3a5/b37/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,560 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the core variants listed in *infile* as one sorted string.

    Each line of *infile* is stripped, the lines are sorted and joined with
    ``;``.  When ``int(cn) == 1`` every ``~0/1`` in the joined string is
    rewritten to ``~1/1``.  An empty file yields an empty string.
    """
    # Context manager so the handle is closed deterministically.
    with open(infile, "r") as handle:
        variants = sorted(row.strip() for row in handle)

    core_vars = ";".join(variants)

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars
|
||||
|
||||
def get_all_vars_gt(infile_full_gt):
    """Return every line of *infile_full_gt*, stripped, sorted and ``;``-joined."""
    # Context manager so the handle is closed deterministically.
    with open(infile_full_gt, "r") as handle:
        entries = sorted(row.strip() for row in handle)
    return ";".join(entries)
|
||||
|
||||
def _diplo_to_star(diplo):
    """Turn a diplotype id such as ``3.v1_1.v1`` into ``*3/*1``."""
    underscores = [i for i, ch in enumerate(diplo) if ch == "_"]
    dots = [i for i, ch in enumerate(diplo) if ch == "."]
    hap1 = "*" + diplo[:dots[0]]
    hap2 = "*" + diplo[underscores[0] + 1:dots[1]]
    return hap1 + "/" + hap2


def _all_same(items):
    """Return True when every element equals the first (True for an empty list)."""
    return all(item == items[0] for item in items)


def _resolve_tie(spec_handle, core_variants, all_var_gt, soln_list1, three_way):
    """Break a tie between database solutions using the open spec file.

    Rows of *spec_handle* are whitespace-split; rows whose third field equals
    *core_variants* are candidates (field 0: haplotype label, field 1:
    diplotype id, field 3: ``;``-separated genotyped variants).
    *three_way* selects the variant of the "several equally good" rule used
    for three database solutions instead of two.
    """
    hap_labels = []
    names = []
    full_dips = []
    for row in spec_handle:
        fields = row.strip().split()
        if fields[2] == core_variants:
            hap_labels.append(fields[0])
            names.append(fields[1])
            full_dips.append(fields[3])

    unexplained = []  # sample genotypes the candidate does not list
    unobserved = []   # candidate genotypes the sample does not show
    for full_dip in full_dips:
        expected = full_dip.split(";")
        unexplained.append(sum(1 for gt in all_var_gt if gt not in expected))
        unobserved.append(sum(1 for gt in expected if gt not in all_var_gt))

    # min() raises ValueError when no spec row matched, as it always did.
    min_unexplained = min(unexplained)
    min_unobserved = min(unobserved)
    fewest_unobserved = [i for i, n in enumerate(unobserved) if n == min_unobserved]

    if _all_same(unexplained):
        # Every candidate explains the sample equally well.
        if len(fewest_unobserved) > 3:
            picks = soln_list1
        elif len(fewest_unobserved) < 3:
            picks = [names[fewest_unobserved[0]], names[fewest_unobserved[-1]]]
        else:
            # This case used to fall through and crash with an
            # UnboundLocalError; fail with an explicit message instead.
            # NOTE(review): decide what the call should be for exactly three.
            raise ValueError(
                "ambiguous tie-break: exactly three candidates share the "
                "minimum number of unobserved variants"
            )
        stars = [_diplo_to_star(pick) for pick in picks]
        if stars[0] != stars[1]:
            allele_res = " or ".join(stars)
        else:
            allele_res = stars[0]
        return [soln_list1, allele_res]

    if unexplained.count(min_unexplained) > 1:
        # Several (but not all) candidates share the best score.
        tied = [i for i, n in enumerate(unexplained) if n == min_unexplained]
        alt_solns = [_diplo_to_star(names[i]) for i in tied]

        if three_way:
            alt_solns1 = [_diplo_to_star(names[i]) for i in fewest_unobserved]
            if _all_same(alt_solns):
                return [soln_list1, alt_solns[0]]
            # Report only tied calls that also have the fewest unobserved
            # variants; this can legitimately be an empty string.
            shared = [star for star in sorted(alt_solns) if star in alt_solns1]
            return [soln_list1, " or ".join(shared)]

        if _all_same(alt_solns):
            return [soln_list1, alt_solns[0]]
        # NOTE(review): hard-coded special case kept verbatim; its origin is
        # not visible in this file.
        if alt_solns[0] == '*10/*1B10':
            return [soln_list1, '*10/*1']
        ordered = sorted(alt_solns)
        if ordered[0] != ordered[-1]:
            allele_res = " or ".join([ordered[0], ordered[-1]])
        else:
            allele_res = ordered[0]
        return [soln_list1, allele_res]

    # A single candidate has the strictly lowest score.
    best = unexplained.index(min_unexplained)
    return [soln_list1, hap_labels[best], _diplo_to_star(names[best])]


def _call_with_spec(spec_handle, database, infile, infile_full, infile_full_gt, cn):
    """Body of cand_snv_allele_calling, run while the spec file is open."""
    # NOTE(review): lines are kept WITH their trailing newline because the
    # original discarded the result of line.strip().  Changing that would
    # alter which branch below is taken, so it is preserved deliberately.
    with open(infile_full, "r") as handle:
        all_variants = list(handle)

    if os.stat(infile).st_size == 0:
        # No core variants at all: report the default call.
        return ["3.v1_3.v1", "*3/*3"]

    core_variants = get_core_variants(infile, cn)

    with open(infile_full_gt, "r") as handle:
        all_var_gt = [row.strip() for row in handle]

    with open(database, "r") as handle:
        dbs = [row.strip().split("\t") for row in handle]

    soln_list1 = []  # diplotype ids whose sorted core variants match exactly
    soln_list2 = []  # their ';'-separated full variant lists (column 2)
    for record in dbs:
        if ";".join(sorted(record[1].split(";"))) == core_variants:
            soln_list1.append(record[0])
            soln_list2.append(record[2])

    if len(soln_list1) == 1:
        diplo = soln_list1[0]
        return [soln_list1, diplo, _diplo_to_star(diplo)]

    if len(soln_list1) == 2:
        print(soln_list1)
        diplo1, diplo2 = soln_list1
        supp1 = soln_list2[0].split(";")
        supp2 = soln_list2[1].split(";")
        u1 = sum(1 for var in all_variants if var not in supp1)
        u2 = sum(1 for var in all_variants if var not in supp2)

        if u1 < u2:
            return [soln_list1, diplo1, _diplo_to_star(diplo1)]
        if u1 > u2:
            return [soln_list1, diplo2, _diplo_to_star(diplo2)]
        return _resolve_tie(spec_handle, core_variants, all_var_gt,
                            soln_list1, False)

    if len(soln_list1) == 3:
        diplo1, diplo2, diplo3 = soln_list1
        supp1 = soln_list2[0].split(";")
        supp2 = soln_list2[1].split(";")
        supp3 = soln_list2[2].split(";")
        u1 = sum(1 for var in all_variants if var not in supp1)
        u2 = sum(1 for var in all_variants if var not in supp2)
        u3 = sum(1 for var in all_variants if var not in supp3)

        if u1 < u2 and u1 < u3:
            return [soln_list1, diplo1, _diplo_to_star(diplo1)]
        if u1 > u2 and u2 < u3:
            return [soln_list1, diplo2, _diplo_to_star(diplo2)]
        if u1 > u2 and u2 > u3:
            return [soln_list1, diplo3, _diplo_to_star(diplo3)]
        if u1 == u2 == u3 or (u1 != u2 == u3) or (u1 == u2 != u3):
            return _resolve_tie(spec_handle, core_variants, all_var_gt,
                                soln_list1, True)
        # NOTE(review): some orderings (e.g. u3 < u1 < u2) match none of the
        # conditions above and fall through to None, exactly as before.

    # Zero or more than three exact matches: no call is made.
    return None


def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
    """Call a star-allele diplotype from exact core-variant database matches.

    database       -- tab-separated records: diplotype id, ``;``-separated
                      core variants, ``;``-separated full variant list.
    infile         -- core variants of the sample, one per line.
    infile_full    -- all sample variants (lines are compared unstripped).
    infile_full_gt -- all sample variants with genotypes, one per line.
    infile_spec    -- whitespace-separated tie-break table, read only when
                      database solutions tie.
    cn             -- copy number, forwarded to get_core_variants.

    Returns one of
      * ``["3.v1_3.v1", "*3/*3"]`` when *infile* is empty,
      * ``[soln_list1, diplotype_or_label, allele_res]`` for a unique call,
      * ``[soln_list1, allele_res]`` for an ambiguous (``" or "``) call,
      * ``None`` when no call can be made.
    Callers rely on element 0 (candidates) and element -1 (the call).

    Raises ValueError when the tie-break table has no usable row or the tie
    cannot be resolved.
    """
    # The spec file is opened first, as before, so a missing file is reported
    # up front; the context manager guarantees it is closed on every path.
    with open(infile_spec, "r") as spec_handle:
        return _call_with_spec(spec_handle, database, infile, infile_full,
                               infile_full_gt, cn)
|
||||
76
pgx-main/scripts/cyp3a5/b37/bin/stellarpgx.py
Normal file
76
pgx-main/scripts/cyp3a5/b37/bin/stellarpgx.py
Normal file
@@ -0,0 +1,76 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
|
||||
# Command-line driver: print a banner, call the CYP3A5 diplotype from the
# five input files given on the command line, and report the result.
_RULE = "--------------------------------------------\n"
for _banner_line in (_RULE, "CYP3A5 Star Allele Calling with StellarPGx\n", _RULE):
    print(_banner_line)

# Positional arguments, in order.
database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]

# Copy number is fixed at two for this gene.
cn = 2

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
# NOTE(review): this fallback only fires if get_core_variants returns None;
# confirm that it ever does.
print('rs776746' if supp_core_vars is None else supp_core_vars)

snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)

if snv_def_calls is None:
    # No exact database match: fall back to the closest background alleles.
    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if bac_alleles is not None:
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")
    else:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")

    sys.exit()

# First element: candidate diplotypes; last element: the final call.
snv_cand_alleles = snv_def_calls[0]
snv_def_alleles = snv_def_calls[-1]

print("\nCandidate alleles:")
print(snv_cand_alleles)

# Result is not used below; the call is kept because it still reads the file.
dip_variants = get_all_vars_gt(infile_full_gt)

print("\nResult:")
print(snv_def_alleles)
|
||||
0
pgx-main/scripts/cyp3a5/b37/bin/sv_modules.py
Normal file
0
pgx-main/scripts/cyp3a5/b37/bin/sv_modules.py
Normal file
78
pgx-main/scripts/cyp3a5/hg38/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/cyp3a5/hg38/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Suggest the closest known diplotype for a sample's core variants.

    database  -- path to a tab-separated file; column 0 holds a diplotype id
                 shaped like ``1.v1_1.v1`` and column 1 a ``;``-separated list
                 of core variants.
    core_vars -- the sample's core variants joined with ``;``.

    Returns ``[allele_res, diplo1]``: the star-allele call (e.g. ``*1/*1``)
    and the database diplotype id it was derived from.  When no database
    record qualifies, the default ``['*1/*1', '1.v1_1.v1']`` is returned.
    """
    core_vars_list = core_vars.split(";")

    # Drop the last four characters of the first and last variant
    # (presumably a "~0/1"-style genotype tag -- confirm against the inputs)
    # so that records can be matched regardless of zygosity.
    last_key = core_vars_list[-1][:-4]
    first_key = core_vars_list[0][:-4]

    with open(database, "r") as handle:
        records = [row.strip().split("\t") for row in handle]

    # Shortlist records that mention BOTH the first and the last core variant.
    # The previous test, `core_temp1 and core_temp2 in temp_rec`, only checked
    # that the last key was non-empty and never looked for it in the record.
    shortlisted = []
    for record in records:
        core_field = record[1]
        if last_key and last_key in core_field and first_key in core_field:
            shortlisted.append(record)

    candidates = []
    cand_vars = []
    scores = []
    for record in shortlisted:
        record_vars = record[1].split(";")
        total = 0
        for variant in record_vars:
            if variant in core_vars_list:
                # Exact match, genotype included.
                total += 3
            elif variant[:-4] in core_vars:
                # Same variant text appears in the sample, different suffix.
                total += 1
            else:
                # Record expects a variant the sample does not carry.
                total -= 2
        candidates.append(record[0])
        cand_vars.append(record_vars)
        scores.append(total)

    if not scores:
        return ['*1/*1', '1.v1_1.v1']

    best = max(scores)
    # Consider the top score and anything one point below it, then prefer the
    # candidate defined by the fewest core variants (first one wins on ties).
    near_best = [i for i, value in enumerate(scores)
                 if value == best or value == best - 1]
    chosen = min(near_best, key=lambda i: len(cand_vars[i]))
    diplo1 = candidates[chosen]

    # "<hap1>.<sub>_<hap2>.<sub>"  ->  "*<hap1>/*<hap2>"
    underscores = [i for i, ch in enumerate(diplo1) if ch == "_"]
    dots = [i for i, ch in enumerate(diplo1) if ch == "."]
    hap1 = "*" + diplo1[:dots[0]]
    hap2 = "*" + diplo1[underscores[0] + 1:dots[1]]
    allele_res = hap1 + "/" + hap2

    return [allele_res, diplo1]
|
||||
560
pgx-main/scripts/cyp3a5/hg38/bin/snv_def_modules.py
Normal file
560
pgx-main/scripts/cyp3a5/hg38/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,560 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the core variants listed in *infile* as one sorted string.

    Each line of *infile* is stripped, the lines are sorted and joined with
    ``;``.  When ``int(cn) == 1`` every ``~0/1`` in the joined string is
    rewritten to ``~1/1``.  An empty file yields an empty string.
    """
    # Context manager so the handle is closed deterministically.
    with open(infile, "r") as handle:
        variants = sorted(row.strip() for row in handle)

    core_vars = ";".join(variants)

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars
|
||||
|
||||
def get_all_vars_gt(infile_full_gt):
    """Return every line of *infile_full_gt*, stripped, sorted and ``;``-joined."""
    # Context manager so the handle is closed deterministically.
    with open(infile_full_gt, "r") as handle:
        entries = sorted(row.strip() for row in handle)
    return ";".join(entries)
|
||||
|
||||
def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
|
||||
|
||||
|
||||
f = open(infile_spec, "r")
|
||||
|
||||
all_variants = []
|
||||
|
||||
for line in open(infile_full, "r"):
|
||||
line.strip()
|
||||
all_variants.append(line)
|
||||
# all_variants = line.strip().split(";")
|
||||
# print(all_variants)
|
||||
|
||||
if os.stat(infile).st_size == 0:
|
||||
cand_res = ['1.v1_1.v1']
|
||||
allele_res = "*1/*1"
|
||||
return ["".join(cand_res), allele_res];
|
||||
#print("\nSupporting variants")
|
||||
#print("\n" + "".join(all_variants))
|
||||
sys.exit()
|
||||
|
||||
# core_variants = []
|
||||
|
||||
# for line in open(infile, "r"):
|
||||
# line = line.strip()
|
||||
# core_variants.append(line)
|
||||
|
||||
# core_variants = ";".join(sorted(core_variants))
|
||||
|
||||
core_variants = get_core_variants(infile, cn)
|
||||
|
||||
# if int(cn) == 1:
|
||||
# core_variants = core_variants.replace("~0/1", "~1/1")
|
||||
|
||||
# else:
|
||||
# pass
|
||||
|
||||
all_var_gt = []
|
||||
for line in open(infile_full_gt, "r"):
|
||||
line = line.strip()
|
||||
all_var_gt.append(line)
|
||||
|
||||
|
||||
dbs = []
|
||||
|
||||
for line in open(database, "r"):
|
||||
line = line.strip().split("\t")
|
||||
dbs.append(line)
|
||||
|
||||
soln_list1 = []
|
||||
soln_list2 = []
|
||||
|
||||
for record in dbs:
|
||||
record_core_var = record[1].split(";")
|
||||
record_core_var = ";".join(sorted(record_core_var))
|
||||
if record_core_var == core_variants:
|
||||
diplo = record[0]
|
||||
full_dip = record[2]
|
||||
soln_list1.append(record[0])
|
||||
soln_list2.append(record[2])
|
||||
else:
|
||||
pass
|
||||
|
||||
#return soln_list1
|
||||
|
||||
#print("\nResult:")
|
||||
|
||||
diff_alleles_check = False
|
||||
|
||||
def chkList(lst):
|
||||
if len(lst) < 0 :
|
||||
diff_alleles_check = True
|
||||
diff_alleles_check = all(ele == lst[0] for ele in lst)
|
||||
|
||||
if(diff_alleles_check):
|
||||
return("Equal")
|
||||
else:
|
||||
return("Not equal")
|
||||
|
||||
|
||||
if len(soln_list1) == 1:
|
||||
diplo = "".join(soln_list1)
|
||||
res1 = [i for i in range(len(diplo)) if diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo)) if diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo[:res2[0]])
|
||||
hap2 = "*" + str (diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo, allele_res];
|
||||
#print ("\nSupporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
elif len(soln_list1) == 2:
|
||||
print(soln_list1)
|
||||
diplo1 = soln_list1[0]
|
||||
diplo2 = soln_list1[1]
|
||||
diplo1_supp_var = soln_list2[0].split(";")
|
||||
diplo2_supp_var = soln_list2[1].split(";")
|
||||
uniq_diplo1 = []
|
||||
uniq_diplo2 = []
|
||||
for i in all_variants:
|
||||
if i not in diplo1_supp_var:
|
||||
uniq_diplo1.append(i)
|
||||
|
||||
if i not in diplo2_supp_var:
|
||||
uniq_diplo2.append(i)
|
||||
|
||||
#print("\nUnique variants in soln 1: {}".format(len(uniq_diplo1)))
|
||||
#print("\nUnique variants in soln 2: {}".format(len(uniq_diplo2)))
|
||||
|
||||
if len(uniq_diplo1) < len(uniq_diplo2):
|
||||
res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo1[:res2[0]])
|
||||
hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo1, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2):
|
||||
res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo2[:res2[0]])
|
||||
hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo2, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
# elif len(uniq_diplo1) == len(uniq_diplo2) and (diplo1 == "4.v11_74.v1" and diplo2 == "4.v12_1.v1"):
|
||||
# res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
# hap1 = "*" + str (diplo2[:res2[0]])
|
||||
# hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
# allele_res = hap1 + "/" + hap2
|
||||
# return [soln_list1, diplo2, allele_res];
|
||||
|
||||
# elif len(uniq_diplo1) == len(uniq_diplo2) and diplo2 == "41.v1_65.v1":
|
||||
# res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
# hap1 = "*" + str (diplo2[:res2[0]])
|
||||
# hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
# allele_res = hap1 + "/" + hap2
|
||||
# return [soln_list1, diplo2, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
# elif len(uniq_diplo1) == len(uniq_diplo2) and (diplo1 == "4.v1_6.v1" and diplo2 == "4.v4_6.v2") :
|
||||
# res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
|
||||
# hap1 = "*" + str (diplo1[:res2[0]])
|
||||
# hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
|
||||
# allele_res = hap1 + "/" + hap2
|
||||
# return [soln_list1, diplo1, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
else:
|
||||
tiebreak1 = []
|
||||
tiebreak2 = []
|
||||
tiebreak3 = []
|
||||
score = []
|
||||
score2 = []
|
||||
test1 = []
|
||||
|
||||
for line in f:
|
||||
line = line.strip().split()
|
||||
#print(line)
|
||||
if line[2] == core_variants:
|
||||
tiebreak1.append(line[1])
|
||||
tiebreak2.append(line[3])
|
||||
tiebreak3.append(line[0])
|
||||
for full_dip in tiebreak2:
|
||||
diplo_supp_gt = full_dip.split(";")
|
||||
uniq_gt = []
|
||||
uniq_gt1 = []
|
||||
for i in all_var_gt:
|
||||
if i not in diplo_supp_gt:
|
||||
uniq_gt.append(i)
|
||||
score_dip = len(uniq_gt)
|
||||
score.append(score_dip)
|
||||
|
||||
for j in diplo_supp_gt:
|
||||
if j not in all_var_gt:
|
||||
uniq_gt1.append(j)
|
||||
score_dip2 = len(uniq_gt1)
|
||||
score2.append(score_dip2)
|
||||
|
||||
min_score = min(score)
|
||||
min_score2 = min(score2)
|
||||
|
||||
res_list = [i for i in range(len(score2)) if score2[i] == min_score2]
|
||||
|
||||
# return [tiebreak1, res_list];
|
||||
|
||||
# if chkList(score) == "Equal" and soln_list1[0] == "17.v1_4.v1":
|
||||
# elem = "17.v1_4.v1"
|
||||
# res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
# hap1 = "*" + str (elem[:res2[0]])
|
||||
# hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
# result_dip = hap1 + "/" + hap2
|
||||
# return [soln_list1, elem, result_dip];
|
||||
|
||||
|
||||
if chkList(score) == "Equal":
|
||||
amb_soln_set = []
|
||||
amb_set1 = []
|
||||
|
||||
if len(res_list) > 3:
|
||||
soln_list_1 = soln_list1
|
||||
|
||||
elif len(res_list) < 3:
|
||||
amb_set1.append(tiebreak1[res_list[0]])
|
||||
amb_set1.append(tiebreak1[res_list[-1]])
|
||||
soln_list_1 = amb_set1
|
||||
|
||||
# return [tiebreak1, 'true'];
|
||||
for elem in soln_list_1:
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
amb_soln_set.append(result_dip)
|
||||
|
||||
#elem_pos = tiebreak1.index(elem)
|
||||
#print ("Solution " + str(elem_pos) + ": " + result_dip)
|
||||
if amb_soln_set[0] != amb_soln_set[1]:
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
else:
|
||||
allele_res = amb_soln_set[0]
|
||||
|
||||
return [soln_list1, allele_res];
|
||||
|
||||
|
||||
# elif score.count(min_score) > 1 and soln_list1[0] == "11.v1_2.v2":
|
||||
# elem = "11.v1_2.v2"
|
||||
# res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
# hap1 = "*" + str (elem[:res2[0]])
|
||||
# hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
# result_dip = hap1 + "/" + hap2
|
||||
# return [soln_list1, elem, result_dip];
|
||||
|
||||
|
||||
|
||||
elif score.count(min_score) > 1:
|
||||
|
||||
index_scores = []
|
||||
amb_soln_set = []
|
||||
|
||||
# for i in score:
|
||||
# if i == min_score:
|
||||
# index_scores.append(score.index(i))
|
||||
|
||||
index_scores = [i for i in range(len(score)) if score[i] == min_score]
|
||||
|
||||
alt_solns = []
|
||||
for j in index_scores:
|
||||
elem = tiebreak1[j]
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
alt_solns.append(result_dip)
|
||||
|
||||
# return [index_scores, 'true']
|
||||
|
||||
if chkList(alt_solns) == "Equal":
|
||||
return[soln_list1, alt_solns[0]];
|
||||
|
||||
elif chkList(alt_solns) != "Equal" and alt_solns[0] == '*10/*1B10':
|
||||
return[soln_list1, '*10/*1'];
|
||||
|
||||
else:
|
||||
alt_solns = sorted(alt_solns)
|
||||
amb_soln_set.append(alt_solns[0])
|
||||
amb_soln_set.append(alt_solns[-1])
|
||||
|
||||
if amb_soln_set[0] != amb_soln_set[1]:
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
else:
|
||||
allele_res = amb_soln_set[0]
|
||||
|
||||
# allele_res = " or ".join(amb_soln_set)
|
||||
|
||||
return [soln_list1, allele_res];
|
||||
|
||||
# amb_soln_set = []
|
||||
# temp_set = []
|
||||
# temp_set.append(tiebreak1[0])
|
||||
# temp_set.append(tiebreak1[-1])
|
||||
|
||||
# for elem in temp_set:
|
||||
# res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
# hap1 = "*" + str (elem[:res2[0]])
|
||||
# hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
# result_dip = hap1 + "/" + hap2
|
||||
# amb_soln_set.append(result_dip)
|
||||
#elem_pos = tiebreak1.index(elem)
|
||||
#print ("Solution " + str(elem_pos) + ": " + result_dip)
|
||||
# allele_res = " or ".join(amb_soln_set)
|
||||
# return [soln_list1, allele_res];
|
||||
#print ("\nSupporting core variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
else:
|
||||
minpos = score.index(min_score)
|
||||
best_diplo = tiebreak1[minpos]
|
||||
best_cand_haps = tiebreak3[minpos]
|
||||
res1 = [i for i in range(len(best_diplo)) if best_diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(best_diplo)) if best_diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (best_diplo[:res2[0]])
|
||||
hap2 = "*" + str (best_diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, best_cand_haps, allele_res];
|
||||
#print ("Supporting core variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
elif len(soln_list1) == 3:
|
||||
diplo1 = soln_list1[0]
|
||||
diplo2 = soln_list1[1]
|
||||
diplo3 = soln_list1[2]
|
||||
diplo1_supp_var = soln_list2[0].split(";")
|
||||
diplo2_supp_var = soln_list2[1].split(";")
|
||||
diplo3_supp_var = soln_list2[2].split(";")
|
||||
uniq_diplo1 = []
|
||||
uniq_diplo2 = []
|
||||
uniq_diplo3 = []
|
||||
|
||||
for i in all_variants:
|
||||
if i not in diplo1_supp_var:
|
||||
uniq_diplo1.append(i)
|
||||
|
||||
if i not in diplo2_supp_var:
|
||||
uniq_diplo2.append(i)
|
||||
|
||||
if i not in diplo3_supp_var:
|
||||
uniq_diplo3.append(i)
|
||||
|
||||
|
||||
if len(uniq_diplo1) < len(uniq_diplo2) and len(uniq_diplo1) < len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo1[:res2[0]])
|
||||
hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo1, allele_res];
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2) and len(uniq_diplo2) < len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo2)) if diplo2.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo2)) if diplo2.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo2[:res2[0]])
|
||||
hap2 = "*" + str (diplo2[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo2, allele_res]
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
elif len(uniq_diplo1) > len(uniq_diplo2) and len(uniq_diplo2) > len(uniq_diplo3):
|
||||
res1 = [i for i in range(len(diplo3)) if diplo3.startswith("_", i)]
|
||||
res2 = [i for i in range(len(diplo3)) if diplo3.startswith(".", i)]
|
||||
hap1 = "*" + str (diplo3[:res2[0]])
|
||||
hap2 = "*" + str (diplo3[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, diplo3, allele_res]
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
# elif len(uniq_diplo1) == len(uniq_diplo2) == len(uniq_diplo3) and diplo3 == "39.v1_4.v4":
|
||||
# res1 = [i for i in range(len(diplo3)) if diplo3.startswith("_", i)]
|
||||
# res2 = [i for i in range(len(diplo3)) if diplo3.startswith(".", i)]
|
||||
# hap1 = "*" + str (diplo3[:res2[0]])
|
||||
# hap2 = "*" + str (diplo3[res1[0]+1:res2[1]])
|
||||
# allele_res = hap1 + "/" + hap2
|
||||
# return [soln_list1, diplo3, allele_res]
|
||||
#print ("Supporting variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
elif len(uniq_diplo1) == len(uniq_diplo2) == len(uniq_diplo3) or (len(uniq_diplo1) != len(uniq_diplo2) == len(uniq_diplo3)) or (len(uniq_diplo1) == len(uniq_diplo2) != len(uniq_diplo3)):
|
||||
|
||||
tiebreak1 = []
|
||||
tiebreak2 = []
|
||||
tiebreak3 = []
|
||||
score = []
|
||||
score2 = []
|
||||
test1 = []
|
||||
for line in f:
|
||||
line = line.strip().split()
|
||||
#print(line)
|
||||
if line[2] == core_variants:
|
||||
tiebreak1.append(line[1])
|
||||
tiebreak2.append(line[3])
|
||||
tiebreak3.append(line[0])
|
||||
for full_dip in tiebreak2:
|
||||
diplo_supp_gt = full_dip.split(";")
|
||||
uniq_gt = []
|
||||
uniq_gt1 = []
|
||||
for i in all_var_gt:
|
||||
if i not in diplo_supp_gt:
|
||||
uniq_gt.append(i)
|
||||
score_dip = len(uniq_gt)
|
||||
score.append(score_dip)
|
||||
|
||||
# return [tiebreak1, score];
|
||||
for j in diplo_supp_gt:
|
||||
if j not in all_var_gt:
|
||||
uniq_gt1.append(j)
|
||||
score_dip2 = len(uniq_gt1)
|
||||
score2.append(score_dip2)
|
||||
|
||||
min_score = min(score)
|
||||
min_score2 = min(score2)
|
||||
|
||||
res_list = [i for i in range(len(score2)) if score2[i] == min_score2]
|
||||
|
||||
|
||||
# return[tiebreak1, res_list];
|
||||
|
||||
if chkList(score) == "Equal":
|
||||
|
||||
amb_soln_set = []
|
||||
amb_set1 = []
|
||||
|
||||
if len(res_list) > 3:
|
||||
soln_list_1 = soln_list1
|
||||
|
||||
elif len(res_list) < 3:
|
||||
amb_set1.append(tiebreak1[res_list[0]])
|
||||
amb_set1.append(tiebreak1[res_list[-1]])
|
||||
soln_list_1 = amb_set1
|
||||
|
||||
|
||||
for elem in soln_list_1:
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
amb_soln_set.append(result_dip)
|
||||
#elem_pos = tiebreak1.index(elem)
|
||||
#print ("Solution " + str(elem_pos) + ": " + result_dip)
|
||||
if amb_soln_set[0] != amb_soln_set[1]:
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
else:
|
||||
allele_res = amb_soln_set[0]
|
||||
|
||||
return [soln_list1, allele_res];
|
||||
|
||||
|
||||
# allele_res = " or ".join(amb_soln_set)
|
||||
# return [soln_list1, tiebreak1, allele_res];
|
||||
#print ("\nSupporting core variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
elif score.count(min_score) > 1:
|
||||
index_scores = []
|
||||
amb_soln_set = []
|
||||
|
||||
index_scores = [i for i in range(len(score)) if score[i] == min_score]
|
||||
|
||||
|
||||
# return[tiebreak1, index_scores];
|
||||
|
||||
alt_solns = []
|
||||
alt_solns1 = []
|
||||
|
||||
for j in index_scores:
|
||||
elem = tiebreak1[j]
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
alt_solns.append(result_dip)
|
||||
|
||||
|
||||
for n in res_list:
|
||||
elem = tiebreak1[n]
|
||||
res1 = [i for i in range(len(elem)) if elem.startswith("_", i)]
|
||||
res2 = [i for i in range(len(elem)) if elem.startswith(".", i)]
|
||||
hap1 = "*" + str (elem[:res2[0]])
|
||||
hap2 = "*" + str (elem[res1[0]+1:res2[1]])
|
||||
result_dip = hap1 + "/" + hap2
|
||||
alt_solns1.append(result_dip)
|
||||
|
||||
|
||||
if chkList(alt_solns) == "Equal":
|
||||
return[soln_list1, alt_solns[0]];
|
||||
|
||||
|
||||
else:
|
||||
alt_solns = sorted(alt_solns)
|
||||
for i in alt_solns:
|
||||
if i in alt_solns1:
|
||||
amb_soln_set.append(i)
|
||||
# amb_soln_set.append(alt_solns[0])
|
||||
# amb_soln_set.append(alt_solns[-1])
|
||||
allele_res = " or ".join(amb_soln_set)
|
||||
return [soln_list1, allele_res];
|
||||
|
||||
|
||||
|
||||
else:
|
||||
minpos = score.index(min_score)
|
||||
best_diplo = tiebreak1[minpos]
|
||||
best_cand_haps = tiebreak3[minpos]
|
||||
res1 = [i for i in range(len(best_diplo)) if best_diplo.startswith("_", i)]
|
||||
res2 = [i for i in range(len(best_diplo)) if best_diplo.startswith(".", i)]
|
||||
hap1 = "*" + str (best_diplo[:res2[0]])
|
||||
hap2 = "*" + str (best_diplo[res1[0]+1:res2[1]])
|
||||
allele_res = hap1 + "/" + hap2
|
||||
return [soln_list1, best_cand_haps, allele_res];
|
||||
#print ("Supporting core variants:")
|
||||
#print ("\n" + core_variants + "\n")
|
||||
|
||||
|
||||
|
||||
#print("\nFull diplotype variants:")
|
||||
#print("\n" + ";".join(all_var_gt))
|
||||
70
pgx-main/scripts/cyp3a5/hg38/bin/stellarpgx.py
Normal file
70
pgx-main/scripts/cyp3a5/hg38/bin/stellarpgx.py
Normal file
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
# CYP3A5 command-line driver: prints the sample's core variants, the candidate
# diplotypes found in the database and the resulting star-allele call.

print("--------------------------------------------\n")
print("CYP3A5 Star Allele Calling with StellarPGx\n")
print("--------------------------------------------\n")


# Five positional arguments are required; a missing one raises IndexError.
database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]


# The copy number is hard-coded here; it is not estimated from coverage.
cn = 2


supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)


snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)

# `is None` is the idiomatic identity test (PEP 8) and behaves exactly like the
# previous `== None` for the list / None values handled here.
if snv_def_calls is None:

    # No usable database match: fall back to the closest "background" alleles.
    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if bac_alleles is None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")

    else:
        # Last element is printed as the candidate, first as the background call.
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    # Nothing below applies without a database match.
    sys.exit()


# First element of the call result: the candidate diplotype(s).
snv_cand_alleles = snv_def_calls[0]

print("\nCandidate alleles:")
print(snv_cand_alleles)


# Last element of the call result: the star-allele string to report.
snv_def_alleles = snv_def_calls[-1]

# NOTE(review): the value is never used below; the call is kept because it
# still reads infile_full_gt (and so fails if that file is unreadable).
dip_variants = get_all_vars_gt(infile_full_gt)


print("\nResult:")

print(snv_def_alleles)
|
||||
0
pgx-main/scripts/cyp3a5/hg38/bin/sv_modules.py
Normal file
0
pgx-main/scripts/cyp3a5/hg38/bin/sv_modules.py
Normal file
78
pgx-main/scripts/cyp4f2/b37/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/cyp4f2/b37/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_backgroud_alleles(database, core_vars):
    """Pick the database diplotype that best matches a sample's core variants.

    Parameters:
        database: path to a tab-separated file; column 0 is a diplotype ID of
            the form "1.v1_2.v1" and column 1 a ";"-separated variant list.
        core_vars: the sample's core variants as one ";"-joined string.

    Returns:
        ``[allele_res, diplo1]`` where ``diplo1`` is the chosen diplotype ID
        and ``allele_res`` its star-allele form (e.g. "*1/*2").  When no
        record passes the pre-filter the result is ``['*1/*1', '1.v1_1.v1']``.

    Scoring, per variant of a candidate record: +3 when the variant is an
    exact member of the sample's list, +1 when the variant minus its last
    four characters occurs anywhere in ``core_vars``, -2 otherwise.  Among the
    records scoring the maximum or one less, the record with the fewest
    variants wins (the earliest one on ties).
    """
    core_vars_list = core_vars.split(";")
    # Last and first sample variants with their final four characters removed.
    core_temp1 = core_vars_list[-1][:-4]
    core_temp2 = core_vars_list[0][:-4]

    # Context manager so the database handle is closed instead of leaked.
    with open(database, "r") as handle:
        dbs = [line.strip().split("\t") for line in handle]

    dbs_temp = []
    for record in dbs:
        temp_rec = record[1]
        # NOTE(review): this was written `core_temp1 and core_temp2 in temp_rec`,
        # which Python reads as below: only core_temp2 is searched for, and
        # core_temp1 merely has to be non-empty.  A check that BOTH occur in
        # the record was probably intended, but changing it alters which
        # alleles get proposed, so the existing behaviour is kept - confirm
        # against validation data before tightening.
        if core_temp1 and (core_temp2 in temp_rec):
            dbs_temp.append(record)

    scores = []
    candidates = []
    cand_vars = []

    for elem in dbs_temp:
        candidates.append(elem[0])
        record_core_var = elem[1].split(";")
        cand_vars.append(record_core_var)

        counter = 0
        for var in record_core_var:
            if var in core_vars_list:
                counter += 3
            elif var[:-4] in core_vars:
                # Substring test against the joined string, not the list.
                counter += 1
            else:
                counter -= 2
        scores.append(counter)

    if not scores:
        # Nothing passed the pre-filter: report the default diplotype.
        diplo1 = '1.v1_1.v1'
    else:
        max_score = max(scores)
        indices = [i for i, x in enumerate(scores) if x == max_score or x == max_score - 1]
        # min() returns the first minimal element, matching the original
        # list.index(min(...)) tie-breaking.
        diplo1 = candidates[min(indices, key=lambda i: len(cand_vars[i]))]

    # "<hap1>.<ver>_<hap2>.<ver>" -> "*<hap1>/*<hap2>"
    res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
    res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
    hap1 = "*" + str(diplo1[:res2[0]])
    hap2 = "*" + str(diplo1[res1[0] + 1:res2[1]])
    allele_res = hap1 + "/" + hap2

    return [allele_res, diplo1]
|
||||
448
pgx-main/scripts/cyp4f2/b37/bin/snv_def_modules.py
Normal file
448
pgx-main/scripts/cyp4f2/b37/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,448 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the core variants listed in *infile* as one ";"-joined string.

    Parameters:
        infile: path to a text file with one variant per line.
        cn: copy number; any value accepted by ``int()``.

    Returns:
        The whitespace-stripped lines of *infile*, sorted lexicographically
        and joined with ";" (an empty file gives "").  When ``int(cn) == 1``
        every "~0/1" in the result is rewritten to "~1/1".
    """
    # Context manager so the handle is closed instead of leaked.
    with open(infile, "r") as handle:
        core_vars = ";".join(sorted(line.strip() for line in handle))

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars
|
||||
|
||||
def get_all_vars_gt(infile_full_gt):
    """Return every line of *infile_full_gt* as one sorted, ";"-joined string.

    Parameters:
        infile_full_gt: path to a text file with one entry per line.

    Returns:
        The whitespace-stripped lines, sorted lexicographically and joined
        with ";" (an empty file gives "").
    """
    # Context manager so the handle is closed instead of leaked.
    with open(infile_full_gt, "r") as handle:
        return ";".join(sorted(line.strip() for line in handle))
|
||||
|
||||
def _diplotype_to_star_alleles(diplo):
    """Turn a diplotype ID such as "1.v1_2.v1" into the string "*1/*2"."""
    underscores = [i for i in range(len(diplo)) if diplo.startswith("_", i)]
    dots = [i for i in range(len(diplo)) if diplo.startswith(".", i)]
    hap1 = "*" + str(diplo[:dots[0]])
    hap2 = "*" + str(diplo[underscores[0] + 1:dots[1]])
    return hap1 + "/" + hap2


def _count_unsupported(observed, supporting):
    """Count the entries of *observed* that do not occur in *supporting*."""
    return sum(1 for item in observed if item not in supporting)


def _load_tiebreak_rows(infile_spec, core_variants):
    """Collect fields 1, 3 and 0 of each *infile_spec* row whose field 2 equals *core_variants*."""
    diplos = []
    supp_gts = []
    cand_haps = []
    with open(infile_spec, "r") as handle:
        for line in handle:
            fields = line.strip().split()
            if not fields:
                # Blank lines carry no record; skip them instead of failing.
                continue
            if fields[2] == core_variants:
                diplos.append(fields[1])
                supp_gts.append(fields[3])
                cand_haps.append(fields[0])
    return diplos, supp_gts, cand_haps


def _all_equal(values):
    """Return True when every element of *values* equals the first one."""
    return all(value == values[0] for value in values)


def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
    """Call a star-allele diplotype from the sample's core variants.

    Parameters:
        database: tab-separated file; column 0 diplotype ID, column 1
            ";"-separated core variants, column 2 ";"-separated supporting
            variants.
        infile: file with the sample's core variants, one per line.
        infile_full: file with the sample's full variant list.
        infile_full_gt: file with the sample's variants incl. genotype, one
            per line.
        infile_spec: whitespace-separated tie-break file; field 2 is compared
            with the sample's core-variant string, fields 1, 3 and 0 provide
            the diplotype, its supporting genotypes and the candidate
            haplotypes.  It is only opened when a tie-break is needed.
        cn: copy number, forwarded to ``get_core_variants``.

    Returns:
        * ``["1.v1_1.v1", "*1/*1"]`` when *infile* is empty;
        * ``[solutions, chosen, allele_res]`` when one solution is selected;
        * ``[solutions, allele_res]`` for an unresolved two-solution tie
          (``allele_res`` lists the alternatives joined by " or ");
        * ``[solutions, tiebreak_diplotypes, allele_res]`` for an unresolved
          three-solution tie;
        * ``None`` when the database holds zero or more than three matching
          records, or when three solutions cannot be ranked by the rules
          below.
        Callers visible in this repository only read the first and last
        element.

    Raises:
        ValueError: a tie-break is required but *infile_spec* has no row for
            the sample's core variants (``min()`` of an empty score list).
    """
    # NOTE(review): the original code called line.strip() and discarded the
    # result, so every entry keeps its trailing newline and can hardly ever
    # equal a database variant.  That makes the "fewest unsupported variants"
    # comparison below tie in practice and pushes the decision to the
    # tie-break file.  Stripping here would change the calls this tool
    # produces, so the behaviour is kept as is - confirm before changing.
    with open(infile_full, "r") as handle:
        all_variants = list(handle)

    if os.stat(infile).st_size == 0:
        # No core variants at all: report the default diplotype.
        return ["1.v1_1.v1", "*1/*1"]

    core_variants = get_core_variants(infile, cn)

    with open(infile_full_gt, "r") as handle:
        all_var_gt = [line.strip() for line in handle]

    # Database records whose sorted core-variant list equals the sample's.
    soln_list1 = []
    soln_list2 = []
    with open(database, "r") as handle:
        for line in handle:
            record = line.strip().split("\t")
            if ";".join(sorted(record[1].split(";"))) == core_variants:
                soln_list1.append(record[0])
                soln_list2.append(record[2])

    if len(soln_list1) == 1:
        diplo = "".join(soln_list1)
        return [soln_list1, diplo, _diplotype_to_star_alleles(diplo)]

    if len(soln_list1) == 2:
        # Kept from the original: the two candidates are echoed to stdout.
        print(soln_list1)
        diplo1, diplo2 = soln_list1
        missing1 = _count_unsupported(all_variants, soln_list2[0].split(";"))
        missing2 = _count_unsupported(all_variants, soln_list2[1].split(";"))

        if missing1 < missing2:
            return [soln_list1, diplo1, _diplotype_to_star_alleles(diplo1)]
        if missing1 > missing2:
            return [soln_list1, diplo2, _diplotype_to_star_alleles(diplo2)]

        # Tie: score the tie-break rows by how many genotyped sample variants
        # each one fails to explain (lower is better).
        tiebreak1, tiebreak2, tiebreak3 = _load_tiebreak_rows(infile_spec, core_variants)
        score = [_count_unsupported(all_var_gt, gt.split(";")) for gt in tiebreak2]
        min_score = min(score)

        if _all_equal(score):
            amb_soln_set = [_diplotype_to_star_alleles(elem) for elem in soln_list1]
            return [soln_list1, " or ".join(amb_soln_set)]

        if score.count(min_score) > 2:
            # Several best rows: report the first and the last tie-break row.
            amb_soln_set = [_diplotype_to_star_alleles(elem)
                            for elem in (tiebreak1[0], tiebreak1[-1])]
            return [soln_list1, " or ".join(amb_soln_set)]

        minpos = score.index(min_score)
        return [soln_list1, tiebreak3[minpos],
                _diplotype_to_star_alleles(tiebreak1[minpos])]

    if len(soln_list1) == 3:
        diplo1, diplo2, diplo3 = soln_list1
        n1 = _count_unsupported(all_variants, soln_list2[0].split(";"))
        n2 = _count_unsupported(all_variants, soln_list2[1].split(";"))
        n3 = _count_unsupported(all_variants, soln_list2[2].split(";"))

        if n1 < n2 and n1 < n3:
            return [soln_list1, diplo1, _diplotype_to_star_alleles(diplo1)]
        if n1 > n2 and n2 < n3:
            return [soln_list1, diplo2, _diplotype_to_star_alleles(diplo2)]
        if n1 > n2 and n2 > n3:
            return [soln_list1, diplo3, _diplotype_to_star_alleles(diplo3)]

        if n1 == n2 == n3 or (n1 != n2 == n3) or (n1 == n2 != n3):
            tiebreak1, tiebreak2, tiebreak3 = _load_tiebreak_rows(infile_spec, core_variants)
            score = [_count_unsupported(all_var_gt, gt.split(";")) for gt in tiebreak2]
            min_score = min(score)

            if _all_equal(score):
                amb_soln_set = [_diplotype_to_star_alleles(elem) for elem in tiebreak1]
                return [soln_list1, tiebreak1, " or ".join(amb_soln_set)]

            minpos = score.index(min_score)
            return [soln_list1, tiebreak3[minpos],
                    _diplotype_to_star_alleles(tiebreak1[minpos])]

        # NOTE(review): orderings such as n3 < n1 < n2, or n1 == n3 < n2,
        # match none of the rules above and fall through to None, exactly as
        # in the original if/elif chain.

    # Zero, more than three, or unrankable solutions.
    return None
|
||||
72
pgx-main/scripts/cyp4f2/b37/bin/stellarpgx.py
Normal file
72
pgx-main/scripts/cyp4f2/b37/bin/stellarpgx.py
Normal file
@@ -0,0 +1,72 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
|
||||
# CYP4F2 command-line driver: prints the sample's core variants, the candidate
# diplotypes found in the database and the resulting star-allele call.

print("--------------------------------------------\n")
print("CYP4F2 Star Allele Calling with StellarPGx\n")
print("--------------------------------------------\n")


# Five positional arguments are required; a missing one raises IndexError.
database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]


# The copy number is hard-coded here; it is not estimated from coverage.
cn = 2


supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)


snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)

# `is None` is the idiomatic identity test (PEP 8) and behaves exactly like the
# previous `== None` for the list / None values handled here.
if snv_def_calls is None:

    # No usable database match: fall back to the closest "background" alleles.
    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if bac_alleles is None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")

    else:
        # Last element is printed as the candidate, first as the background call.
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    # Nothing below applies without a database match.
    sys.exit()

else:
    # First element: candidate diplotype(s); last element: star-allele string.
    snv_cand_alleles = snv_def_calls[0]
    snv_def_alleles = snv_def_calls[-1]


print("\nCandidate alleles:")
print(snv_cand_alleles)


# NOTE(review): the value is never used below; the call is kept because it
# still reads infile_full_gt (and so fails if that file is unreadable).
dip_variants = get_all_vars_gt(infile_full_gt)


print("\nResult:")

print(snv_def_alleles)
|
||||
0
pgx-main/scripts/cyp4f2/b37/bin/sv_modules.py
Normal file
0
pgx-main/scripts/cyp4f2/b37/bin/sv_modules.py
Normal file
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user