pgx-main from prod added

This commit is contained in:
2025-08-18 12:06:58 +02:00
parent fcb0e9aa4c
commit fe48df8676
984 changed files with 878657 additions and 0 deletions

View File

@@ -0,0 +1,77 @@
#!/usr/bin/env python3
import os
import sys
def get_backgroud_alleles(database, core_vars):
dbs = []
dbs_temp = []
core_vars_list = core_vars.split(";")
core_temp1 = core_vars_list[-1][:-4]
core_temp2 = core_vars_list[0][:-4]
for line in open(database, "r"):
line = line.strip().split("\t")
dbs.append(line)
for record in dbs:
temp_rec = record[1]
if core_temp1 and core_temp2 in temp_rec:
dbs_temp.append(record)
scores = []
candidates = []
cand_vars = []
for elem in dbs_temp:
candidates.append(elem[0])
record_core_var = elem[1].split(";")
cand_vars.append(record_core_var)
counter = 0
for i in record_core_var:
if i in core_vars_list:
counter += 3
elif i[:-4] in core_vars:
counter += 1
else:
counter += -2
scores.append(counter)
cand_diplos = []
diplo_vars2 = []
if len(scores) == 0:
diplo1 = '2.v1_2.v1'
allele_res = '*2/*2'
else:
max_score = max(scores)
indices = [i for i, x in enumerate(scores) if x == max_score or x == max_score - 1]
for i in indices:
diplo = candidates[i]
diplo_vars1 = len(cand_vars[i])
cand_diplos.append(diplo)
diplo_vars2.append(diplo_vars1)
min_index = diplo_vars2.index(min(diplo_vars2))
diplo1 = cand_diplos[min_index]
res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
hap1 = "*" + str (diplo1[:res2[0]])
hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
allele_res = hap1 + "/" + hap2
return [allele_res, diplo1];

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,802 @@
import os
import sys
import subprocess
from snv_def_modules import *
from sv_modules import *
from bkg_modules import *
print("--------------------------------------------\n")
print("CYP2D6 Star Allele Calling with StellarPGx\n")
print("--------------------------------------------\n")
database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]
sv_del = sys.argv[6]
sv_dup = sys.argv[7]
cov_file = sys.argv[8]
hap_dbs = sys.argv[9]
act_score = sys.argv[10]
cn = get_total_CN(cov_file)[0]
print("Initially computed CN = {}".format(cn))
supp_core_vars = get_core_variants(infile, cn)
print("\nSample core variants:")
print(supp_core_vars)
snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)
if snv_def_calls == None:
bac_alleles = get_backgroud_alleles(database, supp_core_vars)
if int(cn) == 0:
print("\nResult:")
print("*5/*5")
elif bac_alleles == None:
print("\nResult:")
print("Possible novel allele or suballele present: interpret with caution")
elif bac_alleles != None and int(cn) < 2:
bac_alleles = bac_alleles[0].split("/")
bac_alleles1 = bac_alleles[0] + "/" + "*5"
print("\nResult:")
print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
print("\nLikely background alleles:")
print("[" + bac_alleles1 + "]")
else:
print("\nCandidate alleles:")
print("[" + bac_alleles[-1] + "]")
print("\nResult:")
print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
print("\nLikely background alleles:")
print("[" + bac_alleles[0] + "]")
print("\nActivity score:")
print("Indeterminate")
print("\nMetaboliser status:")
print("Indeterminate")
sys.exit()
best_diplos = snv_def_calls[0]
print("\nCandidate alleles:")
print(best_diplos)
snv_def_alleles = snv_def_calls[-1]
if "or" in snv_def_alleles:
pass
else:
snv_cand_alleles = snv_def_calls[1]
dip_variants = get_all_vars_gt(infile_full_gt)
print("\nResult:")
# cn = get_total_CN(cov_file)[0]
av_cov = get_total_CN(cov_file)[3]
cn_in1_3pr = get_total_CN(cov_file)[2]
cn_ex9_3pr = get_total_CN(cov_file)[4]
in1_3pr_float = get_total_CN(cov_file)[5]
cov_in4_3pr = get_total_CN(cov_file)[6]
cov_5pr_in4 = get_total_CN(cov_file)[7]
cn_2d7_ex9 = get_total_CN(cov_file)[8]
cn_2d7_in4_in8 = get_total_CN(cov_file)[9]
cov_2d7_ex2_in8 = get_total_CN(cov_file)[10]
cov_2d7_5pr_in1 = get_total_CN(cov_file)[11]
# print(float(cn_ex9_3pr))
gene_alleles = ""
if snv_def_alleles != '*2/*2':
in_list = dup_test_init(sv_dup, av_cov)
if cn == '2' and snv_def_alleles == '*4/*4':
test_68 = hyb_test_5_68_4(sv_del, in1_3pr_float, av_cov)
if test_68 == 'norm_art':
pass
elif test_68 == 'del_hyb':
snv_def_alleles = (snv_def_alleles.replace('*4', '*5', 1)).replace('*4', '*68+*4')
gene_alleles = snv_def_alleles
print(snv_def_alleles)
elif cn == '2':
if 'or' in snv_def_alleles:
print (snv_def_alleles)
else:
snv_def_alleles = snv_def_alleles.split("/")
if snv_def_alleles[0] == '*2' or snv_def_alleles[1] == '*2':
ind_star2 = snv_def_alleles.index('*2')
ind_other = 1 - ind_star2
test_13_2_v1 = hybrid_13_2_v1(cov_in4_3pr, cov_5pr_in4)
test_13_2_v2 = hybrid_13_2_v2(cov_2d7_ex2_in8, cov_2d7_5pr_in1)
if test_13_2_v1 == 'norm_var':
gene_alleles = "/".join(snv_def_alleles)
print(gene_alleles)
elif test_13_2_v1 == 'hyb_13_2':
gene_alleles = snv_def_alleles[ind_other] + "/" + "*13+*2"
print(gene_alleles)
elif test_13_2_v2 == 'hyb_13_2_v2':
gene_alleles = snv_def_alleles[ind_other] + "/" + "*13"
print(gene_alleles)
elif snv_def_alleles[0] == '*39' or snv_def_alleles[1] == '*39':
ind_star2 = snv_def_alleles.index('*39')
ind_other = 1 - ind_star2
test_83_single = hybrid_test_83_single(sv_dup, cn, av_cov, cn_ex9_3pr)
if test_83_single == 'norm_star39':
gene_alleles = "/".join(snv_def_alleles)
print(gene_alleles)
elif test_83_single == 'hyb_83_single':
gene_alleles = snv_def_alleles[ind_other] + "/" + "*83"
print(gene_alleles)
elif snv_def_alleles[0] == '*10' or snv_def_alleles[1] == '*10':
ind_star2 = snv_def_alleles.index('*10')
ind_other = 1 - ind_star2
test_36_single = hybrid_test_36_single(sv_dup, cn, av_cov, cn_ex9_3pr)
if test_36_single == 'norm_star10':
gene_alleles = "/".join(snv_def_alleles)
print(gene_alleles)
elif test_36_single == 'hyb_36_single':
gene_alleles = snv_def_alleles[ind_other] + "/" + "*36"
print(gene_alleles)
else:
gene_alleles = "/".join(snv_def_alleles)
print(gene_alleles)
elif cn == '0':
del_confirm = del_test(sv_del)
if del_confirm == '*5/*5':
gene_alleles = del_confirm
print (gene_alleles)
elif del_confirm == '*5':
gene_alleles = del_confirm + "/" + "*other"
print(gene_alleles)
else:
gene_alleles = "*5/*5"
print(gene_alleles)
elif cn == '1':
del_confirm = del_test(sv_del)
if "or" in snv_def_alleles and del_confirm == 'None':
print (snv_def_alleles + "\t" + "Possible CYP2D6 gene deletion (*5) present")
elif "or" not in snv_def_alleles and del_confirm == 'None':
snv_def_alleles = snv_def_alleles.split("/")
snv_cand_alleles = "".join(snv_cand_alleles)
snv_cand_alleles = snv_cand_alleles.split("_")
if snv_def_alleles[0] == snv_def_alleles[1]:
gene_alleles = snv_def_alleles[0] + "/" + "*5"
print(gene_alleles)
elif snv_def_alleles[0] != snv_def_alleles[1]:
samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)
gene_alleles = samp_allele1 + "/" + "*5"
print(gene_alleles)
else:
snv_def_alleles = snv_def_alleles.split("/")
snv_cand_alleles = "".join(snv_cand_alleles)
snv_cand_alleles = snv_cand_alleles.split("_")
if snv_def_alleles[0] == snv_def_alleles[1]:
if del_confirm == "*5/*5":
del_confirm = "*5"
else:
del_confirm = "*5"
gene_alleles = del_confirm + "/" + snv_def_alleles[0]
print(gene_alleles)
elif snv_def_alleles[0] != snv_def_alleles[1]:
samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)
if del_confirm == "*5/*5":
del_confirm = "*5"
else:
del_confirm = "*5"
gene_alleles = del_confirm + "/" + samp_allele1
print(gene_alleles)
elif (int(cn) == 3 or int(cn) == 4) and snv_def_alleles != None:
orig = snv_def_alleles
if "or" in snv_def_alleles:
print (snv_def_alleles + "\t" + "Duplication present")
else:
snv_def_alleles = snv_def_alleles.split("/")
snv_cand_alleles = "".join(snv_cand_alleles)
snv_cand_alleles = snv_cand_alleles.split("_")
if snv_def_alleles[0] == '*90' or snv_def_alleles[1] == '*90':
alt_allele_ind = 1 - snv_def_alleles.index('*90')
alt_allele = snv_def_alleles[alt_allele_ind]
sp_allele = tandem_90_1(in_list, alt_allele, cn)
sp_allele1 = sp_allele.split("/")
if "*10x2" in sp_allele1:
test_36 = hybrid_test_36(sv_dup, cn, av_cov, cn_ex9_3pr, cn_2d7_ex9, cn_2d7_in4_in8)
if test_36 == 'norm_dup':
pass
elif test_36 == 'hyb_36_10':
sp_allele = sp_allele.replace('*10x2', '*36+*10')
elif test_36 == 'hyb_36_36':
sp_allele = sp_allele.replace('*10x2', '*36x2')
gene_alleles = sp_allele
print(sp_allele)
elif snv_def_alleles[0] == '*57' or snv_def_alleles[1] == '*57':
alt_allele_ind = 1 - snv_def_alleles.index('*57')
alt_allele = snv_def_alleles[alt_allele_ind]
sp_allele = tandem_57_10(in_list, alt_allele, cn)
print(sp_allele)
elif snv_def_alleles[0] != snv_def_alleles[1]:
phased_dup = dup_test_cn_3_4(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)
if phased_dup == 'check':
phased_dup == 'No_call'
else:
pass
phased_dup1 = phased_dup.split("/")
if '*4x2' in phased_dup1:
count1 = phased_dup1.count('*4x2')
a_ind1 = phased_dup1.index('*4x2')
a_ind2 = 1 - a_ind1
other_hap = phased_dup1[a_ind2]
if count1 == 1:
test_68 = hybrid_test_68(sv_dup, cn, av_cov, cn_in1_3pr, in_list)
if test_68 == 'norm_dup':
pass
elif test_68 == 'hyb_68':
if int(cn_in1_3pr) < int(cn):
phased_dup = phased_dup.replace('*4x2', '*68+*4')
elif int(cn_in1_3pr) == int(cn) and ('x' not in other_hap) and int(cn) == 4:
phased_dup = phased_dup.replace('*4x2', '*68+*4')
phased_dup = phased_dup.replace(other_hap, (other_hap + 'x2'))
elif count1 == 2:
pass
if '*4x3' in phased_dup1:
count1 = phased_dup1.count('*4x3')
a_ind1 = phased_dup1.index('*4x3')
a_ind2 = 1 - a_ind1
other_hap = phased_dup1[a_ind2]
if count1 == 1:
test_68 = hybrid_test_68(sv_dup, cn, av_cov, cn_in1_3pr, in_list)
if test_68 == 'norm_dup':
pass
elif test_68 == 'hyb_68':
if int(cn_in1_3pr) < int(cn):
phased_dup = phased_dup.replace('*4x3', '*68+*4')
elif int(cn_in1_3pr) == int(cn) and 'x' not in other_hap:
phased_dup = phased_dup.replace('*4x3', '*68+*4')
elif count1 == 2:
pass
if '*10x2' in phased_dup1:
count2 = phased_dup1.count('*10x2')
b_ind1 = phased_dup1.index('*10x2')
b_ind2 = 1 - b_ind1
if count2 == 1:
test_36 = hybrid_test_36(sv_dup, cn, av_cov, cn_ex9_3pr)
if test_36 == 'norm_dup':
pass
elif test_36 == 'hyb_36_10':
phased_dup = phased_dup.replace('*10x2', '*36+*10')
elif test_36 == 'hyb_36_36':
phased_dup = phased_dup.replace('*10x2', '*36x2')
if '*10x3' in phased_dup1:
count3 = phased_dup1.count('*10x3')
c_ind1 = phased_dup1.index('*10x3')
c_ind2 = 1 - c_ind1
if count3 == 1:
test_36 = hybrid_test_36_mod(sv_dup, cn, av_cov, cn_ex9_3pr)
if test_36 == 'norm_mt':
pass
elif test_36 == 'hyb_36_10':
phased_dup = phased_dup.replace('*10x3', '*36+*10x2')
elif test_36 == 'hyb_36_36':
phased_dup = phased_dup.replace('*10x3', '*36x2+*10')
if '*1x3' in phased_dup1:
count2 = phased_dup1.count('*1x3')
b_ind1 = phased_dup1.index('*1x3')
b_ind2 = 1 - b_ind1
if count2 == 1:
test_83 = hybrid_test_83(sv_dup, cn, av_cov, cn_ex9_3pr)
if test_83 == 'norm_star39':
pass
elif test_83 == 'hyb_83':
phased_dup = phased_dup.replace('*1x3', '*1x2+*83')
if '*2' in phased_dup1:
count2 = phased_dup1.count('*2')
b_ind1 = phased_dup1.index('*2')
b_ind2 = 1 - b_ind1
if count2 == 1:
test_13_2_v1 = hybrid_13_2_v1(cov_in4_3pr, cov_5pr_in4)
test_13_2_v2 = hybrid_13_2_v2(cov_2d7_ex2_in8, cov_2d7_5pr_in1)
if test_13_2_v1 == 'norm_var':
pass
elif test_13_2_v2 == 'norm_var':
pass
elif test_13_2_v1 == 'hyb_13_2':
phased_dup = phased_dup1[b_ind2] + "/" + '*13+*2'
elif test_13_2_v2 == 'hyb_13_2_v2':
phased_dup = phased_dup1[b_ind2] + "/" + '*13'
if '*2x2' in phased_dup1:
count2 = phased_dup1.count('*2x2')
b_ind1 = phased_dup1.index('*2x2')
b_ind2 = 1 - b_ind1
if count2 == 1:
test_13_2_v1 = hybrid_13_2_v1(cov_in4_3pr, cov_5pr_in4)
test_13_2_v2 = hybrid_13_2_v2(cov_2d7_ex2_in8, cov_2d7_5pr_in1)
if test_13_2_v1 == 'norm_var':
pass
elif test_13_2_v2 == 'norm_var':
pass
elif test_13_2_v1 == 'hyb_13_2':
phased_dup = phased_dup1[b_ind2] + "/" + '*13+*2'
elif test_13_2_v2 == 'hyb_13_2_v2':
phased_dup = phased_dup1[b_ind2] + "/" + '*13+*2'
gene_alleles = phased_dup
print(phased_dup)
elif snv_def_alleles[0] == snv_def_alleles[1]:
rt_2 = int(cn) - 1
phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))
phased_dup1 = phased_dup.split("/")
if '*4x2' in phased_dup1:
count1 = phased_dup1.count('*4x2')
a_ind1 = phased_dup1.index('*4x2')
a_ind2 = 1 - a_ind1
if count1 == 1:
test_68 = hybrid_test_68(sv_dup, cn, av_cov, cn_in1_3pr, in_list)
if test_68 == 'norm_dup':
pass
elif test_68 == 'hyb_68':
phased_dup.replace('*4x2', '*68+*4')
if '*10x2' in phased_dup1:
count2 = phased_dup1.count('*10x2')
b_ind1 = phased_dup1.index('*10x2')
b_ind2 = 1 - b_ind1
if count2 == 1:
test_36 = hybrid_test_36(sv_dup, cn, av_cov, cn_ex9_3pr)
if test_36 == 'norm_dup':
pass
elif test_36 == 'hyb_36_10':
phased_dup = phased_dup.replace('*10x2', '*36+*10')
elif test_36 == 'hyb_36_36':
phased_dup = phased_dup.replace('*10x2', '*36x2')
if '*10x3' in phased_dup1:
count3 = phased_dup1.count('*10x3')
c_ind1 = phased_dup1.index('*10x3')
c_ind2 = 1 - c_ind1
if count3 == 1:
test_36 = hybrid_test_36_mod(sv_dup, cn, av_cov, cn_ex9_3pr)
if test_36 == 'norm_mt':
pass
elif test_36 == 'hyb_36_10':
phased_dup = phased_dup.replace('*10x3', '*36+*10x2')
elif test_36 == 'hyb_36_36':
phased_dup = '*36+*10/*36+*10'
if '*1x3' in phased_dup1:
count2 = phased_dup1.count('*1x3')
b_ind1 = phased_dup1.index('*1x3')
b_ind2 = 1 - b_ind1
if count2 == 1:
test_83 = hybrid_test_83(sv_dup, cn, av_cov, cn_ex9_3pr)
if test_83 == 'norm_star39':
pass
elif test_83 == 'hyb_83':
phased_dup = phased_dup.replace('*1x3', '*1x2+*83')
if '*2x2' in phased_dup1:
count2 = phased_dup1.count('*2x2')
b_ind1 = phased_dup1.index('*2x2')
b_ind2 = 1 - b_ind1
if count2 == 1:
test_13_2_v1 = hybrid_13_2_v1(cov_in4_3pr, cov_5pr_in4)
test_13_2_v2 = hybrid_13_2_v2(cov_2d7_ex2_in8, cov_2d7_5pr_in1)
if test_13_2_v1 == 'norm_var':
pass
elif test_13_2_v2 == 'norm_var':
pass
elif test_13_2_v1 == 'hyb_13_2':
phased_dup = phased_dup1[b_ind2] + "/" + '*13+*2'
elif test_13_2_v2 == 'hyb_13_2_v2':
phased_dup = phased_dup1[b_ind2] + "/" + '*13+*2'
gene_alleles = phased_dup
print(phased_dup)
elif int(cn) > 4 and snv_def_alleles != None:
if "or" in snv_def_alleles:
print (snv_def_alleles + "\t" + "Duplication present")
else:
snv_def_alleles = snv_def_alleles.split("/")
snv_cand_alleles = "".join(snv_cand_alleles)
snv_cand_alleles = snv_cand_alleles.split("_")
if snv_def_alleles[0] != snv_def_alleles[1]:
phased_dup = dup_test_cn_n(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)
if phased_dup == 'check':
phased_dup = 'No_call'
else:
pass
phased_dup1 = phased_dup.split("/")
if '*10x4' in phased_dup1:
count3 = phased_dup1.count('*10x4')
c_ind1 = phased_dup1.index('*10x4')
c_ind2 = 1 - c_ind1
if count3 == 1:
test_36 = hybrid_test_36_multi(sv_dup, cn, av_cov, cn_ex9_3pr)
if test_36 == 'norm_mt':
pass
elif test_36 == 'hyb_36_10':
phased_dup = phased_dup.replace('*10x4', '*36+*10x3')
elif test_36 == 'hyb_36_36':
phased_dup = phased_dup.replace('*10x4', '*36x2+*10x2')
elif test_36 == 'hyb_36_36_36':
phased_dup = phased_dup.replace('*10x4','*36x3+*10')
else:
phased_dup = "No_call"
elif '*10x3' in phased_dup1:
count3 = phased_dup1.count('*10x3')
c_ind1 = phased_dup1.index('*10x3')
c_ind2 = 1 - c_ind1
if count3 == 1:
test_36 = hybrid_test_36_multi(sv_dup, cn, av_cov, cn_ex9_3pr)
if test_36 == 'norm_mt':
pass
elif test_36 == 'hyb_36_10':
phased_dup = phased_dup.replace('*10x3', '*36+*10x2')
elif test_36 == 'hyb_36_36':
phased_dup = phased_dup.replace('*10x3', '*36x2+*10')
elif test_36 == 'hyb_36_36_36':
phased_dup = phased_dup.replace('*10x3','*36x3')
elif '*10x' in phased_dup1:
phased_dup = 'No_call'
elif snv_def_alleles[0] == snv_def_alleles[1]:
rt_2 = int(cn) - 1
phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))
if phased_dup == 'check':
phased_dup = 'No_call'
else:
pass
phased_dup1 = phased_dup.split("/")
if '*10x4' in phased_dup1:
count3 = phased_dup1.count('*10x4')
c_ind1 = phased_dup1.index('*10x4')
c_ind2 = 1 - c_ind1
if count3 == 1:
test_36 = hybrid_test_36_multi(sv_dup, cn, av_cov, cn_ex9_3pr)
if test_36 == 'norm_mt':
pass
elif test_36 == 'hyb_36_10':
phased_dup = phased_dup.replace('*10x4', '*36+*10x3')
elif test_36 == 'hyb_36_36':
phased_dup = '*36+*10/*36+*10x2'
elif test_36 == 'hyb_36_36_36':
phased_dup = '*36+*10/*36x2+*10'
else:
phased_dup = "No_call"
elif phased_dup1[0].startswith('*10x') or phased_dup1[1].startswith('*10x'):
if phased_dup1[0].startswith('*10x'):
dup_10_hyb = phased_dup1[0]
elif phased_dup1[1].startswith('*10x'):
dup_10_hyb = phased_dup1[1]
cn_star10 = dup_10_hyb[(dup_10_hyb.find('x') + 1 ):]
test_36 = hybrid_test_36_multi_10(sv_dup, cn, av_cov, cn_ex9_3pr, cn_star10)
if test_36 == 'norm_mt':
pass
elif test_36 == 'check':
phased_dup = 'No_call'
else:
c_ind1 = phased_dup1.index(dup_10_hyb)
c_ind2 = 1 - c_ind1
phased_dup = str(phased_dup1[c_ind2]) + "/" + test_36
gene_alleles = phased_dup
print(phased_dup)
elif int(cn) > 2 and snv_def_alleles == None:
print("Possible rare CYP2D6/2D7 hybrid present")
print("\nActivity score:")
score_list = []
score_list1 = []
score_list2 = []
score_list3 = []
allele_dict = {}
def get_ac_score(act_score, star_alleles):
for line in open(act_score, "r"):
line = line.strip().split()
score_list.append(line)
for i in score_list:
allele_dict[i[0]] = i[1]
star_alleles = star_alleles.replace("/", "+")
star_alleles = star_alleles.split("+")
for elem in star_alleles:
if "x" not in elem:
m_allele = elem
n_allele = "1"
elif "x" in elem:
index1 = elem.find("x")
m_allele = elem[:index1]
n_allele = elem[index1+1:]
p_allele = allele_dict[m_allele] + "_" + n_allele
p_allele = p_allele.split("_")
score_list1.append(p_allele)
for i in score_list1:
score_list2.append(i[0])
if "n" in score_list2:
return "Indeterminate"
else:
for i in score_list1:
score_list3.append(float(i[0])*float(i[1]))
total_a_score = sum(score_list3)
return total_a_score
if gene_alleles in ["",'No_call','check']:
ac_score = "Indeterminate"
print(ac_score)
elif gene_alleles != "":
ac_score = get_ac_score(act_score, gene_alleles)
print(ac_score)
print("\nMetaboliser status:")
if ac_score == "Indeterminate":
print ("Indeterminate")
elif ac_score == 0:
print("Poor metaboliser (PM)")
elif 0 < ac_score < 1.25:
print("Intermediate metaboliser (IM)")
elif 1.25 <= ac_score <= 2.25:
print("Normal metaboliser (NM)")
elif ac_score > 2.25:
print("Ultrarapid metaboliser (UM)")

View File

@@ -0,0 +1,611 @@
#!/usr/bin/env python3
import os
import sys
import math
def get_total_CN(cov_file):
all_reg =[]
for line in open(cov_file, "r"):
line = line.strip().split()
all_reg.append(line)
av_2d6_cov = float(all_reg[2][3])/(float(all_reg[2][2]) - float(all_reg[2][1]))
av_vdr_cov = float(all_reg[3][3])/(float(all_reg[3][2]) - float(all_reg[3][1]))
av_in1_3pr = float(all_reg[1][3])/(float(all_reg[1][2]) - float(all_reg[1][1]))
av_ex9_3pr = float(all_reg[0][3])/(float(all_reg[0][2]) - float(all_reg[0][1]))
av_5pr_in1 = float(all_reg[4][3])/(float(all_reg[4][2]) - float(all_reg[4][1]))
av_in4_3pr = float(all_reg[5][3])/(float(all_reg[5][2]) - float(all_reg[5][1]))
av_5pr_in4 = float(all_reg[6][3])/(float(all_reg[6][2]) - float(all_reg[6][1]))
av_2d7_ex9 = float(all_reg[7][3])/(float(all_reg[7][2]) - float(all_reg[7][1]))
av_2d7_in4_in8 = float(all_reg[8][3])/(float(all_reg[8][2]) - float(all_reg[8][1]))
av_egfr_cov = float(all_reg[9][3])/(float(all_reg[9][2]) - float(all_reg[9][1]))
av_2d7_ex2_in8 = float(all_reg[10][3])/(float(all_reg[10][2]) - float(all_reg[10][1]))
av_2d7_5pr_in1 = float(all_reg[11][3])/(float(all_reg[11][2]) - float(all_reg[11][1]))
av_ctrl_cov = (av_vdr_cov + av_egfr_cov)/2
# av_ctrl_cov = av_vdr_cov
comp_av = av_2d6_cov/av_ctrl_cov
temp_cn = 2 * comp_av + 0.15
total_cn = round(temp_cn)
in1_3pr = round(2 * av_in1_3pr/av_ctrl_cov)
ex9_3pr = (2 * av_ex9_3pr/av_ctrl_cov)
return [str(total_cn), round(av_2d6_cov), str(int(in1_3pr)), round(av_ctrl_cov), str(ex9_3pr), round(av_in1_3pr), str(av_in4_3pr), str(av_5pr_in4), str(av_2d7_ex9), str(av_2d7_in4_in8), str(av_2d7_ex2_in8), str(av_2d7_5pr_in1)];
samp_gt = ""
samp_gt_hap1 = ""
def del_test(sv_del):
if os.stat(sv_del).st_size == 0:
return "None"
else:
for line in open(sv_del, "r"):
if "COVERAGE" in line:
line = line.strip().split()
ABHom = line[-1]
ABHet = line[-2]
GT = line[2]
DP = int(line[3])
if float(ABHom) == 1.0:
return "*5/*5"
elif float(ABHom) == -1.0:
return "*5"
else:
pass
hap_adv_list = []
hap_t1 = []
def del_adv_test(hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, core_vars):
g = open(hap_dbs, "r")
for line in g:
line = line.strip().split()
hap_adv_list.append(line)
a1 = core_vars.split(";")
for i in a1:
if i[-3:] == "0/1":
hap_t1.append(i[:-4])
for elem in hap_adv_list:
if elem[1] == cand_allele1:
list_t1 = (elem[2]).split(';')
if elem[1] == cand_allele2:
list_t2 = (elem[2]).split(';')
if hap_t1[0] in list_t1:
return test_allele1
elif hap_t1[0] in list_t2:
return test_allele2
het_hom_list = []
het_hom_list_new = []
def dup_test_init(sv_dup, av_cov):
for line in open(sv_dup, "r"):
if "COVERAGE" in line:
continue
elif "AGGREGATED" in line:
continue
else:
fields = line.strip().split()
het_hom_list.append(fields)
test_list1 = []
for i in het_hom_list:
test_list1.append(int(i[2]))
av_read_cov = sum(test_list1)/len(test_list1)
norm_cov = (av_cov + av_read_cov)/2
for i in het_hom_list:
supp_reads = round(float(i[-2])*int(i[2]))
i.append(round(supp_reads/norm_cov, 3))
i.append(supp_reads)
het_hom_list_new.append(i)
return (het_hom_list_new)
hap_def_list = []
allele_cn_list = []
def dup_test_cn_3_4(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
g = open(hap_dbs, "r")
for line in g:
line = line.strip().split()
hap_def_list.append(line)
test_list1 = []
test_list2 = []
test_list3 = []
het_list = []
for i in in_list:
if i[1] == "0/1":
het_list.append(i)
# return het_list
# if len(het_list) > 1 and het_list[0][0] == "42522613~G>C":
# het_list.pop(0)
# else:
# pass
for i in het_list:
test_list1.append(i[0])
test_list2.append(i[-2])
test_list3.append(float(i[-4]))
max_het = max(test_list2)
# if max_het > 1:
# max_het = min(test_list2)
# else:
# pass
max_het_pos = test_list2.index(max_het)
var = test_list1[max_het_pos]
if max_het > 1:
max_het = test_list3[max_het_pos]
elif max_het > test_list3[max_het_pos]:
max_het = test_list3[max_het_pos]
else:
pass
for elem in hap_def_list:
if elem[1] == cand_allele1:
list_3t = elem
list_3t_2 = list_3t[2].split(';')
l3 = len(list_3t_2)
if elem[1] == cand_allele2:
list_4t = elem
list_4t_2 = list_4t[2].split(';')
l4 = len(list_4t_2)
hdb_list = list_3t_2 + list_4t_2
# return hdb_list
index_var = hdb_list.index(var)
if index_var < l3:
allele_cn_list.append(test_allele1)
allele_cn_list.append(int(round(max_het*int(c_num))))
elif index_var >= l3:
allele_cn_list.append(test_allele2)
allele_cn_list.append(int(round(max_het*int(c_num))))
if allele_cn_list[0] == test_allele1:
rt_2 = int(c_num) - allele_cn_list[1]
allele_cn_list.append(test_allele2)
allele_cn_list.append(rt_2)
elif allele_cn_list[0] == test_allele2:
rt_2 = int(c_num) - allele_cn_list[1]
allele_cn_list.append(test_allele1)
allele_cn_list.append(rt_2)
# return allele_cn_list
if allele_cn_list[1] == 0:
res_dip = allele_cn_list[0] + "/" + allele_cn_list[2] + "x" + str(allele_cn_list[3] - 1)
elif allele_cn_list[3] == 0:
res_dip = allele_cn_list[2] + "/" + allele_cn_list[0] + "x" + str(allele_cn_list[1] - 1)
elif allele_cn_list[1] == 1:
res_dip = allele_cn_list[0] + "/" + allele_cn_list[2] + "x" + str(allele_cn_list[3])
elif allele_cn_list[3] == 1:
res_dip = allele_cn_list[2] + "/" + allele_cn_list[0] + "x" + str(allele_cn_list[1])
elif allele_cn_list[1] == 2:
res_dip = allele_cn_list[0] + "x2" + "/" + allele_cn_list[2] + "x" + str(allele_cn_list[3])
elif allele_cn_list[3] == 2:
res_dip = allele_cn_list[2] + "x2" + "/" + allele_cn_list[0] + "x" + str(allele_cn_list[1])
elif allele_cn_list[3] < -1:
res_dip = allele_cn_list[2] + "/" + allele_cn_list[0] + "x" + str(3)
elif allele_cn_list[1] < -1:
res_dip = allele_cn_list[0] + "/" + allele_cn_list[2] + "x" + str(3)
else:
res_dip = 'check'
return res_dip
def dup_test_cn_n(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
g = open(hap_dbs, "r")
for line in g:
line = line.strip().split()
hap_def_list.append(line)
test_list1 = []
test_list2 = []
het_list = []
for i in in_list:
if i[1] == "0/1":
het_list.append(i)
# if len(het_list) > 1 and het_list[0][0] == "42522613~G>C":
# het_list.pop(0)
# else:
# pass
for i in het_list:
test_list1.append(i[0])
test_list2.append(i[-2])
max_het = max(test_list2)
max_het_pos = test_list2.index(max_het)
var = test_list1[max_het_pos]
for elem in hap_def_list:
if elem[1] == cand_allele1:
list_3t = elem
list_3t_2 = list_3t[2].split(';')
l3 = len(list_3t_2)
if elem[1] == cand_allele2:
list_4t = elem
list_4t_2 = list_4t[2].split(';')
l4 = len(list_4t_2)
hdb_list = list_3t_2 + list_4t_2
index_var = hdb_list.index(var)
if index_var < l3:
allele_cn_list.append(test_allele1)
allele_cn_list.append(int(round(max_het*int(c_num)-0.15)))
elif index_var >= l3:
allele_cn_list.append(test_allele2)
allele_cn_list.append(int(round(max_het*int(c_num)-0.15)))
if allele_cn_list[0] == test_allele1:
rt_2 = int(c_num) - allele_cn_list[1]
allele_cn_list.append(test_allele2)
allele_cn_list.append(rt_2)
elif allele_cn_list[0] == test_allele2:
rt_2 = int(c_num) - allele_cn_list[1]
allele_cn_list.append(test_allele1)
allele_cn_list.append(rt_2)
if allele_cn_list[1] == 0:
res_dip = allele_cn_list[0] + "/" + allele_cn_list[2] + "x" + str(allele_cn_list[3] - 1)
elif allele_cn_list[3] == 0:
res_dip = allele_cn_list[2] + "/" + allele_cn_list[0] + "x" + str(allele_cn_list[1] - 1)
elif allele_cn_list[1] == 1:
res_dip = allele_cn_list[0] + "/" + allele_cn_list[2] + "x" + str(allele_cn_list[3])
elif allele_cn_list[3] == 1:
res_dip = allele_cn_list[2] + "/" + allele_cn_list[0] + "x" + str(allele_cn_list[1])
elif allele_cn_list[1] == 2:
res_dip = allele_cn_list[0] + "x2" + "/" + allele_cn_list[2] + "x" + str(allele_cn_list[3])
elif allele_cn_list[3] == 2:
res_dip = allele_cn_list[2] + "x2" + "/" + allele_cn_list[0] + "x" + str(allele_cn_list[1])
elif allele_cn_list[1] == 3:
res_dip = allele_cn_list[0] + "x3" + "/" + allele_cn_list[2] + "x" + str(allele_cn_list[3])
elif allele_cn_list[3] == 3:
res_dip = allele_cn_list[2] + "x3" + "/" + allele_cn_list[0] + "x" + str(allele_cn_list[1])
elif allele_cn_list[1] == 4:
res_dip = allele_cn_list[0] + "x4" + "/" + allele_cn_list[2] + "x" + str(allele_cn_list[3])
elif allele_cn_list[3] == 4:
res_dip = allele_cn_list[2] + "x4" + "/" + allele_cn_list[0] + "x" + str(allele_cn_list[1])
elif allele_cn_list[3] < 0:
res_dip = allele_cn_list[2] + "/" + allele_cn_list[0] + "x" + str(allele_cn_list[1] + allele_cn_list[3] - 1)
elif allele_cn_list[1] < 0:
res_dip = allele_cn_list[0] + "/" + allele_cn_list[2] + "x" + str(allele_cn_list[3] + allele_cn_list[1] - 1)
else:
res_dip = 'check'
return res_dip
def hybrid_test_68(sv_dup, c_num, av_cov, cn_in1_3pr1, in_list):
test_list1 = []
test_list2 = []
test_list3 = []
for i in in_list:
test_list1.append(i[0])
test_list2.append(abs(float(i[-2])))
test_list3.append(i[-1])
index1 = test_list1.index('42526694~G>A')
index2 = test_list1.index('42524947~C>T')
val_68 = test_list3[index1]
val_4 = test_list3[index2]
rt = val_68/val_4
if rt <= 1.4:
return 'norm_dup'
elif rt > 1.4:
return 'hyb_68'
else:
return 'norm_dup'
def hyb_test_5_68_4(sv_del, in1_3pr1_float, av_cov):
test_del = []
for line in open(sv_del, "r"):
if "COVERAGE" in line:
test_del.append(line.strip())
t1 = 2 * in1_3pr1_float/av_cov
if len(test_del) == 0 and (1.6 < t1 < 2.8):
return 'norm_art'
elif len(test_del) > 0 and t1 < 1.6:
return 'del_hyb'
def hybrid_test_36(sv_dup, cn, av_cov, cn_ex9_3pr):
if int(round(float(cn_ex9_3pr))) == int(cn):
return 'norm_dup'
elif ((int(cn) - 1) - 0.35) < float(cn_ex9_3pr) < ((int(cn) - 1) + 0.5):
return 'hyb_36_10'
elif (int(cn) - 2) <= float(cn_ex9_3pr) < (int(cn) - 2 + 0.65):
return 'hyb_36_36'
def hybrid_test_36_single(sv_dup, cn, av_cov, cn_ex9_3pr):
if int(round(float(cn_ex9_3pr))) == int(cn):
return 'norm_star10'
elif ((int(cn) - 1) - 0.35) < float(cn_ex9_3pr) < ((int(cn) - 1) + 0.5):
return 'hyb_36_single'
else:
return 'norm_star10'
def hybrid_test_36_mod(sv_dup, cn, av_cov, cn_ex9_3pr):
if int(round(float(cn_ex9_3pr))) == int(cn):
return 'norm_mt'
elif ((int(cn) - 1) - 0.3) < float(cn_ex9_3pr) < ((int(cn) - 1) + 0.5):
return 'hyb_36_10'
elif ((int(cn) - 2) - 0.3) <= float(cn_ex9_3pr) < (int(cn) - 2 + 0.7):
return 'hyb_36_36'
def hybrid_test_36_multi(sv_dup, cn, av_cov, cn_ex9_3pr):
if int(round(float(cn_ex9_3pr))) == int(cn):
return 'norm_mt'
elif ((int(cn) - 1) - 0.05) < float(cn_ex9_3pr) < ((int(cn) - 1) + 0.5):
return 'hyb_36_10'
elif (int(cn) - 2) <= float(cn_ex9_3pr) < (int(cn) - 2 + 0.95):
return 'hyb_36_36'
elif (int(cn) - 3) <= float(cn_ex9_3pr) < (int(cn) - 3 + 0.95):
return 'hyb_36_36_36'
else:
return 'check'
def hybrid_test_36_multi_10(sv_dup, cn, av_cov, cn_ex9_3pr, cn_star10):
if int(round(float(cn_ex9_3pr))) == int(cn):
return 'norm_mt'
elif float(cn_ex9_3pr) < ((int(cn) - 1) + 0.5):
cn_star36 = int(cn) - int(round(float(cn_ex9_3pr)))
adj_cn_star10 = int(cn_star10) - cn_star36
if cn_star36 == 1:
return '*36+*10x' + str(adj_cn_star10)
else:
return '*36x' + str(cn_star36) + '+*10x' + str(adj_cn_star10)
else:
return 'check'
def hybrid_13_2_v1(cov_in4_3pr, cov_5pr_in4):
if 0.85 < float(cov_in4_3pr)/float(cov_5pr_in4) < 1.2:
return 'norm_var'
elif 0.45 < float(cov_in4_3pr)/float(cov_5pr_in4) < 0.75:
return 'hyb_13_2'
else:
return 'norm_var'
def hybrid_13_2_v2(cov_2d7_ex2_in8, cov_2d7_5pr_in1):
if 0.85 < float(cov_2d7_ex2_in8)/float(cov_2d7_5pr_in1) < 1.2:
return 'norm_var'
elif 0.45 < float(cov_2d7_ex2_in8)/float(cov_2d7_5pr_in1) < 0.75:
return 'hyb_13_2_v2'
else:
return 'norm_var'
def tandem_90_1(in_list, alt_allele, cn):
test_list1 = []
test_list2 = []
test_list3 = []
for i in in_list:
test_list1.append(i[0])
test_list2.append(abs(float(i[-2])))
test_list3.append(i[-1])
if len(test_list1) > 1:
index1 = test_list1.index('42525100~T>C')
a = test_list3[index1]
test_list3.pop(index1)
b = max(test_list3)
c = round(b/a)
if int(cn) == 3 and c == 1:
res = alt_allele + "/" + "*90+*1"
elif int(cn) == 3 and c > 1:
res = alt_allele + "x2" + "/" + "*90"
elif int(cn) == 4 and c == 2:
res = alt_allele + "x2" + "/" + "*90+*1"
elif int(cn) == 4 and c >= 3:
res = alt_allele + "x3" + "/" + "*90"
else:
val1 = test_list2[0]
val2 = round(val1 * int(cn))
if int(cn) == 3 and val2 == 1:
res = '*1/*90+*1'
return res
def tandem_57_10(in_list, alt_allele, cn):
test_list1 = []
test_list2 = []
test_list3 = []
for i in in_list:
test_list1.append(i[0])
test_list2.append(abs(float(i[-2])))
test_list3.append(i[-1])
if len(test_list1) > 1:
index1 = test_list1.index('42525908~G>A')
a = test_list3[index1]
test_list3.pop(index1)
index2 = test_list1.index('42526694~G>A')
m = test_list3[index2]
test_list3.pop(index2)
b = max(test_list3)
c = round(b/a)
p = round(m/a)
if int(cn) == 3 and c == 1 and p > 1:
res = alt_allele + "/" + "*57+*10"
elif int(cn) == 3 and c > 1 and p == 1:
res = alt_allele + "x2" + "/" + "*57"
elif int(cn) == 4 and c == 2 and p > 1:
res = alt_allele + "x2" + "/" + "*57+*10"
elif int(cn) == 4 and c >= 3 and p == 1:
res = alt_allele + "x3" + "/" + "*57"
elif int(cn) == 4 and p == 1 and alt_allele == '*10':
res = "*57+*10" + "/" + "*57+*10"
elif int(cn) == 4 and p > 1 and alt_allele == '*10':
res = "*10x2" + "/" + "*57+*10"
else:
res = alt_allele + "/" + "*57+*10"
return res
def hybrid_test_83_single(sv_dup, cn, av_cov, cn_ex9_3pr):
if int(round(float(cn_ex9_3pr))) == int(cn):
return 'norm_star39'
elif ((int(cn) - 1) - 0.35) < float(cn_ex9_3pr) < ((int(cn) - 1) + 0.5):
return 'hyb_83_single'
else:
return 'norm_star39'
def hybrid_test_83(sv_dup, cn, av_cov, cn_ex9_3pr):
if int(round(float(cn_ex9_3pr))) == int(cn):
return 'norm_star39'
elif ((int(cn) - 1) - 0.35) < float(cn_ex9_3pr) < ((int(cn) - 1) + 0.5):
return 'hyb_83'
else:
return 'norm_star39'

View File

@@ -0,0 +1,77 @@
#!/usr/bin/env python3
import os
import sys
def get_backgroud_alleles(database, core_vars):
dbs = []
dbs_temp = []
core_vars_list = core_vars.split(";")
core_temp1 = core_vars_list[-1][:-4]
core_temp2 = core_vars_list[0][:-4]
for line in open(database, "r"):
line = line.strip().split("\t")
dbs.append(line)
for record in dbs:
temp_rec = record[1]
if core_temp1 and core_temp2 in temp_rec:
dbs_temp.append(record)
scores = []
candidates = []
cand_vars = []
for elem in dbs_temp:
candidates.append(elem[0])
record_core_var = elem[1].split(";")
cand_vars.append(record_core_var)
counter = 0
for i in record_core_var:
if i in core_vars_list:
counter += 3
elif i[:-4] in core_vars:
counter += 1
else:
counter += -2
scores.append(counter)
cand_diplos = []
diplo_vars2 = []
if len(scores) == 0:
diplo1 = '1.v1_1.v1'
allele_res = '*1/*1'
else:
max_score = max(scores)
indices = [i for i, x in enumerate(scores) if x == max_score or x == max_score - 1]
for i in indices:
diplo = candidates[i]
diplo_vars1 = len(cand_vars[i])
cand_diplos.append(diplo)
diplo_vars2.append(diplo_vars1)
min_index = diplo_vars2.index(min(diplo_vars2))
diplo1 = cand_diplos[min_index]
res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
hap1 = "*" + str (diplo1[:res2[0]])
hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
allele_res = hap1 + "/" + hap2
return [allele_res, diplo1];

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,810 @@
import os
import sys
import subprocess
from snv_def_modules import *
from sv_modules import *
from bkg_modules import *
print("--------------------------------------------\n")
print("CYP2D6 Star Allele Calling with StellarPGx\n")
print("--------------------------------------------\n")
database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]
sv_del = sys.argv[6]
sv_dup = sys.argv[7]
cov_file = sys.argv[8]
hap_dbs = sys.argv[9]
act_score = sys.argv[10]
cn = get_total_CN(cov_file)[0]
print("Initially computed CN = {}".format(cn))
supp_core_vars = get_core_variants(infile, cn)
print("\nSample core variants:")
print(supp_core_vars)
snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)
if snv_def_calls == None:
bac_alleles = get_backgroud_alleles(database, supp_core_vars)
if int(cn) == 0:
print("\nResult:")
print("*5/*5")
elif bac_alleles == None:
print("\nResult:")
print("Possible novel allele or suballele present: interpret with caution")
elif bac_alleles != None and int(cn) < 2:
bac_alleles = bac_alleles[0].split("/")
bac_alleles1 = bac_alleles[0] + "/" + "*5"
print("\nResult:")
print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
print("\nLikely background alleles:")
print("[" + bac_alleles1 + "]")
else:
print("\nCandidate alleles:")
print("[" + bac_alleles[-1] + "]")
print("\nResult:")
print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
print("\nLikely background alleles:")
print("[" + bac_alleles[0] + "]")
print("\nActivity score:")
print("Indeterminate")
print("\nMetaboliser status:")
print("Indeterminate")
sys.exit()
best_diplos = snv_def_calls[0]
print("\nCandidate alleles:")
print(best_diplos)
snv_def_alleles = snv_def_calls[-1]
if "or" in snv_def_alleles:
pass
else:
snv_cand_alleles = snv_def_calls[1]
dip_variants = get_all_vars_gt(infile_full_gt)
print("\nResult:")
av_cov = get_total_CN(cov_file)[3]
cn_in1_3pr = get_total_CN(cov_file)[2]
cn_ex9_3pr = get_total_CN(cov_file)[4]
in1_3pr_float = get_total_CN(cov_file)[5]
cov_in4_3pr = get_total_CN(cov_file)[6]
cov_5pr_in4 = get_total_CN(cov_file)[7]
cn_2d7_ex9 = get_total_CN(cov_file)[8]
cn_2d7_in4_in8 = get_total_CN(cov_file)[9]
cov_2d7_ex2_in8 = get_total_CN(cov_file)[10]
cov_2d7_5pr_in1 = get_total_CN(cov_file)[11]
gene_alleles = ""
if snv_def_alleles != '*1/*1':
in_list = dup_test_init(sv_dup, av_cov)
if cn == '2' and snv_def_alleles == '*4/*4':
test_68 = hyb_test_5_68_4(sv_del, in1_3pr_float, av_cov)
if test_68 == 'norm_art':
pass
elif test_68 == 'del_hyb':
snv_def_alleles = (snv_def_alleles.replace('*4', '*5', 1)).replace('*4', '*68+*4')
gene_alleles = snv_def_alleles
print(gene_alleles)
elif cn == '2':
# print(snv_def_alleles)
if 'or' in snv_def_alleles:
# print ("\n")
print (snv_def_alleles)
else:
snv_def_alleles = snv_def_alleles.split("/")
if snv_def_alleles[0] == '*2' or snv_def_alleles[1] == '*2':
ind_star2 = snv_def_alleles.index('*2')
ind_other = 1 - ind_star2
test_13_2_v1 = hybrid_13_2_v1(cov_in4_3pr, cov_5pr_in4)
test_13_2_v2 = hybrid_13_2_v2(cov_2d7_ex2_in8, cov_2d7_5pr_in1)
if test_13_2_v1 == 'norm_var':
gene_alleles = "/".join(snv_def_alleles)
print(gene_alleles)
elif test_13_2_v1 == 'hyb_13_2':
gene_alleles = snv_def_alleles[ind_other] + "/" + "*13+*2"
print(gene_alleles)
elif test_13_2_v2 == 'hyb_13_2_v2':
gene_alleles = snv_def_alleles[ind_other] + "/" + "*13"
print(gene_alleles)
elif snv_def_alleles[0] == '*39' or snv_def_alleles[1] == '*39':
ind_star2 = snv_def_alleles.index('*39')
ind_other = 1 - ind_star2
test_83_single = hybrid_test_83_single(sv_dup, cn, av_cov, cn_ex9_3pr)
if test_83_single == 'norm_star39':
gene_alleles = "/".join(snv_def_alleles)
print(gene_alleles)
elif test_83_single == 'hyb_83_single':
gene_alleles = snv_def_alleles[ind_other] + "/" + "*83"
print(gene_alleles)
elif snv_def_alleles[0] == '*10' or snv_def_alleles[1] == '*10':
ind_star2 = snv_def_alleles.index('*10')
ind_other = 1 - ind_star2
test_36_single = hybrid_test_36_single(sv_dup, cn, av_cov, cn_ex9_3pr)
if test_36_single == 'norm_star10':
gene_alleles = "/".join(snv_def_alleles)
print(gene_alleles)
elif test_36_single == 'hyb_36_single':
gene_alleles = snv_def_alleles[ind_other] + "/" + "*36"
print(gene_alleles)
else:
# print("\n")
gene_alleles = "/".join(snv_def_alleles)
print(gene_alleles)
elif cn == '0':
del_confirm = del_test(sv_del)
if del_confirm == '*5/*5':
gene_alleles = del_confirm
print(gene_alleles)
elif del_confirm == '*5':
samp_dip = del_confirm + "/" + "*other"
print(samp_dip)
else:
gene_alleles = "*5/*5"
print (gene_alleles)
elif cn == '1':
del_confirm = del_test(sv_del)
if "or" in snv_def_alleles and del_confirm == 'None':
print (snv_def_alleles + "\t" + "Possible CYP2D6 gene deletion (*5) present")
elif "or" not in snv_def_alleles and del_confirm == 'None':
snv_def_alleles = snv_def_alleles.split("/")
snv_cand_alleles = "".join(snv_cand_alleles)
snv_cand_alleles = snv_cand_alleles.split("_")
if snv_def_alleles[0] == snv_def_alleles[1]:
gene_alleles = snv_def_alleles[0] + "/" + "*5"
print(gene_alleles)
elif snv_def_alleles[0] != snv_def_alleles[1]:
samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)
gene_alleles = samp_allele1 + "/" + "*5"
print(gene_alleles)
else:
snv_def_alleles = snv_def_alleles.split("/")
snv_cand_alleles = "".join(snv_cand_alleles)
snv_cand_alleles = snv_cand_alleles.split("_")
if snv_def_alleles[0] == snv_def_alleles[1]:
if del_confirm == "*5/*5":
del_confirm = "*5"
else:
del_confirm = "*5"
gene_alleles = (del_confirm + "/" + snv_def_alleles[0])
print(gene_alleles)
elif snv_def_alleles[0] != snv_def_alleles[1]:
samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)
if del_confirm == "*5/*5":
del_confirm = "*5"
else:
del_confirm = "*5"
gene_alleles = (del_confirm + "/" + samp_allele1)
print(gene_alleles)
elif (int(cn) == 3 or int(cn) == 4) and snv_def_alleles != None:
orig = snv_def_alleles
if "or" in snv_def_alleles:
print (snv_def_alleles + "\t" + "Duplication present")
else:
snv_def_alleles = snv_def_alleles.split("/")
snv_cand_alleles = "".join(snv_cand_alleles)
snv_cand_alleles = snv_cand_alleles.split("_")
if snv_def_alleles[0] == '*90' or snv_def_alleles[1] == '*90':
alt_allele_ind = 1 - snv_def_alleles.index('*90')
alt_allele = snv_def_alleles[alt_allele_ind]
sp_allele = tandem_90_1(in_list, alt_allele, cn)
sp_allele1 = sp_allele.split("/")
if "*10x2" in sp_allele1:
test_36 = hybrid_test_36(sv_dup, cn, av_cov, cn_ex9_3pr, cn_2d7_ex9, cn_2d7_in4_in8)
if test_36 == 'norm_dup':
pass
elif test_36 == 'hyb_36_10':
sp_allele = sp_allele.replace('*10x2', '*36+*10')
elif test_36 == 'hyb_36_36':
sp_allele = sp_allele.replace('*10x2', '*36x2')
gene_alleles = sp_allele
print(gene_alleles)
elif snv_def_alleles[0] == '*57' or snv_def_alleles[1] == '*57':
alt_allele_ind = 1 - snv_def_alleles.index('*57')
alt_allele = snv_def_alleles[alt_allele_ind]
sp_allele = tandem_57_10(in_list, alt_allele, cn)
print(sp_allele)
elif snv_def_alleles[0] != snv_def_alleles[1]:
phased_dup = dup_test_cn_3_4(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)
if phased_dup == 'check':
phased_dup == 'No_call'
else:
pass
phased_dup1 = phased_dup.split("/")
if '*4x2' in phased_dup1:
count1 = phased_dup1.count('*4x2')
a_ind1 = phased_dup1.index('*4x2')
a_ind2 = 1 - a_ind1
other_hap = phased_dup1[a_ind2]
if count1 == 1:
test_68 = hybrid_test_68(sv_dup, cn, av_cov, cn_in1_3pr, in_list)
if test_68 == 'norm_dup':
pass
elif test_68 == 'hyb_68':
if int(cn_in1_3pr) < int(cn):
phased_dup = phased_dup.replace('*4x2', '*68+*4')
elif int(cn_in1_3pr) == int(cn) and ('x' not in other_hap) and int(cn) == 4:
phased_dup = phased_dup.replace('*4x2', '*68+*4')
phased_dup = phased_dup.replace(other_hap, (other_hap + 'x2'))
else:
phased_dup = phased_dup.replace('*4x2', '*68+*4')
elif count1 == 2:
pass
if '*4x3' in phased_dup1:
count1 = phased_dup1.count('*4x3')
a_ind1 = phased_dup1.index('*4x3')
a_ind2 = 1 - a_ind1
other_hap = phased_dup1[a_ind2]
if count1 == 1:
test_68 = hybrid_test_68(sv_dup, cn, av_cov, cn_in1_3pr, in_list)
if test_68 == 'norm_dup':
pass
elif test_68 == 'hyb_68':
if int(cn_in1_3pr) < int(cn):
phased_dup = phased_dup.replace('*4x3', '*68+*4')
elif int(cn_in1_3pr) == int(cn) and 'x' not in other_hap:
phased_dup = phased_dup.replace('*4x3', '*68+*4')
# phased_dup = phased_dup.replace(other_hap, (other_hap + 'x2'))
elif count1 == 2:
pass
if '*10x2' in phased_dup1:
count2 = phased_dup1.count('*10x2')
b_ind1 = phased_dup1.index('*10x2')
b_ind2 = 1 - b_ind1
if count2 == 1:
test_36 = hybrid_test_36(sv_dup, cn, av_cov, cn_ex9_3pr, cn_2d7_ex9, cn_2d7_in4_in8)
if test_36 == 'norm_dup':
pass
elif test_36 == 'hyb_36_10':
phased_dup = phased_dup.replace('*10x2', '*36+*10')
elif test_36 == 'hyb_36_36':
phased_dup = phased_dup.replace('*10x2', '*36x2')
if '*10x3' in phased_dup1:
count3 = phased_dup1.count('*10x3')
c_ind1 = phased_dup1.index('*10x3')
c_ind2 = 1 - c_ind1
if count3 == 1:
test_36 = hybrid_test_36_mod(sv_dup, cn, av_cov, cn_ex9_3pr)
if test_36 == 'norm_mt':
pass
elif test_36 == 'hyb_36_10':
phased_dup = phased_dup.replace('*10x3', '*36+*10x2')
elif test_36 == 'hyb_36_36':
phased_dup = phased_dup.replace('*10x3', '*36x2+*10')
if '*1x3' in phased_dup1:
count2 = phased_dup1.count('*1x3')
b_ind1 = phased_dup1.index('*1x3')
b_ind2 = 1 - b_ind1
if count2 == 1:
test_83 = hybrid_test_83(sv_dup, cn, av_cov, cn_ex9_3pr)
if test_83 == 'norm_star39':
pass
elif test_83 == 'hyb_83':
phased_dup = phased_dup.replace('*1x3', '*1x2+*83')
if '*2' in phased_dup1:
count2 = phased_dup1.count('*2')
b_ind1 = phased_dup1.index('*2')
b_ind2 = 1 - b_ind1
if count2 == 1:
test_13_2_v1 = hybrid_13_2_v1(cov_in4_3pr, cov_5pr_in4)
test_13_2_v2 = hybrid_13_2_v2(cov_2d7_ex2_in8, cov_2d7_5pr_in1)
if test_13_2_v1 == 'norm_var':
pass
elif test_13_2_v2 == 'norm_var':
pass
elif test_13_2_v1 == 'hyb_13_2':
phased_dup = phased_dup1[b_ind2] + "/" + '*13+*2'
elif test_13_2_v2 == 'hyb_13_2_v2':
phased_dup = phased_dup1[b_ind2] + "/" + '*13'
if '*2x2' in phased_dup1:
count2 = phased_dup1.count('*2x2')
b_ind1 = phased_dup1.index('*2x2')
b_ind2 = 1 - b_ind1
if count2 == 1:
test_13_2_v1 = hybrid_13_2_v1(cov_in4_3pr, cov_5pr_in4)
test_13_2_v2 = hybrid_13_2_v2(cov_2d7_ex2_in8, cov_2d7_5pr_in1)
if test_13_2_v1 == 'norm_var':
pass
elif test_13_2_v2 == 'norm_var':
pass
elif test_13_2_v1 == 'hyb_13_2':
phased_dup = phased_dup1[b_ind2] + "/" + '*13+*2'
elif test_13_2_v2 == 'hyb_13_2_v2':
phased_dup = phased_dup1[b_ind2] + "/" + '*13+*2'
gene_alleles = phased_dup
print(gene_alleles)
elif snv_def_alleles[0] == snv_def_alleles[1]:
rt_2 = int(cn) - 1
phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))
phased_dup1 = phased_dup.split("/")
if '*4x2' in phased_dup1:
count1 = phased_dup1.count('*4x2')
a_ind1 = phased_dup1.index('*4x2')
a_ind2 = 1 - a_ind1
if count1 == 1:
test_68 = hybrid_test_68(sv_dup, cn, av_cov, cn_in1_3pr, in_list)
if test_68 == 'norm_dup':
pass
elif test_68 == 'hyb_68':
phased_dup.replace('*4x2', '*68+*4')
if '*10x2' in phased_dup1:
count2 = phased_dup1.count('*10x2')
b_ind1 = phased_dup1.index('*10x2')
b_ind2 = 1 - b_ind1
if count2 == 1:
test_36 = hybrid_test_36(sv_dup, cn, av_cov, cn_ex9_3pr, cn_2d7_ex9, cn_2d7_in4_in8)
# print (test_36)
if test_36 == 'norm_dup':
pass
elif test_36 == 'hyb_36_10':
phased_dup = phased_dup.replace('*10x2', '*36+*10')
elif test_36 == 'hyb_36_36':
phased_dup = phased_dup.replace('*10x2', '*36x2')
if '*10x3' in phased_dup1:
count3 = phased_dup1.count('*10x3')
c_ind1 = phased_dup1.index('*10x3')
c_ind2 = 1 - c_ind1
if count3 == 1:
test_36 = hybrid_test_36_mod(sv_dup, cn, av_cov, cn_ex9_3pr)
if test_36 == 'norm_mt':
pass
elif test_36 == 'hyb_36_10':
phased_dup = phased_dup.replace('*10x3', '*36+*10x2')
elif test_36 == 'hyb_36_36':
phased_dup = '*36+*10/*36+*10'
if '*1x3' in phased_dup1:
count2 = phased_dup1.count('*1x3')
b_ind1 = phased_dup1.index('*1x3')
b_ind2 = 1 - b_ind1
if count2 == 1:
test_83 = hybrid_test_83(sv_dup, cn, av_cov, cn_ex9_3pr)
if test_83 == 'norm_star39':
pass
elif test_83 == 'hyb_83':
phased_dup = phased_dup.replace('*1x3', '*1x2+*83')
if '*2x2' in phased_dup1:
count2 = phased_dup1.count('*2x2')
b_ind1 = phased_dup1.index('*2x2')
b_ind2 = 1 - b_ind1
if count2 == 1:
test_13_2_v1 = hybrid_13_2_v1(cov_in4_3pr, cov_5pr_in4)
test_13_2_v2 = hybrid_13_2_v2(cov_2d7_ex2_in8, cov_2d7_5pr_in1)
if test_13_2_v1 == 'norm_var':
pass
elif test_13_2_v2 == 'norm_var':
pass
elif test_13_2_v1 == 'hyb_13_2':
phased_dup = phased_dup1[b_ind2] + "/" + '*13+*2'
elif test_13_2_v2 == 'hyb_13_2_v2':
phased_dup = phased_dup1[b_ind2] + "/" + '*13+*2'
gene_alleles = phased_dup
print(gene_alleles)
elif int(cn) > 4 and snv_def_alleles != None:
if "or" in snv_def_alleles:
print (snv_def_alleles + "\t" + "Duplication present")
else:
snv_def_alleles = snv_def_alleles.split("/")
snv_cand_alleles = "".join(snv_cand_alleles)
snv_cand_alleles = snv_cand_alleles.split("_")
if snv_def_alleles[0] != snv_def_alleles[1]:
phased_dup = dup_test_cn_n(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)
if phased_dup == 'check':
phased_dup = 'No_call'
else:
pass
phased_dup1 = phased_dup.split("/")
if '*10x4' in phased_dup1:
count3 = phased_dup1.count('*10x4')
c_ind1 = phased_dup1.index('*10x4')
c_ind2 = 1 - c_ind1
if count3 == 1:
test_36 = hybrid_test_36_multi(sv_dup, cn, av_cov, cn_ex9_3pr)
if test_36 == 'norm_mt':
pass
elif test_36 == 'hyb_36_10':
phased_dup = phased_dup.replace('*10x4', '*36+*10x3')
elif test_36 == 'hyb_36_36':
phased_dup = phased_dup.replace('*10x4', '*36x2+*10x2')
elif test_36 == 'hyb_36_36_36':
phased_dup = phased_dup.replace('*10x4','*36x3+*10')
else:
phased_dup = "No_call"
elif '*10x3' in phased_dup1:
count3 = phased_dup1.count('*10x3')
c_ind1 = phased_dup1.index('*10x3')
c_ind2 = 1 - c_ind1
if count3 == 1:
test_36 = hybrid_test_36_multi(sv_dup, cn, av_cov, cn_ex9_3pr)
if test_36 == 'norm_mt':
pass
elif test_36 == 'hyb_36_10':
phased_dup = phased_dup.replace('*10x3', '*36+*10x2')
elif test_36 == 'hyb_36_36':
phased_dup = phased_dup.replace('*10x3', '*36x2+*10')
elif test_36 == 'hyb_36_36_36':
phased_dup = phased_dup.replace('*10x3','*36x3')
else:
phased_dup = "No_call"
elif phased_dup1[0].startswith('*10x') or phased_dup1[1].startswith('*10x'):
if phased_dup1[0].startswith('*10x'):
dup_10_hyb = phased_dup1[0]
elif phased_dup1[1].startswith('*10x'):
dup_10_hyb = phased_dup1[1]
cn_star10 = dup_10_hyb[(dup_10_hyb.find('x') + 1 ):]
test_36 = hybrid_test_36_multi_10(sv_dup, cn, av_cov, cn_ex9_3pr, cn_star10)
if test_36 == 'norm_mt':
pass
elif test_36 == 'check':
phased_dup = 'No_call'
else:
c_ind1 = phased_dup1.index(dup_10_hyb)
c_ind2 = 1 - c_ind1
phased_dup = str(phased_dup1[c_ind2]) + "/" + test_36
gene_alleles = phased_dup
print(gene_alleles)
elif snv_def_alleles[0] == snv_def_alleles[1]:
rt_2 = int(cn) - 1
phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))
if phased_dup == 'check':
phased_dup = 'No_call'
else:
pass
phased_dup1 = phased_dup.split("/")
if '*10x4' in phased_dup1:
count3 = phased_dup1.count('*10x4')
c_ind1 = phased_dup1.index('*10x4')
c_ind2 = 1 - c_ind1
if count3 == 1:
test_36 = hybrid_test_36_multi(sv_dup, cn, av_cov, cn_ex9_3pr)
if test_36 == 'norm_mt':
pass
elif test_36 == 'hyb_36_10':
phased_dup = phased_dup.replace('*10x4', '*36+*10x3')
elif test_36 == 'hyb_36_36':
phased_dup = '*36+*10/*36+*10x2'
elif test_36 == 'hyb_36_36_36':
phased_dup = '*36+*10/*36x2+*10'
else:
phased_dup = "No_call"
elif '*10x' in phased_dup1:
phased_dup = "No_call"
gene_alleles = phased_dup
print(gene_alleles)
elif int(cn) > 2 and snv_def_alleles == None:
print("Possible rare CYP2D6/2D7 hybrid present")
print("\nActivity score:")
score_list = []
score_list1 = []
score_list2 = []
score_list3 = []
allele_dict = {}
def get_ac_score(act_score, star_alleles):
for line in open(act_score, "r"):
line = line.strip().split()
score_list.append(line)
for i in score_list:
allele_dict[i[0]] = i[1]
star_alleles = star_alleles.replace("/", "+")
star_alleles = star_alleles.split("+")
for elem in star_alleles:
if "x" not in elem:
m_allele = elem
n_allele = "1"
elif "x" in elem:
index1 = elem.find("x")
m_allele = elem[:index1]
n_allele = elem[index1+1:]
p_allele = allele_dict[m_allele] + "_" + n_allele
p_allele = p_allele.split("_")
score_list1.append(p_allele)
for i in score_list1:
score_list2.append(i[0])
if "n" in score_list2:
return "Indeterminate"
else:
for i in score_list1:
score_list3.append(float(i[0])*float(i[1]))
total_a_score = sum(score_list3)
return total_a_score
if gene_alleles in ["",'No_call','check']:
ac_score = "Indeterminate"
print(ac_score)
elif gene_alleles != "":
ac_score = get_ac_score(act_score, gene_alleles)
print(ac_score)
print("\nMetaboliser status:")
if ac_score == "Indeterminate":
print ("Indeterminate")
elif ac_score == 0:
print("Poor metaboliser (PM)")
elif 0 < ac_score < 1.25:
print("Intermediate metaboliser (IM)")
elif 1.25 <= ac_score <= 2.25:
print("Normal metaboliser (NM)")
elif ac_score > 2.25:
print("Ultrarapid metaboliser (UM)")

View File

@@ -0,0 +1,611 @@
#!/usr/bin/env python3
import os
import sys
import math
def get_total_CN(cov_file):
all_reg = []
for line in open(cov_file, "r"):
line = line.strip().split()
all_reg.append(line)
av_2d6_cov = float(all_reg[2][3])/(float(all_reg[2][2]) - float(all_reg[2][1]))
av_vdr_cov = float(all_reg[3][3])/(float(all_reg[3][2]) - float(all_reg[3][1]))
av_in1_3pr = float(all_reg[1][3])/(float(all_reg[1][2]) - float(all_reg[1][1]))
av_ex9_3pr = float(all_reg[0][3])/(float(all_reg[0][2]) - float(all_reg[0][1]))
av_in4_3pr = float(all_reg[4][3])/(float(all_reg[4][2]) - float(all_reg[4][1]))
av_5pr_in4 = float(all_reg[5][3])/(float(all_reg[5][2]) - float(all_reg[5][1]))
av_2d7_ex9 = float(all_reg[6][3])/(float(all_reg[6][2]) - float(all_reg[6][1]))
av_2d7_in4_in8 = float(all_reg[7][3])/(float(all_reg[7][2]) - float(all_reg[7][1]))
av_egfr_cov = float(all_reg[8][3])/(float(all_reg[8][2]) - float(all_reg[8][1]))
av_2d7_ex2_in8 = float(all_reg[9][3])/(float(all_reg[9][2]) - float(all_reg[9][1]))
av_2d7_5pr_in1 = float(all_reg[10][3])/(float(all_reg[10][2]) - float(all_reg[10][1]))
av_ctrl_cov = (av_vdr_cov + av_egfr_cov)/2
comp_av = av_2d6_cov/av_ctrl_cov
temp_cn = 2 * comp_av
total_cn = round(temp_cn)
in1_3pr = round(2 * av_in1_3pr/av_ctrl_cov)
ex9_3pr = (2 * av_ex9_3pr/av_ctrl_cov)
return [str(int(total_cn)), round(av_2d6_cov), str(int(in1_3pr)), round(av_ctrl_cov), str(ex9_3pr), round(av_in1_3pr), str(av_in4_3pr), str(av_5pr_in4), str(av_2d7_ex9), str(av_2d7_in4_in8), str(av_2d7_ex2_in8), str(av_2d7_5pr_in1)];
samp_gt = ""
samp_gt_hap1 = ""
def del_test(sv_del):
if os.stat(sv_del).st_size == 0:
return "None"
else:
for line in open(sv_del, "r"):
if "COVERAGE" in line:
line = line.strip().split()
ABHom = line[-1]
ABHet = line[-2]
GT = line[2]
DP = int(line[3])
if float(ABHom) == 1.0:
return "*5/*5"
elif float(ABHom) == -1.0:
return "*5"
else:
pass
hap_adv_list = []
hap_t1 = []
def del_adv_test(hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, core_vars):
g = open(hap_dbs, "r")
for line in g:
line = line.strip().split()
hap_adv_list.append(line)
a1 = core_vars.split(";")
for i in a1:
if i[-3:] == "0/1":
hap_t1.append(i[:-4])
for elem in hap_adv_list:
if elem[1] == cand_allele1:
list_t1 = (elem[2]).split(';')
if elem[1] == cand_allele2:
list_t2 = (elem[2]).split(';')
if hap_t1[0] in list_t1:
return test_allele1
elif hap_t1[0] in list_t2:
return test_allele2
het_hom_list = []
het_hom_list_new = []
def dup_test_init(sv_dup, av_cov):
for line in open(sv_dup, "r"):
if "COVERAGE" in line:
continue
elif "AGGREGATED" in line:
continue
else:
fields = line.strip().split()
het_hom_list.append(fields)
test_list1 = []
for i in het_hom_list:
test_list1.append(int(i[2]))
av_read_cov = sum(test_list1)/len(test_list1)
norm_cov = (av_cov + av_read_cov)/2
for i in het_hom_list:
supp_reads = round(float(i[-2])*int(i[2]))
i.append(round(supp_reads/av_read_cov, 4))
i.append(supp_reads)
het_hom_list_new.append(i)
return (het_hom_list_new)
hap_def_list = []
allele_cn_list = []
def dup_test_cn_3_4(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
g = open(hap_dbs, "r")
for line in g:
line = line.strip().split()
hap_def_list.append(line)
test_list1 = []
test_list2 = []
het_list = []
for i in in_list:
if i[1] == "0/1":
het_list.append(i)
for i in het_list:
test_list1.append(i[0])
test_list2.append(i[-2])
max_het = max(test_list2)
max_het_pos = test_list2.index(max_het)
var = test_list1[max_het_pos]
for elem in hap_def_list:
if elem[1] == cand_allele1:
list_3t = elem
list_3t_2 = list_3t[2].split(';')
l3 = len(list_3t_2)
if elem[1] == cand_allele2:
list_4t = elem
list_4t_2 = list_4t[2].split(';')
l4 = len(list_4t_2)
hdb_list = list_3t_2 + list_4t_2
index_var = hdb_list.index(var)
if index_var < l3:
allele_cn_list.append(test_allele1)
allele_cn_list.append(int(round(max_het*int(c_num))))
elif index_var >= l3:
allele_cn_list.append(test_allele2)
allele_cn_list.append(int(round(max_het*int(c_num))))
if allele_cn_list[0] == test_allele1:
rt_2 = int(c_num) - allele_cn_list[1]
allele_cn_list.append(test_allele2)
allele_cn_list.append(rt_2)
elif allele_cn_list[0] == test_allele2:
rt_2 = int(c_num) - allele_cn_list[1]
allele_cn_list.append(test_allele1)
allele_cn_list.append(rt_2)
if allele_cn_list[1] == 0:
res_dip = allele_cn_list[0] + "/" + allele_cn_list[2] + "x" + str(allele_cn_list[3] - 1)
elif allele_cn_list[3] == 0:
res_dip = allele_cn_list[2] + "/" + allele_cn_list[0] + "x" + str(allele_cn_list[1] - 1)
elif allele_cn_list[1] == 1:
res_dip = allele_cn_list[0] + "/" + allele_cn_list[2] + "x" + str(allele_cn_list[3])
elif allele_cn_list[3] == 1:
res_dip = allele_cn_list[2] + "/" + allele_cn_list[0] + "x" + str(allele_cn_list[1])
elif allele_cn_list[1] == 2:
res_dip = allele_cn_list[0] + "x2" + "/" + allele_cn_list[2] + "x" + str(allele_cn_list[3])
elif allele_cn_list[3] == 2:
res_dip = allele_cn_list[2] + "x2" + "/" + allele_cn_list[0] + "x" + str(allele_cn_list[1])
elif allele_cn_list[1] == -1:
res_dip = allele_cn_list[0] + "/" + allele_cn_list[2] + "x" + str(allele_cn_list[3] - 2)
elif allele_cn_list[3] == -1:
res_dip = allele_cn_list[2] + "/" + allele_cn_list[0] + "x" + str(allele_cn_list[1] - 2)
elif allele_cn_list[3] < -1:
res_dip = allele_cn_list[2] + "/" + allele_cn_list[0] + "x" + str(3)
elif allele_cn_list[1] < -1:
res_dip = allele_cn_list[0] + "/" + allele_cn_list[2] + "x" + str(3)
else:
res_dip = 'check'
return res_dip
def dup_test_cn_n(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
g = open(hap_dbs, "r")
for line in g:
line = line.strip().split()
hap_def_list.append(line)
test_list1 = []
test_list2 = []
het_list = []
for i in in_list:
if i[1] == "0/1":
het_list.append(i)
for i in het_list:
test_list1.append(i[0])
test_list2.append(i[-2])
max_het = max(test_list2)
max_het_pos = test_list2.index(max_het)
var = test_list1[max_het_pos]
for elem in hap_def_list:
if elem[1] == cand_allele1:
list_3t = elem
list_3t_2 = list_3t[2].split(';')
l3 = len(list_3t_2)
if elem[1] == cand_allele2:
list_4t = elem
list_4t_2 = list_4t[2].split(';')
l4 = len(list_4t_2)
hdb_list = list_3t_2 + list_4t_2
index_var = hdb_list.index(var)
if index_var < l3:
allele_cn_list.append(test_allele1)
allele_cn_list.append(int(round(max_het*int(c_num)-0.15)))
elif index_var >= l3:
allele_cn_list.append(test_allele2)
allele_cn_list.append(int(round(max_het*int(c_num)-0.15)))
if allele_cn_list[0] == test_allele1:
rt_2 = int(c_num) - allele_cn_list[1]
allele_cn_list.append(test_allele2)
allele_cn_list.append(rt_2)
elif allele_cn_list[0] == test_allele2:
rt_2 = int(c_num) - allele_cn_list[1]
allele_cn_list.append(test_allele1)
allele_cn_list.append(rt_2)
if allele_cn_list[1] == 0:
res_dip = allele_cn_list[0] + "/" + allele_cn_list[2] + "x" + str(allele_cn_list[3] - 1)
elif allele_cn_list[3] == 0:
res_dip = allele_cn_list[2] + "/" + allele_cn_list[0] + "x" + str(allele_cn_list[1] - 1)
elif allele_cn_list[1] == 1:
res_dip = allele_cn_list[0] + "/" + allele_cn_list[2] + "x" + str(allele_cn_list[3])
elif allele_cn_list[3] == 1:
res_dip = allele_cn_list[2] + "/" + allele_cn_list[0] + "x" + str(allele_cn_list[1])
elif allele_cn_list[1] == 2:
res_dip = allele_cn_list[0] + "x2" + "/" + allele_cn_list[2] + "x" + str(allele_cn_list[3])
elif allele_cn_list[3] == 2:
res_dip = allele_cn_list[2] + "x2" + "/" + allele_cn_list[0] + "x" + str(allele_cn_list[1])
elif allele_cn_list[1] == 3:
res_dip = allele_cn_list[0] + "x3" + "/" + allele_cn_list[2] + "x" + str(allele_cn_list[3])
elif allele_cn_list[3] == 3:
res_dip = allele_cn_list[2] + "x3" + "/" + allele_cn_list[0] + "x" + str(allele_cn_list[1])
elif allele_cn_list[1] == 4:
res_dip = allele_cn_list[0] + "x4" + "/" + allele_cn_list[2] + "x" + str(allele_cn_list[3])
elif allele_cn_list[3] == 4:
res_dip = allele_cn_list[2] + "x4" + "/" + allele_cn_list[0] + "x" + str(allele_cn_list[1])
elif allele_cn_list[1] == -1:
res_dip = allele_cn_list[0] + "/" + allele_cn_list[2] + "x" + str(allele_cn_list[3] - 2)
elif allele_cn_list[3] == -1:
res_dip = allele_cn_list[2] + "/" + allele_cn_list[0] + "x" + str(allele_cn_list[1] - 2)
elif allele_cn_list[3] < 0:
res_dip = allele_cn_list[2] + "/" + allele_cn_list[0] + "x" + str(allele_cn_list[1] + allele_cn_list[3] - 1)
elif allele_cn_list[1] < 0:
res_dip = allele_cn_list[0] + "/" + allele_cn_list[2] + "x" + str(allele_cn_list[3] + allele_cn_list[1] - 1)
else:
res_dip = 'check'
return res_dip
def hybrid_test_68(sv_dup, c_num, av_cov, cn_in1_3pr1, in_list):
test_list1 = []
test_list2 = []
test_list3 = []
for i in in_list:
test_list1.append(i[0])
test_list2.append(abs(float(i[-2])))
test_list3.append(i[-1])
index1 = test_list1.index('42130692~G>A')
index2 = test_list1.index('42128945~C>T')
val_68 = test_list3[index1]
val_4 = test_list3[index2]
rt = val_68/val_4
if rt <= 1.4:
return 'norm_dup'
elif rt > 1.4:
return 'hyb_68'
else:
return 'norm_dup'
def hyb_test_5_68_4(sv_del, in1_3pr1_float, av_cov):
test_del = []
for line in open(sv_del, "r"):
if "COVERAGE" in line:
test_del.append(line.strip())
# if len(test_del) == 0:
# return 'norm_art'
# elif len(test_del) > 0:
# return 'del_hyb'
t1 = 2 * in1_3pr1_float/av_cov
if len(test_del) == 0 and (1.6 < t1 < 2.8):
return 'norm_art'
elif len(test_del) > 0 and t1 < 1.6:
return 'del_hyb'
def hybrid_test_36(sv_dup, cn, av_cov, cn_ex9_3pr, cn_2d7_ex9, cn_2d7_in4_in8):
if ((int(cn) - 1) - 0.3) < float(cn_ex9_3pr) < ((int(cn) - 1) + 0.5) or (2.5 < (2 * float(cn_2d7_ex9)/float(cn_2d7_in4_in8)) < 3.5):
return 'hyb_36_10'
elif (int(cn) - 2) <= float(cn_ex9_3pr) < (int(cn) - 2 + 0.7):
return 'hyb_36_36'
else:
return 'norm_dup'
def hybrid_test_36_single(sv_dup, cn, av_cov, cn_ex9_3pr):
if int(round(float(cn_ex9_3pr))) == int(cn):
return 'norm_star10'
elif ((int(cn) - 1) - 0.35) < float(cn_ex9_3pr) < ((int(cn) - 1) + 0.5):
return 'hyb_36_single'
else:
return 'norm_star10'
def hybrid_test_36_mod(sv_dup, cn, av_cov, cn_ex9_3pr):
if int(round(float(cn_ex9_3pr))) == int(cn):
return 'norm_mt'
elif ((int(cn) - 1) - 0.05) < float(cn_ex9_3pr) < ((int(cn) - 1) + 0.5):
return 'hyb_36_10'
elif (int(cn) - 2) <= float(cn_ex9_3pr) < (int(cn) - 2 + 0.95):
return 'hyb_36_36'
def hybrid_test_36_multi(sv_dup, cn, av_cov, cn_ex9_3pr):
if int(round(float(cn_ex9_3pr))) == int(cn):
return 'norm_mt'
elif ((int(cn) - 1) - 0.05) < float(cn_ex9_3pr) < ((int(cn) - 1) + 0.5):
return 'hyb_36_10'
elif (int(cn) - 2) <= float(cn_ex9_3pr) < (int(cn) - 2 + 0.95):
return 'hyb_36_36'
elif (int(cn) - 3) <= float(cn_ex9_3pr) < (int(cn) - 3 + 0.95):
return 'hyb_36_36_36'
else:
return 'check'
def hybrid_test_36_multi_10(sv_dup, cn, av_cov, cn_ex9_3pr, cn_star10):
if int(round(float(cn_ex9_3pr))) == int(cn):
return 'norm_mt'
elif float(cn_ex9_3pr) < ((int(cn) - 1) + 0.5):
cn_star36 = int(cn) - int(round(float(cn_ex9_3pr)))
adj_cn_star10 = int(cn_star10) - cn_star36
if cn_star36 == 1:
return '*36+*10x' + str(adj_cn_star10)
else:
return '*36x' + str(cn_star36) + '+*10x' + str(adj_cn_star10)
else:
return 'check'
def hybrid_13_2_v1(cov_in4_3pr, cov_5pr_in4):
if 0.85 < float(cov_in4_3pr)/float(cov_5pr_in4) < 1.2:
return 'norm_var'
elif 0.45 < float(cov_in4_3pr)/float(cov_5pr_in4) < 0.75:
return 'hyb_13_2'
else:
return 'norm_var'
def hybrid_13_2_v2(cov_2d7_ex2_in8, cov_2d7_5pr_in1):
if 0.85 < float(cov_2d7_ex2_in8)/float(cov_2d7_5pr_in1) < 1.2:
return 'norm_var'
elif 0.45 < float(cov_2d7_ex2_in8)/float(cov_2d7_5pr_in1) < 0.75:
return 'hyb_13_2_v2'
else:
return 'norm_var'
def tandem_90_1(in_list, alt_allele, cn):
test_list1 = []
test_list2 = []
test_list3 = []
for i in in_list:
test_list1.append(i[0])
test_list2.append(abs(float(i[-2])))
test_list3.append(i[-1])
if len(test_list1) > 1:
index1 = test_list1.index('42129098~T>C')
a = test_list3[index1]
test_list3.pop(index1)
b = max(test_list3)
c = round(b/a)
if int(cn) == 3 and c == 1:
res = alt_allele + "/" + "*90+*1"
elif int(cn) == 3 and c > 1:
res = alt_allele + "x2" + "/" + "*90"
elif int(cn) == 4 and c == 2:
res = alt_allele + "x2" + "/" + "*90+*1"
elif int(cn) == 4 and c >= 3:
res = alt_allele + "x3" + "/" + "*90"
else:
val1 = test_list2[0]
val2 = round(val1 * int(cn))
if int(cn) == 3 and val2 == 1:
res = '*1/*90+*1'
elif int(cn) == 3 and val2 == 2:
res = '*90/*90+*1'
elif int(cn) == 4 and val2 == 1:
res = '*1x2/*90+*1'
elif int(cn) == 4 and val2 == 2:
res = '*90+*1/*90+*1'
elif int(cn) == 4 and val2 == 3:
res = '*90x2/*90+*1'
elif int(cn) == 3 and val2 == 3:
res = '*90/*90x2'
elif int(cn) == 4 and val2 == 4:
res = '*90/*90x3'
return res
def tandem_57_10(in_list, alt_allele, cn):
test_list1 = []
test_list2 = []
test_list3 = []
for i in in_list:
test_list1.append(i[0])
test_list2.append(abs(float(i[-2])))
test_list3.append(i[-1])
if len(test_list1) > 1:
index1 = test_list1.index('42129906~G>A')
a = test_list3[index1]
test_list3.pop(index1)
index2 = test_list1.index('42130692~G>A')
m = test_list3[index2]
test_list3.pop(index2)
b = max(test_list3)
c = round(b/a)
p = round(m/a)
if int(cn) == 3 and c == 1 and p > 1:
res = alt_allele + "/" + "*57+*10"
elif int(cn) == 3 and c > 1 and p == 1:
res = alt_allele + "x2" + "/" + "*57"
elif int(cn) == 4 and c == 2 and p > 1:
res = alt_allele + "x2" + "/" + "*57+*10"
elif int(cn) == 4 and c >= 3 and p == 1:
res = alt_allele + "x3" + "/" + "*57"
elif int(cn) == 4 and p == 1 and alt_allele == '*10':
res = "*57+*10" + "/" + "*57+*10"
elif int(cn) == 4 and p > 1 and alt_allele == '*10':
res = "*10x2" + "/" + "*57+*10"
else:
res = alt_allele + "/" + "*57+*10"
return res
def hybrid_test_83_single(sv_dup, cn, av_cov, cn_ex9_3pr):
if int(round(float(cn_ex9_3pr))) == int(cn):
return 'norm_star39'
elif ((int(cn) - 1) - 0.35) < float(cn_ex9_3pr) < ((int(cn) - 1) + 0.5):
return 'hyb_83_single'
else:
return 'norm_star39'
def hybrid_test_83(sv_dup, cn, av_cov, cn_ex9_3pr):
if int(round(float(cn_ex9_3pr))) == int(cn):
return 'norm_star39'
elif ((int(cn) - 1) - 0.35) < float(cn_ex9_3pr) < ((int(cn) - 1) + 0.5):
return 'hyb_83'
else:
return 'norm_star39'