pg-main from prod server added
This commit is contained in:
78
pgx-main/scripts/ugt1a1/hg38/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/ugt1a1/hg38/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def _bkg_star_diplotype(diplo):
    """Turn a database diplotype id such as ``28.v1_6.v1`` into ``*28/*6``.

    The first haplotype is the text before the first ".", the second is the
    text between the first "_" and the second ".".  Raises IndexError when
    the id has no "_" or fewer than two "." characters.
    """
    dots = [pos for pos, char in enumerate(diplo) if char == "."]
    underscores = [pos for pos, char in enumerate(diplo) if char == "_"]
    return "*" + diplo[:dots[0]] + "/" + "*" + diplo[underscores[0] + 1:dots[1]]


def get_backgroud_alleles(database, core_vars):
    """Find the database diplotype that best resembles the sample.

    Parameters
    ----------
    database : str
        Path to a tab-separated file.  Column 0 is the diplotype id and
        column 1 the ";"-joined core variants of that diplotype.
    core_vars : str
        ";"-joined core variants of the sample.  The last four characters of
        an entry are dropped to obtain its genotype-free key (presumably a
        "~0/1"-style suffix; confirm against the caller).

    Returns
    -------
    list
        ``[allele_res, diplo1]``: the star-allele string (e.g. ``*28/*6``)
        and the matching database diplotype id.  When no database record
        qualifies the result is ``['*1/*1', '1S.v1_1S.v1']``.
    """
    core_vars_list = core_vars.split(";")
    last_key = core_vars_list[-1][:-4]
    first_key = core_vars_list[0][:-4]

    # Close the handle deterministically and ignore blank lines, which used
    # to raise IndexError on record[1].
    with open(database, "r") as handle:
        records = [row.strip().split("\t") for row in handle if row.strip()]

    # A record is a candidate only when it mentions BOTH the first and the
    # last core variant of the sample.  The previous condition,
    # `core_temp1 and core_temp2 in temp_rec`, parsed as
    # `core_temp1 and (core_temp2 in temp_rec)` and therefore only tested the
    # first variant.  The truthiness guard on last_key is kept so that an
    # empty core_vars still yields no candidates.
    # NOTE(review): this narrows the candidate set compared with the previous
    # behaviour; re-run the validation samples before release.
    # NOTE(review): these are substring tests on the joined column, so one
    # variant key can match inside a longer one.
    shortlisted = [
        record for record in records
        if last_key and first_key in record[1] and last_key in record[1]
    ]

    candidates = []
    cand_vars = []
    scores = []
    for record in shortlisted:
        record_vars = record[1].split(";")
        score = 0
        for variant in record_vars:
            if variant in core_vars_list:
                score += 3      # same variant, same genotype suffix
            elif variant[:-4] in core_vars:
                score += 1      # same variant key, different suffix
            else:
                score -= 2      # variant not seen in the sample
        candidates.append(record[0])
        cand_vars.append(record_vars)
        scores.append(score)

    if not scores:
        return ['*1/*1', '1S.v1_1S.v1']

    # Among records scoring the maximum or one below it, prefer the one that
    # lists the fewest variants; the earliest record wins a tie.
    best = max(scores)
    finalists = [i for i, value in enumerate(scores)
                 if value == best or value == best - 1]
    winner = min(finalists, key=lambda i: len(cand_vars[i]))
    diplo1 = candidates[winner]

    return [_bkg_star_diplotype(diplo1), diplo1]
|
||||
337
pgx-main/scripts/ugt1a1/hg38/bin/snv_def_modules.py
Normal file
337
pgx-main/scripts/ugt1a1/hg38/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,337 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
def get_core_variants(infile, cn):
    """Return the sample's core variants as one sorted, ";"-joined string.

    Parameters
    ----------
    infile : str
        Path to a text file with one variant per line.
    cn : str or int
        Copy number; must be convertible with ``int()`` (ValueError
        otherwise).

    Returns
    -------
    str
        The stripped, non-blank lines sorted lexicographically and joined
        with ";".  When ``int(cn) == 1`` every "~0/1" is rewritten to "~1/1".
        An empty file yields "".
    """
    # Blank lines are skipped: they used to become empty entries that sorted
    # first and produced a leading ";" which can never match a database key.
    with open(infile, "r") as handle:
        entries = sorted(row.strip() for row in handle if row.strip())

    core_vars = ";".join(entries)

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars
|
||||
|
||||
def get_all_vars_gt(infile_full_gt):
    """Return every line of *infile_full_gt* as one sorted, ";"-joined string.

    Lines are stripped of surrounding whitespace and sorted
    lexicographically.  Blank lines are ignored (they used to contribute
    empty entries and a leading ";").  An empty file yields "".
    """
    with open(infile_full_gt, "r") as handle:
        entries = sorted(row.strip() for row in handle if row.strip())
    return ";".join(entries)
|
||||
|
||||
def _snv_star_alleles(diplo):
    """Turn a diplotype id such as ``28.v1_6.v1`` into ``*28/*6``.

    Raises IndexError when the id has no "_" or fewer than two ".".
    """
    dots = [pos for pos, char in enumerate(diplo) if char == "."]
    underscores = [pos for pos, char in enumerate(diplo) if char == "_"]
    return "*" + diplo[:dots[0]] + "/" + "*" + diplo[underscores[0] + 1:dots[1]]


def _snv_read_entries(path):
    """Return the non-blank lines of *path* without surrounding whitespace."""
    with open(path, "r") as handle:
        return [row.strip() for row in handle if row.strip()]


def _snv_unexplained(observed, explained):
    """Count the entries of *observed* that do not occur in *explained*."""
    explained = set(explained)
    return sum(1 for entry in observed if entry not in explained)


def _snv_ambiguous_call(diplos):
    """Join the distinct star-allele calls of *diplos* with " or "."""
    return " or ".join(sorted({_snv_star_alleles(diplo) for diplo in diplos}))


def _snv_genotype_tiebreak(infile_spec, core_variants, infile_full_gt,
                           soln_list1, equal_reports_solutions):
    """Rank the rows of *infile_spec* that share the sample's core variants.

    Each whitespace-separated row is read as: column 0 the candidate
    haplotype pair, column 1 the diplotype id, column 2 the core-variant
    string, column 3 the ";"-joined genotype list.  A row's score is the
    number of sample genotype lines missing from its column 3; lower is
    better.

    ``equal_reports_solutions`` selects which list is reported when every
    row scores the same: ``soln_list1`` (two-solution case) or the diplotype
    ids taken from the spec file (three-solution case).
    """
    sample_gts = _snv_read_entries(infile_full_gt)

    cand_haps = []
    diplos = []
    scores = []
    with open(infile_spec, "r") as handle:
        for row in handle:
            fields = row.strip().split()
            if len(fields) < 3 or fields[2] != core_variants:
                continue
            cand_haps.append(fields[0])
            diplos.append(fields[1])
            scores.append(_snv_unexplained(sample_gts, fields[3].split(";")))

    if not scores:
        # Previously surfaced as "min() arg is an empty sequence".
        raise ValueError(
            "no entry in {} matches core variants {!r}; cannot break the tie "
            "between {}".format(infile_spec, core_variants, soln_list1)
        )

    min_score = min(scores)
    min_score_pos = [i for i, value in enumerate(scores) if value == min_score]

    if len(set(scores)) == 1:
        # Every candidate fits equally well: report all of them.
        pool = soln_list1 if equal_reports_solutions else diplos
        return [soln_list1, pool, _snv_ambiguous_call(pool)]

    if len(min_score_pos) >= 2:
        calls = sorted({_snv_star_alleles(diplos[i]) for i in min_score_pos})
        if len(calls) == 1:
            # The tied rows only differ below star-allele resolution.
            return [soln_list1, cand_haps[min_score_pos[0]], calls[0]]
        return [soln_list1, soln_list1, " or ".join(calls)]

    best = min_score_pos[0]
    return [soln_list1, cand_haps[best], _snv_star_alleles(diplos[best])]


def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
    """Call the star-allele diplotype whose core variants equal the sample's.

    Parameters
    ----------
    database : str
        Tab-separated diplotype file: column 0 diplotype id, column 1
        ";"-joined core variants, column 2 ";"-joined supporting variants.
    infile : str
        Sample core variants, one per line (see ``get_core_variants``).
    infile_full : str
        All sample variants, one per line.
    infile_full_gt : str
        All sample variants with genotype, one per line.
    infile_spec : str
        Whitespace-separated tie-break table (see ``_snv_genotype_tiebreak``).
    cn : str or int
        Copy number forwarded to ``get_core_variants``.

    Returns
    -------
    list or None
        * ``[diplotype_id, '*1/*1']`` when *infile* is empty;
        * ``[matching_ids, chosen, allele_res]`` for one to three matching
          database rows, where ``chosen`` is an id string or, for ambiguous
          calls, a list, and ``allele_res`` may contain " or ";
        * ``None`` when zero or more than three database rows match.

    Raises
    ------
    ValueError
        When a tie has to be broken but *infile_spec* has no matching row.
    """
    # No core variants at all: the sample is called as the reference.
    if os.stat(infile).st_size == 0:
        return ['1S.v1_1S.v1', "*1/*1"]

    core_variants = get_core_variants(infile, cn)

    soln_list1 = []     # diplotype ids whose core variants equal the sample's
    soln_list2 = []     # their supporting-variant strings
    with open(database, "r") as handle:
        for row in handle:
            record = row.strip().split("\t")
            if len(record) < 2:
                continue    # blank line
            if ";".join(sorted(record[1].split(";"))) == core_variants:
                soln_list1.append(record[0])
                soln_list2.append(record[2])

    if len(soln_list1) == 1:
        diplo = soln_list1[0]
        return [soln_list1, diplo, _snv_star_alleles(diplo)]

    if len(soln_list1) in (2, 3):
        # First pass: prefer the diplotype that leaves the fewest sample
        # variants unexplained.  The entries are stripped here; previously
        # the result of line.strip() was discarded, every entry kept its
        # newline, and the counts could practically never differ.
        # NOTE(review): this makes the first pass effective, and a unique
        # minimum now always wins (the old three-way chain returned None for
        # counts such as (2, 3, 1) or (1, 2, 1) and sent (2, 2, 1) to the
        # tie-break); re-run the validation samples before release.
        sample_variants = _snv_read_entries(infile_full)
        unexplained = [
            _snv_unexplained(sample_variants, supporting.split(";"))
            for supporting in soln_list2
        ]
        fewest = min(unexplained)
        if unexplained.count(fewest) == 1:
            best = soln_list1[unexplained.index(fewest)]
            return [soln_list1, best, _snv_star_alleles(best)]

        return _snv_genotype_tiebreak(
            infile_spec, core_variants, infile_full_gt, soln_list1,
            equal_reports_solutions=(len(soln_list1) == 2),
        )

    # Zero or more than three matches: the caller reports a possible novel
    # allele.
    return None
|
||||
247
pgx-main/scripts/ugt1a1/hg38/bin/stellarpgx.py
Normal file
247
pgx-main/scripts/ugt1a1/hg38/bin/stellarpgx.py
Normal file
@@ -0,0 +1,247 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from sv_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# UGT1A1 star-allele calling driver.
#
# Usage: stellarpgx.py DATABASE CORE_VARS ALL_VARS ALL_VARS_GT SPEC_TABLE
#                      SV_DEL SV_DUP COVERAGE HAP_DB ACT_SCORE
#
# The result is written to stdout.
#
# NOTE(review): the block nesting below was reconstructed from a listing
# that had lost its indentation.  In particular the final `else` of the
# CN 2 branch is paired with the "one allele is *1" test; confirm against
# the deployed script.
# ---------------------------------------------------------------------------
print("--------------------------------------------\n")

print("UGT1A1 Star Allele Calling with StellarPGx\n")

print("--------------------------------------------\n")


database = sys.argv[1]
infile = sys.argv[2]
infile_full = sys.argv[3]
infile_full_gt = sys.argv[4]
infile_spec = sys.argv[5]
sv_del = sys.argv[6]
sv_dup = sys.argv[7]
cov_file = sys.argv[8]
hap_dbs = sys.argv[9]
act_score = sys.argv[10]    # accepted for interface compatibility; unused


# Parse the coverage file once instead of four times.
cov_summary = get_total_CN(cov_file)
cn = cov_summary[0]
av_cov = cov_summary[1]
cov_e1_e2 = cov_summary[3]
cov_e3_e5 = cov_summary[4]

print("Initially computed CN = {}".format(cn))

supp_core_vars = get_core_variants(infile, cn)

print("\nSample core variants:")
print(supp_core_vars)


snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)

if snv_def_calls is None:
    # No usable database match: report the closest background diplotype.
    bac_alleles = get_backgroud_alleles(database, supp_core_vars)

    if bac_alleles is None:
        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution")

    else:
        print("\nCandidate alleles:")
        print("[" + bac_alleles[-1] + "]")

        print("\nResult:")
        print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
        print("\nLikely background alleles:")
        print("[" + bac_alleles[0] + "]")

    sys.exit()


best_diplos = snv_def_calls[0]

print("\nCandidate alleles:")
print(best_diplos)

snv_def_alleles = snv_def_calls[-1]

# Ambiguous calls ("A/B or C/D") carry no single candidate haplotype pair.
if "or" not in snv_def_alleles:
    snv_cand_alleles = snv_def_calls[1]


dip_variants = get_all_vars_gt(infile_full_gt)


print("\nResult:")

gene_alleles = ""

# dup_test_init is called only where its result is used, and at most once:
# the earlier eager call ran for every non-*1/*1 sample and a second time
# for CN 3/4.

if cn == '2':

    if 'or' in snv_def_alleles:
        print (snv_def_alleles)

    else:
        snv_def_alleles = snv_def_alleles.split("/")

        if snv_def_alleles[0] == '*1' or snv_def_alleles[1] == '*1':
            ind_star2 = snv_def_alleles.index('*1')
            ind_other = 1 - ind_star2

            test_UGT1A1_hyb = hybrid_UGT1A1_hyb_test1(cov_e1_e2, cov_e3_e5)
            test_new_hyb = hybrid_new_test1(cov_e1_e2, cov_e3_e5)

            # NOTE(review): combinations not listed here (for example a
            # 'hyb_new' verdict) print no result at all.
            if test_UGT1A1_hyb == 'norm_var' and test_new_hyb == 'norm_var':
                gene_alleles = "/".join(snv_def_alleles)
                print(gene_alleles)

            elif test_UGT1A1_hyb == 'hyb_UGT1A1_hyb':
                gene_alleles = snv_def_alleles[ind_other] + "/" + "*UGT1A1_hyb"
                print(gene_alleles)

            # snv_def_alleles is a list at this point; it used to be compared
            # with the string "*1/*1", which can never be equal.
            elif test_UGT1A1_hyb == 'hyb_UGT1A1_hyb_2' and snv_def_alleles == ["*1", "*1"]:
                gene_alleles = "*UGT1A1_hyb/*UGT1A1_hyb"
                print(gene_alleles)

        else:
            gene_alleles = "/".join(snv_def_alleles)
            print(gene_alleles)


elif cn == '0':
    del_confirm = del_test(sv_del)

    if del_confirm == '*(full_gene_del)/*(full_gene_del)':
        gene_alleles = '*UGT1A1_del/*UGT1A1_del'
        print (gene_alleles)

    elif del_confirm == '*(full_gene_del)':
        gene_alleles = '*UGT1A1_del' + "/" + "*other"
        print(gene_alleles)

    else:
        gene_alleles = "*UGT1A1_del/*UGT1A1_del"
        print(gene_alleles)


elif cn == '1':
    del_confirm = del_test(sv_del)

    # del_test can finish without a verdict; treat that like the explicit
    # "None" answer instead of failing later on `None + "/"`.
    if del_confirm is None:
        del_confirm = 'None'

    if "or" in snv_def_alleles:
        # Also taken when a deletion was confirmed: an ambiguous call has no
        # candidate haplotypes to phase, and this case used to end in a
        # NameError.
        print (snv_def_alleles + "\t" + "Possible UGT1A1 gene deletion present")

    elif del_confirm == 'None':
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] == snv_def_alleles[1]:
            gene_alleles = snv_def_alleles[0] + "/" + "*UGT1A1_del"
            print(gene_alleles)

        else:
            samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)

            gene_alleles = samp_allele1 + "/" + "*UGT1A1_del"
            print(gene_alleles)

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        # NOTE(review): a heterozygous verdict is printed verbatim as
        # "*(full_gene_del)"; only the homozygous one is renamed.
        if snv_def_alleles[0] == snv_def_alleles[1]:

            if del_confirm == "*(full_gene_del)/*(full_gene_del)":
                del_confirm = "*UGT1A1_del"
            gene_alleles = del_confirm + "/" + snv_def_alleles[0]
            print(gene_alleles)

        else:
            samp_allele1 = del_adv_test(hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], supp_core_vars)

            if del_confirm == "*(full_gene_del)/*(full_gene_del)":
                del_confirm = "*UGT1A1_del"
            gene_alleles = del_confirm + "/" + samp_allele1
            print(gene_alleles)


elif int(cn) == 3 or int(cn) == 4:

    if "or" in snv_def_alleles:
        print (snv_def_alleles + "\t" + "Duplication present")

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] != snv_def_alleles[1]:
            in_list = dup_test_init(sv_dup, av_cov)

            phased_dup = dup_test_cn_3_4(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)

        else:
            rt_2 = int(cn) - 1

            phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))

        gene_alleles = phased_dup

        print(gene_alleles)


elif int(cn) > 4:

    if "or" in snv_def_alleles:
        print (snv_def_alleles + "\t" + "Duplication present")

    else:
        snv_def_alleles = snv_def_alleles.split("/")
        snv_cand_alleles = "".join(snv_cand_alleles)
        snv_cand_alleles = snv_cand_alleles.split("_")

        if snv_def_alleles[0] != snv_def_alleles[1]:
            in_list = dup_test_init(sv_dup, av_cov)

            phased_dup = dup_test_cn_n(sv_dup, hap_dbs, snv_cand_alleles[0], snv_cand_alleles[1], snv_def_alleles[0], snv_def_alleles[1], cn, av_cov, in_list)

        else:
            rt_2 = int(cn) - 1
            phased_dup = (snv_def_alleles[0] + "/" + snv_def_alleles[1] + "x" + str(rt_2))

        gene_alleles = phased_dup
        print(phased_dup)

# The former trailing branch (`int(cn) > 2 and snv_def_alleles == None`,
# "Possible rare UGT1A1 hybrid present") was unreachable: snv_def_alleles is
# always a string here and every CN above 2 is handled above.
|
||||
331
pgx-main/scripts/ugt1a1/hg38/bin/sv_modules.py
Normal file
331
pgx-main/scripts/ugt1a1/hg38/bin/sv_modules.py
Normal file
@@ -0,0 +1,331 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import math
|
||||
|
||||
|
||||
def get_total_CN(cov_file):
    """Estimate the gene copy number from a per-region coverage table.

    Parameters
    ----------
    cov_file : str
        Whitespace-separated table with at least five non-blank rows.  Going
        by the variable names they are, in order: UGT1A1, VDR, EGFR, the
        exon 1-2 region and the exon 3-5 region.  Each row is reduced to
        ``field[3] / (field[2] - field[1])`` (presumably summed depth over
        region length; confirm against the producer of this file).

    Returns
    -------
    list
        ``[total_cn, av_ugt1a1_cov, av_ctrl_cov, av_e1_e2, av_e3_e5]``:
        the copy number as a string, the rounded gene and control depths as
        ints, and the two exon-region depths as strings.

    Raises
    ------
    IndexError
        When fewer than five rows or fewer than four fields are present.
    ZeroDivisionError
        When a region has zero length or the control depth is zero.
    """
    # Blank lines are dropped so they cannot shift the fixed row indexes.
    with open(cov_file, "r") as handle:
        all_reg = [row.split() for row in handle if row.strip()]

    def mean_depth(region):
        return float(region[3]) / (float(region[2]) - float(region[1]))

    av_ugt1a1_cov = mean_depth(all_reg[0])
    av_vdr_cov = mean_depth(all_reg[1])
    av_egfr_cov = mean_depth(all_reg[2])
    av_e1_e2 = mean_depth(all_reg[3])
    av_e3_e5 = mean_depth(all_reg[4])

    # The two control regions are taken to be diploid, hence the factor 2.
    av_ctrl_cov = (av_vdr_cov + av_egfr_cov) / 2
    total_cn = round(2 * (av_ugt1a1_cov / av_ctrl_cov))

    return [str(int(total_cn)), round(av_ugt1a1_cov), round(av_ctrl_cov), str(av_e1_e2), str(av_e3_e5)]
|
||||
|
||||
|
||||
def del_test(sv_del):
    """Classify a full-gene deletion from the structural-variant report.

    Only lines containing "COVERAGE" are inspected; the verdict comes from
    their last whitespace-separated field (called ABHom in the original
    code).

    Returns
    -------
    str
        * ``"*(full_gene_del)/*(full_gene_del)"`` when that field is 1.0;
        * ``"*(full_gene_del)"`` when it is -1.0;
        * ``"None"`` (the string) when the file is empty or no line gives
          one of those two values.  The no-verdict case used to return the
          ``None`` object, which callers comparing against the string
          "None" did not recognise.

    Raises
    ------
    ValueError
        When the last field of a "COVERAGE" line is not a number.
    """
    if os.stat(sv_del).st_size == 0:
        return "None"

    with open(sv_del, "r") as handle:
        for row in handle:
            if "COVERAGE" not in row:
                continue

            # The genotype, depth and ABHet fields were parsed but never
            # used; they are no longer read, so rows lacking them pass.
            ab_hom = float(row.strip().split()[-1])

            if ab_hom == 1.0:
                return "*(full_gene_del)/*(full_gene_del)"
            if ab_hom == -1.0:
                return "*(full_gene_del)"

    return "None"
|
||||
|
||||
|
||||
# Kept so that `from sv_modules import *` still provides these names;
# del_adv_test no longer stores anything in them.
hap_adv_list = []
hap_t1 = []


def del_adv_test(hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, core_vars):
    """Decide which of two candidate alleles carries the first 0/1 variant.

    Parameters
    ----------
    hap_dbs : str
        Whitespace-separated haplotype table: field 1 is the haplotype id,
        field 2 its ";"-joined variants.
    cand_allele1, cand_allele2 : str
        Haplotype ids to look up in *hap_dbs*.
    test_allele1, test_allele2 : str
        Values returned for the respective candidate.
    core_vars : str
        ";"-joined sample core variants.  Entries ending in "0/1" are
        considered, with their last four characters removed.

    Returns
    -------
    str or None
        ``test_allele1`` if the first such variant belongs to
        *cand_allele1*, ``test_allele2`` if it belongs to *cand_allele2*,
        otherwise ``None``.

    Raises
    ------
    IndexError
        When *core_vars* holds no entry ending in "0/1".
    """
    # Everything is local now.  The lists used to live at module level, so a
    # second call still looked at the first call's variant.
    het_variants = [
        entry[:-4] for entry in core_vars.split(";") if entry[-3:] == "0/1"
    ]
    if not het_variants:
        raise IndexError(
            "no core variant with genotype 0/1 in {!r}".format(core_vars)
        )

    # A candidate missing from the table (or listed without variants) is
    # treated as having no variants instead of raising UnboundLocalError.
    list_t1 = []
    list_t2 = []
    with open(hap_dbs, "r") as handle:
        for row in handle:
            fields = row.strip().split()
            if len(fields) < 3:
                continue
            if fields[1] == cand_allele1:
                list_t1 = fields[2].split(';')
            if fields[1] == cand_allele2:
                list_t2 = fields[2].split(';')

    if het_variants[0] in list_t1:
        return test_allele1
    if het_variants[0] in list_t2:
        return test_allele2
    return None
|
||||
|
||||
|
||||
# Kept so that `from sv_modules import *` still provides these names;
# dup_test_init no longer stores anything in them.
het_hom_list = []
het_hom_list_new = []


def dup_test_init(sv_dup, av_cov):
    """Load the duplication report and add normalised read support per row.

    Lines containing "COVERAGE" or "AGGREGATED", and blank lines, are
    skipped.  Every remaining line is split on whitespace; field 2 is read
    as an integer depth and the second-to-last field as a fraction.

    Parameters
    ----------
    sv_dup : str
        Path to the report.
    av_cov : int or float
        Average gene coverage, averaged with the mean row depth to form the
        normalisation depth.

    Returns
    -------
    list of list
        One list per data row: the original string fields followed by
        ``round(supporting_reads / norm_cov, 3)`` and ``supporting_reads``
        (``round(fraction * depth)``).  Empty when there are no data rows;
        that case used to raise ZeroDivisionError.
    """
    # Rows are collected locally.  They used to be appended to module-level
    # lists, so a second call re-processed rows that had already been
    # extended and returned them alongside the fresh ones.
    with open(sv_dup, "r") as handle:
        rows = [
            row.strip().split()
            for row in handle
            if "COVERAGE" not in row and "AGGREGATED" not in row and row.strip()
        ]

    if not rows:
        return []

    av_read_cov = sum(int(fields[2]) for fields in rows) / len(rows)
    norm_cov = (av_cov + av_read_cov) / 2

    for fields in rows:
        supp_reads = round(float(fields[-2]) * int(fields[2]))
        fields.append(round(supp_reads / norm_cov, 3))
        fields.append(supp_reads)

    return rows
|
||||
|
||||
|
||||
# Kept so that `from sv_modules import *` still provides these names; the
# duplication tests no longer store anything in them.
hap_def_list = []
allele_cn_list = []


def _phase_duplicated_allele(hap_dbs, cand_allele1, cand_allele2, test_allele1,
                             test_allele2, c_num, in_list, rounding_offset,
                             max_explicit_copies):
    """Split a total copy number between two alleles.

    The heterozygous ("0/1") row of *in_list* with the highest
    second-to-last value decides.  The candidate haplotype listing that
    row's variant (field 0) gets ``round(value * c_num - rounding_offset)``
    copies, the other allele gets the remainder.

    Returns the formatted call, or 'check' when neither allele has a copy
    count between 0 and *max_explicit_copies*.

    Raises ValueError when *in_list* has no "0/1" row or the deciding
    variant is not listed for either candidate haplotype.
    """
    het_rows = [row for row in in_list if row[1] == "0/1"]
    if not het_rows:
        raise ValueError("no heterozygous (0/1) variant available to phase the duplication")

    ratios = [row[-2] for row in het_rows]
    max_het = max(ratios)
    var = het_rows[ratios.index(max_het)][0]   # earliest row wins a tie

    # Looked up afresh on every call.  A candidate missing from the table is
    # treated as having no variants instead of raising UnboundLocalError.
    vars_allele1 = []
    vars_allele2 = []
    with open(hap_dbs, "r") as handle:
        for row in handle:
            fields = row.strip().split()
            if len(fields) < 3:
                continue
            if fields[1] == cand_allele1:
                vars_allele1 = fields[2].split(';')
            if fields[1] == cand_allele2:
                vars_allele2 = fields[2].split(';')

    combined = vars_allele1 + vars_allele2
    if var not in combined:
        raise ValueError(
            "variant {!r} is not defined for haplotype {!r} or {!r}".format(
                var, cand_allele1, cand_allele2)
        )

    total_cn = int(c_num)
    # A variant listed for both haplotypes is attributed to the first one.
    if combined.index(var) < len(vars_allele1):
        owner, other = test_allele1, test_allele2
    else:
        owner, other = test_allele2, test_allele1
    owner_cn = int(round(max_het * total_cn - rounding_offset))
    other_cn = total_cn - owner_cn

    # The order of these tests is significant and mirrors the original
    # if/elif chain.
    if owner_cn == 0:
        return owner + "/" + other + "x" + str(other_cn - 1)
    if other_cn == 0:
        return other + "/" + owner + "x" + str(owner_cn - 1)
    if owner_cn == 1:
        return owner + "/" + other + "x" + str(other_cn)
    if other_cn == 1:
        return other + "/" + owner + "x" + str(owner_cn)
    for copies in range(2, max_explicit_copies + 1):
        tag = "x" + str(copies)
        if owner_cn == copies:
            return owner + tag + "/" + other + "x" + str(other_cn)
        if other_cn == copies:
            return other + tag + "/" + owner + "x" + str(owner_cn)
    return 'check'


def dup_test_cn_3_4(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
    """Phase a duplication for the copy numbers this function is used for
    (named for total copy number 3 or 4).

    Parameters
    ----------
    sv_dup, av_cov
        Unused; kept for interface compatibility.
    hap_dbs : str
        Whitespace-separated haplotype table: field 1 is the haplotype id,
        field 2 its ";"-joined variants.
    cand_allele1, cand_allele2 : str
        Haplotype ids looked up in *hap_dbs*.
    test_allele1, test_allele2 : str
        Star-allele names used in the returned string.
    c_num : str or int
        Total copy number.
    in_list : list of list
        Rows as returned by ``dup_test_init``.

    Returns
    -------
    str
        A call such as ``*1/*28x2``, or 'check' when no explicit pattern
        (0, 1 or 2 copies of one allele) applies.

    Each call now starts from scratch; the shared module-level result list
    used to make every later call repeat the first call's answer.
    """
    return _phase_duplicated_allele(
        hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2,
        c_num, in_list, rounding_offset=0.0, max_explicit_copies=2,
    )


def dup_test_cn_n(sv_dup, hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2, c_num, av_cov, in_list):
    """Phase a duplication for higher total copy numbers.

    Same parameters and return value as ``dup_test_cn_3_4``, with two
    differences: 0.15 is subtracted before the copy estimate is rounded, and
    explicit patterns exist for up to four copies of one allele.
    """
    return _phase_duplicated_allele(
        hap_dbs, cand_allele1, cand_allele2, test_allele1, test_allele2,
        c_num, in_list, rounding_offset=0.15, max_explicit_copies=4,
    )
|
||||
|
||||
|
||||
def hybrid_UGT1A1_hyb_test1(cov_e1_e2, cov_e3_e5):
    """Classify the coverage ratio ``cov_e1_e2 / cov_e3_e5``.

    Both arguments must be convertible with ``float()`` (presumably the
    mean depths of the exon 1-2 and exon 3-5 regions; confirm against the
    caller).

    Returns
    -------
    str
        * 'norm_var' when 0.85 < ratio < 1.2;
        * 'hyb_UGT1A1_hyb' when 0.45 < ratio < 0.75;
        * 'hyb_UGT1A1_hyb_2' when ratio < 0.15;
        * 'norm_var' for every other ratio.

    A zero denominator is treated as an infinite ratio, which gives
    'norm_var'; it used to raise ZeroDivisionError.
    """
    denominator = float(cov_e3_e5)
    ratio = float(cov_e1_e2) / denominator if denominator else float("inf")

    if 0.85 < ratio < 1.2:
        return 'norm_var'
    if 0.45 < ratio < 0.75:
        return 'hyb_UGT1A1_hyb'
    if ratio < 0.15:
        return 'hyb_UGT1A1_hyb_2'
    return 'norm_var'
|
||||
|
||||
|
||||
def hybrid_new_test1(cov_e1_e2, cov_e3_e5):
    """Classify the coverage ratio ``cov_e3_e5 / cov_e1_e2``.

    Both arguments must be convertible with ``float()`` (presumably the
    mean depths of the exon 1-2 and exon 3-5 regions; confirm against the
    caller).

    Returns
    -------
    str
        * 'norm_var' when 0.85 < cov_e1_e2 / cov_e3_e5 < 1.2;
        * 'hyb_new' when 0.45 < cov_e3_e5 / cov_e1_e2 < 0.75;
        * 'hyb_new_2' when cov_e3_e5 / cov_e1_e2 < 0.15;
        * 'norm_var' otherwise.

    A ratio with a zero denominator is treated as infinite; it used to
    raise ZeroDivisionError.
    """
    e1_e2 = float(cov_e1_e2)
    e3_e5 = float(cov_e3_e5)
    forward = e1_e2 / e3_e5 if e3_e5 else float("inf")
    reverse = e3_e5 / e1_e2 if e1_e2 else float("inf")

    if 0.85 < forward < 1.2:
        return 'norm_var'
    if 0.45 < reverse < 0.75:
        return 'hyb_new'
    if reverse < 0.15:
        return 'hyb_new_2'
    return 'norm_var'
|
||||
|
||||
Reference in New Issue
Block a user