pgx-main from prod added
This commit is contained in:
78
pgx-main/scripts/cyp1a2/b37/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/cyp1a2/b37/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def _diplotype_to_star(diplotype):
    """Render a diplotype id such as ``1F.v1_1K.v1`` as ``*1F/*1K``.

    The first haplotype is the text before the first ".", the second is the
    text between the first "_" and the second ".".

    Raises:
        ValueError: if the id lacks two "." characters or a "_".
    """
    first_dot = diplotype.index(".")
    second_dot = diplotype.index(".", first_dot + 1)
    underscore = diplotype.index("_")
    return "*" + diplotype[:first_dot] + "/*" + diplotype[underscore + 1:second_dot]


def get_backgroud_alleles(database, core_vars):
    """Pick the database diplotype that best matches the sample core variants.

    Args:
        database: path to a tab-separated file; column 1 is the diplotype id
            and column 2 a ";"-separated list of core variants.
        core_vars: ";"-separated core variants of the sample, as produced by
            ``get_core_variants``.

    Returns:
        ``[allele_res, diplo1]`` where ``allele_res`` is the star-allele
        string (e.g. ``*1A/*1F``) and ``diplo1`` the chosen diplotype id.
        When no database record qualifies, the fixed default
        ``['*1B/*1B', '1B.v1_1B.v1']`` is returned.
    """
    core_vars_list = core_vars.split(";")
    # Drop the last four characters of the first/last sample variant
    # (presumably the "~0/1"-style genotype suffix -- confirm against the
    # input format) so records can be matched regardless of zygosity.
    last_var = core_vars_list[-1][:-4]
    first_var = core_vars_list[0][:-4]

    candidates = []
    with open(database, "r") as handle:
        for line in handle:
            record = line.strip().split("\t")
            if len(record) < 2:
                # Blank or truncated row: nothing to compare against.
                continue
            record_vars = record[1]
            # The original condition was `core_temp1 and core_temp2 in ...`,
            # which only tested the *first* variant for membership.  Both
            # anchors must be present; an empty last variant still selects
            # nothing, as before.
            if last_var and last_var in record_vars and first_var in record_vars:
                candidates.append(record)

    scored = []
    for record in candidates:
        record_core_var = record[1].split(";")
        score = 0
        for var in record_core_var:
            if var in core_vars_list:
                score += 3      # exact variant + genotype match
            elif var[:-4] in core_vars:
                score += 1      # same variant text, different suffix
            else:
                score -= 2      # record variant absent from the sample
        scored.append((score, len(record_core_var), record[0]))

    if not scored:
        diplo1 = '1B.v1_1B.v1'
        allele_res = '*1B/*1B'
    else:
        max_score = max(entry[0] for entry in scored)
        # Keep records scoring max or max - 1, then prefer the one defined
        # by the fewest variants (first such record on ties).
        near_best = [entry for entry in scored if entry[0] >= max_score - 1]
        diplo1 = min(near_best, key=lambda entry: entry[1])[2]
        allele_res = _diplotype_to_star(diplo1)

    return [allele_res, diplo1]
|
||||
370
pgx-main/scripts/cyp1a2/b37/bin/snv_def_modules.py
Normal file
370
pgx-main/scripts/cyp1a2/b37/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,370 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
|
||||
|
||||
# First-candidate ids for which an unresolved two-candidate tie is reported
# as 1.v1_1.v1 (*1/*1) instead of an "A or B" call.
_PAIR_TIE_COLLAPSE = frozenset({
    "1A.v1_1G.v1",
    "1A.v1_1J.v1",
    "1A.v1_1V.v1",
    "1B.v1_1J.v1",
    "1B.v1_1W.v1",
    "1F.v1_1W.v1",
})
# Same idea for an unresolved three-candidate tie.
_TRIPLE_TIE_COLLAPSE = frozenset({"1A.v1_1W.v1"})
_COLLAPSED_DIPLOTYPE = "1.v1_1.v1"


def _read_nonblank_lines(path):
    """Return the whitespace-stripped, non-empty lines of *path*."""
    with open(path, "r") as handle:
        stripped = (line.strip() for line in handle)
        return [line for line in stripped if line]


def _diplotype_to_star(diplotype):
    """Render a diplotype id such as ``1F.v1_1K.v1`` as ``*1F/*1K``.

    The first haplotype is the text before the first ".", the second is the
    text between the first "_" and the second ".".
    """
    first_dot = diplotype.index(".")
    second_dot = diplotype.index(".", first_dot + 1)
    underscore = diplotype.index("_")
    return "*" + diplotype[:first_dot] + "/*" + diplotype[underscore + 1:second_dot]


def _count_unexplained(observed, explained_field):
    """Count items of *observed* missing from the ';'-separated *explained_field*."""
    explained = set(explained_field.split(";"))
    return sum(1 for item in observed if item not in explained)


def _load_tiebreak_entries(infile_spec, core_variants):
    """Return ``(haps, diplotype, genotypes)`` for matching rows of *infile_spec*.

    Rows are whitespace-separated; a row matches when its third column
    equals *core_variants*.  Rows with fewer than four columns are skipped.
    """
    entries = []
    with open(infile_spec, "r") as handle:
        for line in handle:
            fields = line.strip().split()
            if len(fields) >= 4 and fields[2] == core_variants:
                entries.append((fields[0], fields[1], fields[3]))
    return entries


def _resolve_pair_tie(soln_list1, entries, score):
    """Resolve a two-candidate tie from the genotype-level scores."""
    if len(set(score)) <= 1:
        # Every genotype row is equally good (or there is none at all).
        if soln_list1[0] in _PAIR_TIE_COLLAPSE:
            return [soln_list1, _COLLAPSED_DIPLOTYPE,
                    _diplotype_to_star(_COLLAPSED_DIPLOTYPE)]
        allele_res = " or ".join(_diplotype_to_star(d) for d in soln_list1)
        return [soln_list1, allele_res]

    min_score = min(score)
    # enumerate() rather than score.index(): index() always returns the
    # first minimum, which made every "tie" look like a single solution.
    best = [idx for idx, value in enumerate(score) if value == min_score]
    if len(best) > 1:
        alt_solns = [_diplotype_to_star(entries[idx][1]) for idx in best]
        if len(set(alt_solns)) == 1:
            return [soln_list1, alt_solns[0]]
        alt_solns.sort()
        # Only the first and last sorted alternatives are reported.
        return [soln_list1, alt_solns[0] + " or " + alt_solns[-1]]

    best_cand_haps, best_diplo, _ = entries[best[0]]
    return [soln_list1, best_cand_haps, _diplotype_to_star(best_diplo)]


def _resolve_triple_tie(soln_list1, entries, score):
    """Resolve a three-candidate tie from the genotype-level scores."""
    if len(set(score)) <= 1:
        if soln_list1[0] in _TRIPLE_TIE_COLLAPSE:
            return [soln_list1, _COLLAPSED_DIPLOTYPE,
                    _diplotype_to_star(_COLLAPSED_DIPLOTYPE)]
        # With no matching genotype rows, fall back to the candidates
        # themselves instead of crashing on min([]).
        tied = [diplo for _, diplo, _ in entries] or list(soln_list1)
        allele_res = " or ".join(_diplotype_to_star(d) for d in tied)
        return [soln_list1, tied, allele_res]

    # First row with the lowest score wins (no multi-minimum handling here).
    minpos = score.index(min(score))
    best_cand_haps, best_diplo, _ = entries[minpos]
    return [soln_list1, best_cand_haps, _diplotype_to_star(best_diplo)]


def get_core_variants(infile, cn):
    """Return the sample core variants as one sorted, ';'-joined string.

    Args:
        infile: path to a file with one core variant per line.
        cn: copy number; when ``int(cn) == 1`` every ``~0/1`` is rewritten
            to ``~1/1``.
    """
    core_vars = ";".join(sorted(_read_nonblank_lines(infile)))

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars


def get_all_vars_gt(infile_full_gt):
    """Return all lines of *infile_full_gt* sorted and joined with ';'."""
    return ";".join(sorted(_read_nonblank_lines(infile_full_gt)))


def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
    """Call the diplotype whose core variants equal those of the sample.

    Args:
        database: tab-separated file; columns are diplotype id, ';'-joined
            core variants, ';'-joined full variant list.
        infile: file with the sample core variants (one per line).
        infile_full: file with all sample variants (one per line).
        infile_full_gt: file with all sample variants including genotype.
        infile_spec: whitespace-separated genotype database used to break
            ties (columns: haplotypes, diplotype id, core variants,
            ';'-joined genotyped variants).
        cn: copy number, forwarded to ``get_core_variants``.

    Returns:
        ``None`` when zero or more than three database records match;
        otherwise a list whose first element is the candidate list (or the
        default diplotype id when *infile* is empty) and whose last element
        is the star-allele result string.  The list has two or three
        elements depending on the branch, as in the original code.
    """
    if os.stat(infile).st_size == 0:
        # No core variants at all: report the fixed default call.
        return ["1B.v1_1B.v1", "*1B/*1B"]

    core_variants = get_core_variants(infile, cn)
    # Lines are stripped here; the original kept the trailing newline, so
    # no sample variant ever matched a candidate's variant list.
    all_variants = _read_nonblank_lines(infile_full)
    all_var_gt = _read_nonblank_lines(infile_full_gt)

    soln_list1 = []   # matching diplotype ids
    soln_list2 = []   # their full variant lists (column 3)
    with open(database, "r") as handle:
        for line in handle:
            record = line.strip().split("\t")
            if len(record) < 3:
                continue
            if ";".join(sorted(record[1].split(";"))) == core_variants:
                soln_list1.append(record[0])
                soln_list2.append(record[2])

    if len(soln_list1) == 1:
        diplo = soln_list1[0]
        return [soln_list1, diplo, _diplotype_to_star(diplo)]

    if len(soln_list1) not in (2, 3):
        return None

    if len(soln_list1) == 2:
        # Kept from the original: the pair is echoed to stdout.
        print(soln_list1)

    # Stage 1: prefer the candidate leaving the fewest sample variants
    # unexplained.  Picking the unique minimum covers every ordering,
    # including those the original three-way if/elif chain missed.
    unexplained = [_count_unexplained(all_variants, supp) for supp in soln_list2]
    fewest = min(unexplained)
    if unexplained.count(fewest) == 1:
        winner = soln_list1[unexplained.index(fewest)]
        return [soln_list1, winner, _diplotype_to_star(winner)]

    # Stage 2: genotype-level tie-break against the spec database.
    entries = _load_tiebreak_entries(infile_spec, core_variants)
    score = [_count_unexplained(all_var_gt, genotypes) for _, _, genotypes in entries]

    if len(soln_list1) == 2:
        return _resolve_pair_tie(soln_list1, entries, score)
    return _resolve_triple_tie(soln_list1, entries, score)
|
||||
71
pgx-main/scripts/cyp1a2/b37/bin/stellarpgx.py
Normal file
71
pgx-main/scripts/cyp1a2/b37/bin/stellarpgx.py
Normal file
@@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
|
||||
def main(argv=None):
    """Run CYP1A2 star-allele calling and print the report to stdout.

    Args:
        argv: optional list of the five positional arguments (database,
            core-variant file, all-variant file, genotyped-variant file,
            genotype tie-break database).  Defaults to ``sys.argv[1:]``.
    """
    print("--------------------------------------------\n")

    print("CYP1A2 Star Allele Calling with StellarPGx\n")

    print("--------------------------------------------\n")

    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 5:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit("usage: stellarpgx.py <database> <core_variants> "
                 "<all_variants> <all_variants_gt> <genotype_database>")
    database, infile, infile_full, infile_full_gt, infile_spec = args[:5]

    # Copy number is fixed at 2 in this script.
    cn = 2

    supp_core_vars = get_core_variants(infile, cn)

    print("\nSample core variants:")
    print(supp_core_vars)

    snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)

    if snv_def_calls is None:
        # No definite call: fall back to the closest background alleles.
        bac_alleles = get_backgroud_alleles(database, supp_core_vars)

        if bac_alleles is None:
            print("\nResult:")
            print("Possible novel allele or suballele present: interpret with caution")

        else:
            print("\nCandidate alleles:")
            print("[" + bac_alleles[-1] + "]")

            print("\nResult:")
            print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
            print("\nLikely background alleles:")
            print("[" + bac_alleles[0] + "]")

        return

    # First element: candidate list; last element: star-allele result.
    snv_cand_alleles = snv_def_calls[0]

    print("\nCandidate alleles:")
    print(snv_cand_alleles)

    snv_def_alleles = snv_def_calls[-1]

    print("\nResult:")

    print(snv_def_alleles)


if __name__ == "__main__":
    main()
|
||||
0
pgx-main/scripts/cyp1a2/b37/bin/sv_modules.py
Normal file
0
pgx-main/scripts/cyp1a2/b37/bin/sv_modules.py
Normal file
78
pgx-main/scripts/cyp1a2/hg38/bin/bkg_modules.py
Normal file
78
pgx-main/scripts/cyp1a2/hg38/bin/bkg_modules.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def _diplotype_to_star(diplotype):
    """Render a diplotype id such as ``1F.v1_1K.v1`` as ``*1F/*1K``.

    The first haplotype is the text before the first ".", the second is the
    text between the first "_" and the second ".".

    Raises:
        ValueError: if the id lacks two "." characters or a "_".
    """
    first_dot = diplotype.index(".")
    second_dot = diplotype.index(".", first_dot + 1)
    underscore = diplotype.index("_")
    return "*" + diplotype[:first_dot] + "/*" + diplotype[underscore + 1:second_dot]


def get_backgroud_alleles(database, core_vars):
    """Pick the database diplotype that best matches the sample core variants.

    Args:
        database: path to a tab-separated file; column 1 is the diplotype id
            and column 2 a ";"-separated list of core variants.
        core_vars: ";"-separated core variants of the sample, as produced by
            ``get_core_variants``.

    Returns:
        ``[allele_res, diplo1]`` where ``allele_res`` is the star-allele
        string (e.g. ``*1A/*1F``) and ``diplo1`` the chosen diplotype id.
        When no database record qualifies, the fixed default
        ``['*1A/*1A', '1A.v1_1A.v1']`` is returned.
    """
    core_vars_list = core_vars.split(";")
    # Drop the last four characters of the first/last sample variant
    # (presumably the "~0/1"-style genotype suffix -- confirm against the
    # input format) so records can be matched regardless of zygosity.
    last_var = core_vars_list[-1][:-4]
    first_var = core_vars_list[0][:-4]

    candidates = []
    with open(database, "r") as handle:
        for line in handle:
            record = line.strip().split("\t")
            if len(record) < 2:
                # Blank or truncated row: nothing to compare against.
                continue
            record_vars = record[1]
            # The original condition was `core_temp1 and core_temp2 in ...`,
            # which only tested the *first* variant for membership.  Both
            # anchors must be present; an empty last variant still selects
            # nothing, as before.
            if last_var and last_var in record_vars and first_var in record_vars:
                candidates.append(record)

    scored = []
    for record in candidates:
        record_core_var = record[1].split(";")
        score = 0
        for var in record_core_var:
            if var in core_vars_list:
                score += 3      # exact variant + genotype match
            elif var[:-4] in core_vars:
                score += 1      # same variant text, different suffix
            else:
                score -= 2      # record variant absent from the sample
        scored.append((score, len(record_core_var), record[0]))

    if not scored:
        diplo1 = '1A.v1_1A.v1'
        allele_res = '*1A/*1A'
    else:
        max_score = max(entry[0] for entry in scored)
        # Keep records scoring max or max - 1, then prefer the one defined
        # by the fewest variants (first such record on ties).
        near_best = [entry for entry in scored if entry[0] >= max_score - 1]
        diplo1 = min(near_best, key=lambda entry: entry[1])[2]
        allele_res = _diplotype_to_star(diplo1)

    return [allele_res, diplo1]
|
||||
305
pgx-main/scripts/cyp1a2/hg38/bin/snv_def_modules.py
Normal file
305
pgx-main/scripts/cyp1a2/hg38/bin/snv_def_modules.py
Normal file
@@ -0,0 +1,305 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
|
||||
|
||||
def _read_nonblank_lines(path):
    """Return the whitespace-stripped, non-empty lines of *path*."""
    with open(path, "r") as handle:
        stripped = (line.strip() for line in handle)
        return [line for line in stripped if line]


def _diplotype_to_star(diplotype):
    """Render a diplotype id such as ``1F.v1_1K.v1`` as ``*1F/*1K``.

    The first haplotype is the text before the first ".", the second is the
    text between the first "_" and the second ".".
    """
    first_dot = diplotype.index(".")
    second_dot = diplotype.index(".", first_dot + 1)
    underscore = diplotype.index("_")
    return "*" + diplotype[:first_dot] + "/*" + diplotype[underscore + 1:second_dot]


def _count_unexplained(observed, explained_field):
    """Count items of *observed* missing from the ';'-separated *explained_field*."""
    explained = set(explained_field.split(";"))
    return sum(1 for item in observed if item not in explained)


def _load_tiebreak_entries(infile_spec, core_variants):
    """Return ``(haps, diplotype, genotypes)`` for matching rows of *infile_spec*.

    Rows are whitespace-separated; a row matches when its third column
    equals *core_variants*.  Rows with fewer than four columns are skipped.
    """
    entries = []
    with open(infile_spec, "r") as handle:
        for line in handle:
            fields = line.strip().split()
            if len(fields) >= 4 and fields[2] == core_variants:
                entries.append((fields[0], fields[1], fields[3]))
    return entries


def _resolve_pair_tie(soln_list1, entries, score):
    """Resolve a two-candidate tie from the genotype-level scores."""
    if len(set(score)) <= 1:
        # Every genotype row is equally good (or there is none at all):
        # report both candidates as an ambiguous call.
        allele_res = " or ".join(_diplotype_to_star(d) for d in soln_list1)
        return [soln_list1, allele_res]

    min_score = min(score)
    # enumerate() rather than score.index(): index() always returns the
    # first minimum, which made every "tie" look like a single solution.
    best = [idx for idx, value in enumerate(score) if value == min_score]
    if len(best) > 1:
        alt_solns = [_diplotype_to_star(entries[idx][1]) for idx in best]
        if len(set(alt_solns)) == 1:
            return [soln_list1, alt_solns[0]]
        alt_solns.sort()
        # Only the first and last sorted alternatives are reported.
        return [soln_list1, alt_solns[0] + " or " + alt_solns[-1]]

    best_cand_haps, best_diplo, _ = entries[best[0]]
    return [soln_list1, best_cand_haps, _diplotype_to_star(best_diplo)]


def _resolve_triple_tie(soln_list1, entries, score):
    """Resolve a three-candidate tie from the genotype-level scores.

    The original code opened this decision with a dangling ``elif``; it is
    treated here as the ``if`` it was evidently meant to be.
    """
    if len(set(score)) <= 1:
        # With no matching genotype rows, fall back to the candidates
        # themselves instead of crashing on min([]).
        tied = [diplo for _, diplo, _ in entries] or list(soln_list1)
        allele_res = " or ".join(_diplotype_to_star(d) for d in tied)
        return [soln_list1, tied, allele_res]

    # First row with the lowest score wins (no multi-minimum handling here).
    minpos = score.index(min(score))
    best_cand_haps, best_diplo, _ = entries[minpos]
    return [soln_list1, best_cand_haps, _diplotype_to_star(best_diplo)]


def get_core_variants(infile, cn):
    """Return the sample core variants as one sorted, ';'-joined string.

    Args:
        infile: path to a file with one core variant per line.
        cn: copy number; when ``int(cn) == 1`` every ``~0/1`` is rewritten
            to ``~1/1``.
    """
    core_vars = ";".join(sorted(_read_nonblank_lines(infile)))

    if int(cn) == 1:
        core_vars = core_vars.replace("~0/1", "~1/1")

    return core_vars


def get_all_vars_gt(infile_full_gt):
    """Return all lines of *infile_full_gt* sorted and joined with ';'."""
    return ";".join(sorted(_read_nonblank_lines(infile_full_gt)))


def cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn):
    """Call the diplotype whose core variants equal those of the sample.

    Args:
        database: tab-separated file; columns are diplotype id, ';'-joined
            core variants, ';'-joined full variant list.
        infile: file with the sample core variants (one per line).
        infile_full: file with all sample variants (one per line).
        infile_full_gt: file with all sample variants including genotype.
        infile_spec: whitespace-separated genotype database used to break
            ties (columns: haplotypes, diplotype id, core variants,
            ';'-joined genotyped variants).
        cn: copy number, forwarded to ``get_core_variants``.

    Returns:
        ``None`` when zero or more than three database records match;
        otherwise a list whose first element is the candidate list (or the
        default diplotype id when *infile* is empty) and whose last element
        is the star-allele result string.  The list has two or three
        elements depending on the branch, as in the original code.
    """
    if os.stat(infile).st_size == 0:
        # No core variants at all: report the fixed default call.
        return ["1.v1_1.v1", "*1/*1"]

    core_variants = get_core_variants(infile, cn)
    # Lines are stripped here; the original kept the trailing newline, so
    # no sample variant ever matched a candidate's variant list.
    all_variants = _read_nonblank_lines(infile_full)
    all_var_gt = _read_nonblank_lines(infile_full_gt)

    soln_list1 = []   # matching diplotype ids
    soln_list2 = []   # their full variant lists (column 3)
    with open(database, "r") as handle:
        for line in handle:
            record = line.strip().split("\t")
            if len(record) < 3:
                continue
            if ";".join(sorted(record[1].split(";"))) == core_variants:
                soln_list1.append(record[0])
                soln_list2.append(record[2])

    if len(soln_list1) == 1:
        diplo = soln_list1[0]
        return [soln_list1, diplo, _diplotype_to_star(diplo)]

    if len(soln_list1) not in (2, 3):
        return None

    if len(soln_list1) == 2:
        # Kept from the original: the pair is echoed to stdout.
        print(soln_list1)

    # Stage 1: prefer the candidate leaving the fewest sample variants
    # unexplained.  Picking the unique minimum covers every ordering,
    # including those the original three-way if/elif chain missed.
    unexplained = [_count_unexplained(all_variants, supp) for supp in soln_list2]
    fewest = min(unexplained)
    if unexplained.count(fewest) == 1:
        winner = soln_list1[unexplained.index(fewest)]
        return [soln_list1, winner, _diplotype_to_star(winner)]

    # Stage 2: genotype-level tie-break against the spec database.
    entries = _load_tiebreak_entries(infile_spec, core_variants)
    score = [_count_unexplained(all_var_gt, genotypes) for _, _, genotypes in entries]

    if len(soln_list1) == 2:
        return _resolve_pair_tie(soln_list1, entries, score)
    return _resolve_triple_tie(soln_list1, entries, score)
|
||||
70
pgx-main/scripts/cyp1a2/hg38/bin/stellarpgx.py
Normal file
70
pgx-main/scripts/cyp1a2/hg38/bin/stellarpgx.py
Normal file
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from snv_def_modules import *
|
||||
from bkg_modules import *
|
||||
|
||||
def main(argv=None):
    """Run CYP1A2 star-allele calling and print the report to stdout.

    Args:
        argv: optional list of the five positional arguments (database,
            core-variant file, all-variant file, genotyped-variant file,
            genotype tie-break database).  Defaults to ``sys.argv[1:]``.
    """
    print("--------------------------------------------\n")

    print("CYP1A2 Star Allele Calling with StellarPGx\n")

    print("--------------------------------------------\n")

    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 5:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit("usage: stellarpgx.py <database> <core_variants> "
                 "<all_variants> <all_variants_gt> <genotype_database>")
    database, infile, infile_full, infile_full_gt, infile_spec = args[:5]

    # Copy number is fixed at 2 in this script.
    cn = 2

    supp_core_vars = get_core_variants(infile, cn)

    print("\nSample core variants:")
    print(supp_core_vars)

    snv_def_calls = cand_snv_allele_calling(database, infile, infile_full, infile_full_gt, infile_spec, cn)

    if snv_def_calls is None:
        # No definite call: fall back to the closest background alleles.
        bac_alleles = get_backgroud_alleles(database, supp_core_vars)

        if bac_alleles is None:
            print("\nResult:")
            print("Possible novel allele or suballele present: interpret with caution")

        else:
            print("\nCandidate alleles:")
            print("[" + bac_alleles[-1] + "]")

            print("\nResult:")
            print("Possible novel allele or suballele present: interpret with caution; experimental validation and expert review through PharmVar is recommended")
            print("\nLikely background alleles:")
            print("[" + bac_alleles[0] + "]")

        return

    # First element: candidate list; last element: star-allele result.
    snv_cand_alleles = snv_def_calls[0]

    print("\nCandidate alleles:")
    print(snv_cand_alleles)

    snv_def_alleles = snv_def_calls[-1]

    print("\nResult:")

    print(snv_def_alleles)


if __name__ == "__main__":
    main()
|
||||
0
pgx-main/scripts/cyp1a2/hg38/bin/sv_modules.py
Normal file
0
pgx-main/scripts/cyp1a2/hg38/bin/sv_modules.py
Normal file
Reference in New Issue
Block a user