Files
reproduce_setup/pgx-main/scripts/cyp1a2/b37/bin/bkg_modules.py
2025-08-18 13:09:30 +02:00

79 lines
1.8 KiB
Python

#!/usr/bin/env python3
import os
import sys
def get_backgroud_alleles(database, core_vars):
dbs = []
dbs_temp = []
core_vars_list = core_vars.split(";")
core_temp1 = core_vars_list[-1][:-4]
core_temp2 = core_vars_list[0][:-4]
for line in open(database, "r"):
line = line.strip().split("\t")
dbs.append(line)
for record in dbs:
temp_rec = record[1]
if core_temp1 and core_temp2 in temp_rec:
dbs_temp.append(record)
scores = []
candidates = []
cand_vars = []
for elem in dbs_temp:
candidates.append(elem[0])
record_core_var = elem[1].split(";")
cand_vars.append(record_core_var)
counter = 0
for i in record_core_var:
if i in core_vars_list:
counter += 3
elif i[:-4] in core_vars:
counter += 1
else:
counter += -2
scores.append(counter)
cand_diplos = []
diplo_vars2 = []
if len(scores) == 0:
diplo1 = '1B.v1_1B.v1'
allele_res = '*1B/*1B'
else:
max_score = max(scores)
indices = [i for i, x in enumerate(scores) if x == max_score or x == max_score - 1]
for i in indices:
diplo = candidates[i]
diplo_vars1 = len(cand_vars[i])
cand_diplos.append(diplo)
diplo_vars2.append(diplo_vars1)
min_index = diplo_vars2.index(min(diplo_vars2))
diplo1 = cand_diplos[min_index]
res1 = [i for i in range(len(diplo1)) if diplo1.startswith("_", i)]
res2 = [i for i in range(len(diplo1)) if diplo1.startswith(".", i)]
hap1 = "*" + str (diplo1[:res2[0]])
hap2 = "*" + str (diplo1[res1[0]+1:res2[1]])
allele_res = hap1 + "/" + hap2
return [allele_res, diplo1];