pg-main from prod server added
This commit is contained in:
54
pgx-main/create_overview.py
Normal file
54
pgx-main/create_overview.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import pandas as pd
|
||||
import os
|
||||
import zipfile
|
||||
import functools as ft
|
||||
|
||||
|
||||
# Directory holding one "<sample>.zip" archive per sample.
# NOTE: the name shadows the builtin ``dir``; it is kept because later parts
# of this script read it under this name.
dir = "pgx_results"

# One single-column DataFrame per sample (index: diplotype "name", column
# label: the sample id), filled by the loop below.
diplotypes = []

for z in os.listdir(dir):
    # Only zip archives are processed; anything else in the directory
    # (e.g. a previously extracted sample folder) is skipped.
    if not z.endswith('.zip'):
        continue

    s = z[:-4]        # sample id: archive file name without ".zip"
    z = f"{dir}/{z}"  # path of the archive
    d = z[:-4]        # extraction directory, next to the archive

    print("Working on sample ", z)
    with zipfile.ZipFile(z, "r") as zip_ref:
        zip_ref.extractall(d)
    # The archive is deleted once unpacked; only the extracted folder remains.
    os.unlink(z)

    df = pd.read_csv(f"{d}/diplotypes.tsv", sep="\t")
    # Collapse all rows sharing a "name" into one cell.  Missing genotypes are
    # dropped and the distinct values are sorted so the cell content is
    # reproducible (plain set iteration order varies between runs).
    df = df.groupby("name").agg(
        {"genotype": lambda x: ", ".join(sorted(set(x.dropna().astype(str))))}
    )
    df.rename(columns={"genotype": s}, inplace=True)
    diplotypes.append(df)
|
||||
|
||||
# With no per-sample table there is nothing to merge; stop with a clear
# message instead of the opaque TypeError that reduce() raises on an empty
# sequence without an initial value.
if not diplotypes:
    raise SystemExit("No sample diplotype tables were loaded; nothing to summarise.")

# Outer-join the per-sample tables on their index so that every index label
# seen in any table gets a row (NaN where a table lacks that label).
df_final = ft.reduce(lambda left, right: left.join(right, how="outer"), diplotypes)

# Flip the table so the former columns become rows, then order the rows by
# their label.
dfx = df_final.transpose().sort_index()
|
||||
|
||||
# Entries of the "variants" column of the panel file that start with "rs".
# Empty cells are dropped and values coerced to str first, because a missing
# value is read as a float NaN and has no startswith().
fulgent_ids = [
    f
    for f in pd.read_csv("pgx_fulgent_panel.tsv", sep="\t")["variants"].dropna().astype(str)
    if f.startswith("rs")
]
|
||||
|
||||
# Panel membership is tested once per annotation row, so build a set once
# instead of scanning the list on every test.
panel_ids = set(fulgent_ids)

# Output column name -> one comma-separated string per row of dfx, appended
# in dfx.index order.  Key order fixes the order of the added columns.
rsid_columns = {
    "rsids (no phenotype)": [],
    "rsids (all)": [],
    "rsids (fulgent/no phenotype)": [],
    "rsids (fulgent/all)": [],
}


def _joined_variants(frame):
    """Return the distinct "Variant" values of *frame*, sorted and joined by ", "."""
    return ", ".join(sorted(frame.Variant.unique()))


for sample in dfx.index:
    filename = f"{dir}/{sample}/pharmgkb_annotations.json"
    annotations = pd.read_json(filename)

    # Keep rows whose "Variant" starts with "rs".  Non-string values (e.g.
    # missing entries) are treated as non-matching rather than raising, and
    # the mask is forced to bool so that an empty mask is still applied as a
    # row filter.
    is_rsid = annotations["Variant"].apply(
        lambda t: isinstance(t, str) and t.startswith("rs")
    ).astype(bool)
    annotations = annotations.loc[is_rsid]
    # NOTE: a filter on "Level of Evidence" (< "3") was disabled in the
    # original script and remains disabled here.

    # Rows whose first whitespace-separated token of "Variant" is on the panel.
    in_panel = annotations["Variant"].apply(
        lambda t: t.split()[0] in panel_ids
    ).astype(bool)
    on_panel = annotations.loc[in_panel]

    rsid_columns["rsids (all)"].append(_joined_variants(annotations))
    rsid_columns["rsids (fulgent/all)"].append(_joined_variants(on_panel))

    # Same two selections, restricted to rows with no "Phenotype(s)" value.
    no_phenotype = annotations[annotations["Phenotype(s)"].isna()]
    on_panel_no_phenotype = on_panel[on_panel["Phenotype(s)"].isna()]
    rsid_columns["rsids (no phenotype)"].append(_joined_variants(no_phenotype))
    rsid_columns["rsids (fulgent/no phenotype)"].append(
        _joined_variants(on_panel_no_phenotype)
    )

for column, values in rsid_columns.items():
    dfx[column] = values

dfx.to_csv("pgx_diplotypes_rsids.tsv", sep="\t")
|
||||
Reference in New Issue
Block a user