81 lines
2.6 KiB
Python
81 lines
2.6 KiB
Python
import marimo
|
|
|
|
# Version string recorded in the notebook file (presumably the marimo release that wrote it).
__generated_with = "0.14.16"
# Notebook application object; the cells below register on it via @app.cell.
app = marimo.App(width="medium")
|
|
|
|
|
|
@app.cell
def _():
    # Shared imports for the notebook; they are handed to the other cells
    # through the return tuple below.
    import json
    import os

    import marimo as mo
    import pandas as pd

    return json, mo, os, pd
|
|
|
|
|
|
@app.cell
def _(mo):
    # Introductory markdown: what was changed in the pgx-engine before the
    # NAT2 results compared in this notebook were produced.
    mo.md(
        r"""
    # Analyse the NAT2 Results post-update of the JSON with the latest star allele mapping.

    ### What changes were made to the pgx-engine
    - Json updated with the latest [NAT2 allele definitions](https://www.pharmvar.org/gene/NAT2?ref=blog.clinpgx.org)
    - Used these scripts [pharmvar_to_translation_table.py](https://git.serenomica.com/serenomica/pgx-engine/src/branch/master/scripts/pharmvar_to_translation_table.py) >> [translation_to_json.py](https://git.serenomica.com/serenomica/pgx-engine/src/branch/master/scripts/translation_to_json.py) >> new json
    - Updated the diplotypes_to_phenotypes.tsv to have the corrected new allele mapping
    - [Mapping source](https://a.storyblok.com/f/70677/x/301f6834b5/nat2_look-up-table-v1-1.xlsx) PharmVar
    - Done using [update_nat2_d2p_tsv.py](https://git.serenomica.com/serenomica/pgx-engine/src/branch/fix-pharmvar-to-json-scripts/scripts/update_nat2_d2p_tsv.py)
    - All validation samples run on server with updated PGx engine

    See [PR#3](https://git.serenomica.com/serenomica/pgx-engine/pulls/3) for more details
    """
    )
    return
|
|
|
|
|
|
@app.cell
def _(pd):
    # import mapping file
    # skiprows=1 drops the first row of the sheet (presumably a title row
    # above the real header -- confirm against the workbook).
    # NOTE(review): nat2_mapping is not returned or used in the visible cells.
    nat2_mapping = pd.read_excel("data/nat2_look-up-table-v1-1.xlsx", skiprows=1)

    # paths to the PGx output
    path_new_nat2 = "data/results_nat2_update_validation/"
    path_old_nat2 = "../reproduce_setup/data/full_validation_data/"

    # sample list: one VCF path per line; keep only the file name without ".vcf".
    # Blank / whitespace-only lines are skipped so they do not become an empty
    # sample name (which would inflate the count and point at a bogus
    # "_pgx_result" directory later on).
    with open("../reproduce_setup/data/se_val_samples.txt", "r") as handle:
        samples = [
            line.rstrip().split("/")[-1].replace(".vcf", "")
            for line in handle
            if line.strip()
        ]
    print(f"Number of samples: {len(samples)}")
    return path_new_nat2, path_old_nat2, samples
|
|
|
|
|
|
@app.cell
def _(json, os, path_new_nat2, path_old_nat2, samples):
    def parse_result(dir, sample, genes):
        """Return {gene: called genotype} read from a sample's output.json.

        The file is looked up at <dir>/<sample>_pgx_result/output.json and the
        values are taken from its "called_genotypes" mapping.
        """
        with open(
            os.path.join(dir, f"{sample}_pgx_result", "output.json"), "r"
        ) as fh:
            payload = json.load(fh)

        genotypes = {}
        for gene in genes:
            genotypes[gene] = payload["called_genotypes"][gene]
        return genotypes

    # Show the old and the new NAT2 call side by side for every sample.
    for sample in samples:
        _before = parse_result(path_old_nat2, sample, ["NAT2"])
        _after = parse_result(path_new_nat2, sample, ["NAT2"])
        print(_before, "\t", _after)
    return
|
|
|
|
|
|
@app.cell
def _():
    # Empty cell: contains no code and defines nothing.
    return
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the notebook app when this file is executed directly as a script.
    app.run()
|