feat: code added that updates the NAT2 whitelist in the pgx-engine
This commit is contained in:
73
src/create_new_nat2_whitelst.py
Normal file
73
src/create_new_nat2_whitelst.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import os
|
||||
import json
|
||||
import re
|
||||
import logging
|
||||
from datetime import datetime
|
||||
import pandas as pd
|
||||
|
||||
|
||||
# Input/output locations. The defaults point at the original author's
# checkout; set the environment variable of the same name to run the script
# against files that live somewhere else without editing this module.
NAT2_JSON_PATH = os.environ.get(
    "NAT2_JSON_PATH",
    "/home/darren/Documents/2_repos/serenomica/pgx-engine/resources/allele_whitelist.json",
)
NAT2_LOOKUP = os.environ.get(
    "NAT2_LOOKUP",
    "/home/darren/Documents/3_projects/3_PGx/1_Clean_up/pgx_nat2_issue/data/nat2_look-up-table-v1-1.xlsx",
)

# Timestamped, level-tagged messages at INFO and above.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s", level=logging.INFO
)
|
||||
|
||||
|
||||
def main(old_white: list[str], ref_allele: str, overwrite: bool = False) -> None:
    """Rebuild the ``"NAT2"`` entry of the allele whitelist from the lookup table.

    Steps:

    1. Read the Excel sheet at ``NAT2_LOOKUP`` (first row skipped).
    2. Keep rows whose ``Transferred`` column equals ``"yes"`` and whose
       legacy name contains one of ``old_white``.
    3. Save those rows as a dated TSV next to the lookup file.
    4. Replace the ``"NAT2"`` entry of the JSON at ``NAT2_JSON_PATH`` with
       ``ref_allele`` and the reverse-sorted PharmVar names of the kept rows.
    5. Either write the JSON back (``overwrite=True``) or print it.

    Args:
        old_white: Legacy allele names to keep (for example ``"*4"``). Each
            one is matched literally as a substring of the legacy-name column.
        ref_allele: Value stored under ``"reference"`` in the new entry.
        overwrite: When true, ``NAT2_JSON_PATH`` is rewritten in place;
            otherwise the updated whitelist is printed to stdout.

    Raises:
        ValueError: If assigning the ``"NAT2"`` entry changed the set of gene
            keys, i.e. the loaded whitelist had no ``"NAT2"`` key to replace.
    """
    logging.info("Script started...")
    logging.info(f"Loading lookup and filtering to whitelist ('{NAT2_LOOKUP}')...")
    lookup = pd.read_excel(NAT2_LOOKUP, skiprows=1)

    # Escape each allele in full so characters such as "*" are matched
    # literally rather than interpreted as regex operators.
    # NOTE(review): this is a substring match, so "*4" also selects any legacy
    # name that merely contains "*4" -- confirm that is the intended scope.
    lookup_pattern = "|".join(re.escape(allele) for allele in old_white)

    # na=False: rows with an empty legacy name are excluded instead of
    # putting NaN into the boolean mask.
    lookup = lookup.loc[
        (lookup["Transferred\nYes/No"] == "yes")
        & lookup["Legacy \nname"].str.contains(lookup_pattern, regex=True, na=False)
    ]

    # Build the path before logging it: no assignment hidden inside a log
    # call, and no reliance on nested same-quote f-strings (Python 3.12+ only).
    save_path = os.path.join(
        os.path.dirname(NAT2_LOOKUP),
        f"nat2_whitelist_{datetime.now().strftime('%Y-%m-%d')}.tsv",
    )
    logging.info(f"Saving copy of filtered whitelist: '{save_path}'...")
    lookup.to_csv(save_path, sep="\t", index=False)

    logging.info(f"Reading whitelist ('{NAT2_JSON_PATH}')...")
    with open(NAT2_JSON_PATH, "r", encoding="utf-8") as handle:
        whitelist = json.load(handle)
    genes_pre = set(whitelist)

    logging.info("Updating NAT2...")
    whitelist["NAT2"] = {
        "reference": ref_allele,
        "alleles": sorted(lookup["PharmVar \nName1"].tolist(), reverse=True),
    }

    # Sanity check: the update must replace an existing entry, never add a
    # gene. Raised explicitly so the check survives ``python -O``.
    genes_post = set(whitelist)
    if genes_pre != genes_post:
        raise ValueError(
            "Gene set changed while updating NAT2; unexpected keys: "
            f"{sorted(genes_pre ^ genes_post)}"
        )

    if overwrite:
        logging.info("Overwrite active, saving new white list...")
        with open(NAT2_JSON_PATH, "w", encoding="utf-8") as handle:
            json.dump(whitelist, handle)
    else:
        logging.info("Overwrite NOT active, printing json to screen...")
        print(json.dumps(whitelist, indent=2))

    logging.info("Script completed!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Legacy NAT2 star alleles to carry over, and the allele recorded as the
    # reference; the whitelist file is rewritten in place (overwrite=True).
    legacy_alleles = ["*4", "*5", "*6", "*7", "*14"]
    main(legacy_alleles, "*4.001", overwrite=True)
|
||||
Reference in New Issue
Block a user