diff --git a/.gitignore b/.gitignore
index cac109b..87c9263 100644
--- a/.gitignore
+++ b/.gitignore
@@ -188,3 +188,5 @@ ipython_config.py
 # Remove previous ipynb_checkpoints
 #   git rm -r .ipynb_checkpoints/
 
+data
+__marimo__
\ No newline at end of file
diff --git a/cmds.sh b/cmds.sh
new file mode 100644
index 0000000..8651200
--- /dev/null
+++ b/cmds.sh
@@ -0,0 +1,13 @@
+# Step 1: build the NAT2 translation table from the haplotypes TSV (run after the variant-allele encoding issue has been fixed).
+python /home/darren/Documents/2_repos/serenomica/pgx-engine/scripts/pharmvar_to_translation_table.py \
+    data/NAT2-6.2.15/GRCh38/NAT2.NC_000008.11.haplotypes.tsv \
+    "NAT2" \
+    "chr8"
+
+# Step 2: convert the translation table to JSON (the script's save location had to be changed first). NOTE(review): script paths are absolute to one machine; presumably step 1 writes temp_NAT2_translation_table.tsv - confirm.
+python /home/darren/Documents/2_repos/serenomica/pgx-engine/scripts/translation_to_json.py \
+    temp_NAT2_translation_table.tsv \
+    "NAT2" \
+    "chr8" \
+    "GRCh38" \
+    "NC_000008.11"
diff --git a/notebooks/translation_table_eda.py b/notebooks/translation_table_eda.py
new file mode 100644
index 0000000..dc11117
--- /dev/null
+++ b/notebooks/translation_table_eda.py
@@ -0,0 +1,107 @@
+import marimo
+
+__generated_with = "0.14.16"
+app = marimo.App(width="medium")
+
+
+@app.cell
+def _():
+    import marimo as mo
+    import pandas as pd
+    return mo, pd
+
+
+@app.cell
+def _(mo):
+    mo.md(
+        r"""
+    # EDA of the NAT2 Haplotypes table
+
+    Downloaded from [PharmVar](https://www.pharmvar.org/gene/NAT2?ref=blog.clinpgx.org)
+
+    Ajit had scripts to convert from this format into the json used by PharmCAT
+
+    `pgx-engine/scripts/pharmvar_to_translation_table.py`
+    `pgx-engine/scripts/translation_to_json.py`
+
+    The PharmVar haplotypes table is the input to the first script; the resulting translation table is then fed into the second script to get the JSON.
+
+    Running these causes an error because the `alts` element is None:
+
+    ```python
+    Traceback (most recent call last):
+      File "/home/darren/Documents/2_repos/serenomica/pgx-engine/scripts/translation_to_json.py", line 100, in <module>
+        main()
+      File "/home/darren/Documents/2_repos/serenomica/pgx-engine/scripts/translation_to_json.py", line 90, in main
+        convert(
+      File "/home/darren/Documents/2_repos/serenomica/pgx-engine/scripts/translation_to_json.py", line 40, in convert
+        variants = table.apply(make_variant, axis=1).to_list()
+      File "/home/darren/.venv/py310/lib/python3.10/site-packages/pandas/core/frame.py", line 10381, in apply
+        return op.apply().__finalize__(self, method="apply")
+      File "/home/darren/.venv/py310/lib/python3.10/site-packages/pandas/core/apply.py", line 916, in apply
+        return self.apply_standard()
+      File "/home/darren/.venv/py310/lib/python3.10/site-packages/pandas/core/apply.py", line 1063, in apply_standard
+        results, res_index = self.apply_series_generator()
+      File "/home/darren/.venv/py310/lib/python3.10/site-packages/pandas/core/apply.py", line 1081, in apply_series_generator
+        results[i] = self.func(v, *self.args, **self.kwargs)
+      File "/home/darren/Documents/2_repos/serenomica/pgx-engine/scripts/translation_to_json.py", line 20, in make_variant
+        alts = row["alts"].split(",")
+    AttributeError: 'NoneType' object has no attribute 'split'
+    ```
+
+    """
+    )
+    return
+
+
+@app.cell
+def _(pd):
+    # Load the PharmVar NAT2 haplotypes TSV (tab-separated; text after '#' is treated as a comment), then drop exact duplicate rows, printing the shape before and after.
+    pharmvar = pd.read_csv(
+        "data/NAT2-6.2.15/GRCh38/NAT2.NC_000008.11.haplotypes.tsv",
+        sep="\t",
+        comment="#",
+    )
+    print(pharmvar.shape)
+    pharmvar.drop_duplicates(inplace=True)
+    print(pharmvar.shape)
+    pharmvar.head()
+    return (pharmvar,)
+
+
+@app.cell
+def _(pharmvar):
+    # Approach from Ajit's script, which (per the author) leaves the alt alleles missing from the output; it also de-duplicates on "Variant Start" alone.
+    # drop_duplicates keeps only the first row per start position, so a variant with several possible alts loses the rest (the author notes there are several such variants).
+    position_dict = (
+        pharmvar[pharmvar.rsID.notna()]
+        .drop_duplicates(["Variant Start"])
+        .set_index("Variant Start")[["rsID", "Reference Allele", "Variant Allele"]]
+        .to_dict("index")
+    )
+    position_dict
+    return
+
+
+@app.cell
+def _(pharmvar):
+    # Author's fix: for each (rsID, Variant Start, Reference Allele) group, comma-join the distinct "Variant Allele" values so no alt is dropped. NOTE(review): to_dict("index") needs unique "Variant Start" values and join needs non-null alleles - confirm both hold for this table.
+    correct_pos_dict = (
+        pharmvar[pharmvar.rsID.notna()]
+        .groupby(["rsID", "Variant Start", "Reference Allele"])["Variant Allele"]
+        .agg(lambda x: ",".join(x.unique()))
+        .reset_index()
+        .set_index("Variant Start")[["rsID", "Reference Allele", "Variant Allele"]]
+        .to_dict("index")
+    )
+    correct_pos_dict
+    return
+
+
+@app.cell
+def _():
+    return
+
+
+if __name__ == "__main__":
+    app.run()