code for checking pipeline outputs added
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -168,3 +168,5 @@ cython_debug/
|
|||||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
#.idea/
|
#.idea/
|
||||||
|
|
||||||
|
# data directory
|
||||||
|
data/
|
||||||
|
|||||||
31
notebooks/data_inspector.py
Normal file
31
notebooks/data_inspector.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
import marimo
|
||||||
|
|
||||||
|
# Version string recorded in the notebook file (presumably the marimo release
# that generated it — confirm before editing by hand).
__generated_with = "0.14.16"
# Notebook application object; the cells below register on it via @app.cell.
app = marimo.App(width="medium")
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _():
    # Import cell: only `mo` is returned from this cell; `os` and `pd` are
    # imported here but not returned.
    import os
    import marimo as mo
    import pandas as pd
    return (mo,)
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell(hide_code=True)
def _(mo):
    # Markdown cell: notebook title plus a "Data structure" section whose
    # fenced code block is still empty (placeholder to be filled in).
    mo.md(
        r"""
    # Analyse the Mismatch data

    Data structure
    ```

    ```
    """
    )
    return
|
||||||
|
|
||||||
|
|
||||||
|
# Run the notebook app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()
|
||||||
80
src/check_results_equivilence.py
Normal file
80
src/check_results_equivilence.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
import os
|
||||||
|
from subprocess import run
|
||||||
|
from itertools import combinations
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def get_files_in_path(dir: str, sample: str) -> list[str]:
    """List the entries of a directory with the leading sample name removed.

    Removing the sample name makes the listings of different samples
    comparable (``"<sample>_calls.tsv"`` becomes ``"_calls.tsv"``).

    Args:
        dir: Directory to list. The name shadows the builtin but is kept so
            existing keyword callers keep working.
        sample: Sample name expected as a prefix of the entries in ``dir``.

    Returns:
        The entry names in ``os.listdir`` order, each with a leading
        ``sample`` removed. Entries that do not start with ``sample`` are
        returned unchanged.

    Raises:
        OSError: If ``dir`` cannot be listed (propagated from ``os.listdir``).
    """
    # ``str.strip(sample)`` would treat ``sample`` as a *set of characters*
    # and eat matching characters from both ends of the name (e.g.
    # "s1_calls" -> "_call"); ``removeprefix`` removes exactly the sample name.
    return [entry.removeprefix(sample) for entry in os.listdir(dir)]
|
||||||
|
|
||||||
|
|
||||||
|
def test_same_file(file1: str, file2: str) -> str:
|
||||||
|
cmp_test = run(
|
||||||
|
f"cmp {file1} {file2}",
|
||||||
|
capture_output=True,
|
||||||
|
shell=True,
|
||||||
|
)
|
||||||
|
if cmp_test.returncode == 0:
|
||||||
|
return "match"
|
||||||
|
elif cmp_test.returncode == 1:
|
||||||
|
return "NO Match"
|
||||||
|
else:
|
||||||
|
print(
|
||||||
|
f"Unexpected return code != 0|1 ({cmp_test.returncode}\t{file1}\t{file2})"
|
||||||
|
)
|
||||||
|
raise RuntimeError
|
||||||
|
|
||||||
|
|
||||||
|
def main(
    test_sample_paths: str,
    conditions: list[str],
    data_dir: str = "data",
) -> None:
    """Compare per-sample result files between every pair of conditions.

    Reads the sample list, checks that within each condition no sample has
    files the first sample lacks, then compares every file of every sample
    for each pair of conditions and writes one row per comparison to
    ``<data_dir>/pairwise_equality.csv``.

    Args:
        test_sample_paths: Text file with one VCF path per line; the sample
            name is the last path component without its ``.vcf`` suffix.
        conditions: Names of the condition directories under ``data_dir``.
        data_dir: Root directory holding ``<condition>/<sample>_pgx_result``
            folders and receiving the CSV. Defaults to ``"data"``.

    Raises:
        AssertionError: If a sample has files not present for the first
            sample of the same condition.
        RuntimeError: Propagated from ``test_same_file`` when a comparison
            cannot be performed.
        OSError: If the sample list or a result directory cannot be read.
    """
    with open(test_sample_paths, "r") as handle:
        # ``removesuffix`` drops exactly ".vcf"; ``strip(".vcf")`` would eat
        # any leading/trailing '.', 'v', 'c' or 'f' characters of the name.
        # Blank lines are skipped so they do not become an empty sample.
        samples = [
            line.strip().split("/")[-1].removesuffix(".vcf")
            for line in handle
            if line.strip()
        ]
    conditions_pairwise = list(combinations(conditions, r=2))

    # check all conditions and samples have same files available
    for condition in conditions:
        reference_files: set[str] = set()
        for idx, sample in enumerate(samples):
            result_dir = f"{data_dir}/{condition}/{sample}_pgx_result"
            test_files = get_files_in_path(result_dir, sample)
            if idx == 0:
                # The first sample of each condition is the reference set.
                reference_files = set(test_files)

            if not all(file in reference_files for file in test_files):
                # Raised explicitly (not via ``assert``) so the check still
                # runs under ``python -O``; the exception type is unchanged.
                raise AssertionError(
                    f"'{sample}' from '{condition}' has unexpected files: {test_files}"
                )

    # check combos are equal
    results = []
    for pair in conditions_pairwise:
        print(f"############ Testing folling condition pair: {pair} ##############")
        for sample in samples:
            dir1 = f"{data_dir}/{pair[0]}/{sample}_pgx_result"
            dir2 = f"{data_dir}/{pair[1]}/{sample}_pgx_result"

            # Files are enumerated from the first condition of the pair and
            # looked up under the same name in the second.
            for file in os.listdir(dir1):
                results.append(
                    [
                        pair[0],
                        pair[1],
                        sample,
                        file,
                        test_same_file(f"{dir1}/{file}", f"{dir2}/{file}"),
                    ]
                )
        print("############ COMPLETED ##############\n")

    df = pd.DataFrame(
        results, columns=["condition1", "condition2", "sample", "file", "matching"]
    )
    df.to_csv(f"{data_dir}/pairwise_equality.csv", sep=",", index=False)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: compare the pipeline outputs of the listed
    # conditions for the samples named in the test-sample list.
    # NOTE(review): "v1.2.7_mainfest_version" looks like a misspelling of
    # "manifest" — it is a directory name, so confirm against the folder on
    # disk before changing it.
    main(
        test_sample_paths="data/test_samples.txt",
        conditions=[
            "v1.2.8_git_tag",
            "validation_data",
            "v1.2.6_manifest_version",
            "v1.2.7_mainfest_version",
            "pgxCleaner_server",
        ],
    )
|
||||||
Reference in New Issue
Block a user