feat: update code to create the sample comparison without a notebook
This commit is contained in:
@@ -25,7 +25,7 @@ def test_same_file(file1: str, file2: str) -> str:
|
||||
raise RuntimeError
|
||||
|
||||
|
||||
def main(test_sample_paths: str, conditions: list[str]) -> None:
|
||||
def main(test_sample_paths: str, conditions: list[str], outpath: str) -> None:
|
||||
with open(test_sample_paths, "r") as handle:
|
||||
samples = [r.strip().split("/")[-1].strip(".vcf") for r in handle.readlines()]
|
||||
conditions_pairwise = list(combinations(conditions, r=2))
|
||||
@@ -64,16 +64,41 @@ def main(test_sample_paths: str, conditions: list[str]) -> None:
|
||||
df = pd.DataFrame(
|
||||
results, columns=["condition1", "condition2", "sample", "file", "matching"]
|
||||
)
|
||||
df.to_csv("data/pairwise_equality.csv", sep=",", index=None)
|
||||
df.to_csv(f"{outpath}.csv", sep=",", index=None)
|
||||
|
||||
samples = df["sample"].unique()
|
||||
print(samples)
|
||||
|
||||
gb_pairwise_df = df.groupby(by=["condition1", "condition2"])
|
||||
groups = list(gb_pairwise_df.groups.keys())
|
||||
|
||||
# split the table into one sub-table per condition pair and sample, sorted by file
|
||||
data_dict = {}
|
||||
for group in groups:
|
||||
str_group = f"{'__'.join(group)}"
|
||||
data_dict[str_group] = {}
|
||||
group_df = gb_pairwise_df.get_group(group)
|
||||
for sample in samples:
|
||||
data_dict[str_group][sample] = group_df.loc[
|
||||
group_df["sample"] == sample
|
||||
].sort_values(by="file")
|
||||
|
||||
# save data to analyse further
|
||||
with pd.ExcelWriter(f"{outpath}.xlsx", engine="openpyxl") as writer:
|
||||
for g, data in data_dict.items():
|
||||
for idx, (_, file_data) in enumerate(data.items()):
|
||||
file_data.to_excel(
|
||||
writer,
|
||||
sheet_name=g,
|
||||
startrow=(idx * len(file_data)) + (2 * idx),
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_sample_paths = "data/test_samples.txt"
|
||||
conditions = [
|
||||
"validation_data",
|
||||
"v1.2.6_manifest_version",
|
||||
"pgxCleaner_server",
|
||||
"pgxCleaner_prod_updates",
|
||||
]
|
||||
|
||||
main(test_sample_paths, conditions)
|
||||
outpath = "data/test"
|
||||
main(test_sample_paths, conditions, outpath)
|
||||
|
||||
Reference in New Issue
Block a user