Compare commits

...

3 Commits

5 changed files with 392 additions and 43 deletions

1
.gitignore vendored
View File

@@ -188,3 +188,4 @@ cython_debug/
# PyPI configuration file # PyPI configuration file
.pypirc .pypirc
data/

View File

@@ -0,0 +1,156 @@
import marimo
__generated_with = "0.16.4"
app = marimo.App(width="medium")
@app.cell
def _():
import os
import json
import statistics
import marimo as mo
import pandas as pd
import plotly.express as px
return json, mo, os, pd, statistics
@app.cell
def _(mo):
mo.md(
r"""
# EDA of LB Assay Coverage
Data was generated by `get_panel_coverage.py` that uses the LB panels LB covered bed (with flankning sequence added, i.e. the variant calling bed) with the deduplicated bam/cram.
The output json from this analysis for each sample has the following form🥇
```json
{
"chr<#>_<start>_<stop>": [
"depth_pos1",
"depth_pos2",
"depth_pos3",
...
"depth_posN"
]
}
```
Analysis Plan:
- Poorly covered regions (i.e. bad probes/challenging areas to align)
- Overly covered regions (i.e. potential areas that are challenging for aligners or have low complexity)
- Comparison of Serenomica and Quantgene in overlap areas
"""
)
return
@app.cell
def _():
# data
se_path = "../data/coverage_serenomica_QG/se1_se2/"
qg_path = "../data/coverage_serenomica_QG/QG_historical/"
return qg_path, se_path
@app.cell
def _(json, os):
def parse_data(dpath):
files = [f for f in os.listdir(dpath) if f.endswith(".json")]
parsed_data, mean_data = {}
for file in files:
name = file.split("_cov")[0]
with open(os.path.join(dpath, file), "r") as handle:
parsed_data[name] = json.load(handle)
return parsed_data
return (parse_data,)
@app.cell
def _(parse_data, qg_path, se_path):
se_data = parse_data(se_path)
qg_data = parse_data(qg_path)
return qg_data, se_data
@app.cell
def _(statistics):
def summerize_list(lst):
vals = list(map(int, lst))
return (
statistics.mean(vals),
statistics.median(vals),
statistics.stdev(vals),
min(vals),
max(vals),
sum(vals),
len(vals),
)
def get_summary_data(sample_data):
out_data = {}
sample_sum, sample_pos_count = 0, 0
for region, depths in sample_data.items():
(mean, median, std, min_val, max_val, sum_vals, count) = (
summerize_list(depths)
)
out_data[region] = {
"min": min_val,
"max": max_val,
"mean": mean,
"median": median,
"std": std,
}
sample_sum += sum_vals
sample_pos_count += count
out_data["overall"] = {"mean": sample_sum / sample_pos_count}
return out_data
return (get_summary_data,)
@app.cell
def _(get_summary_data, qg_data, se_data):
se_summary_data = {}
for se_sample, se_sample_data in se_data.items():
se_summary_data[se_sample] = get_summary_data(se_sample_data)
qg_summary_data = {}
for qg_sample, qg_sample_data in qg_data.items():
qg_summary_data[qg_sample] = get_summary_data(qg_sample_data)
return (se_summary_data,)
@app.cell
def _():
return
@app.cell
def _(pd):
def extract_stat_as_df(data_dict, access_key):
tmp_dict = {}
for name, sample_data in data_dict.items():
tmp_dict[name] = {
region: data[access_key]
for region, data in sample_data.items()
if region != "overall"
}
return pd.DataFrame.from_dict(tmp_dict, orient="index")
return (extract_stat_as_df,)
@app.cell
def _(extract_stat_as_df, se_summary_data):
extract_stat_as_df(se_summary_data, access_key="std")
return
@app.cell
def _():
return
if __name__ == "__main__":
app.run()

View File

@@ -1,6 +1,6 @@
import marimo import marimo
__generated_with = "0.14.16" __generated_with = "0.16.4"
app = marimo.App(width="medium") app = marimo.App(width="medium")
@@ -168,7 +168,7 @@ def _(mo):
def _(os, pd, qc_hist_dir): def _(os, pd, qc_hist_dir):
# QG historical data # QG historical data
qg_hist_stats = pd.read_csv( qg_hist_stats = pd.read_csv(
os.path.join(qc_hist_dir, "historical_data_LB.csv"), index_col=0 os.path.join(qc_hist_dir, "historical_data_LB-QG.csv"), index_col=0
) )
qg_hist_stats.columns = [i.replace("|", " / ") for i in qg_hist_stats.columns] qg_hist_stats.columns = [i.replace("|", " / ") for i in qg_hist_stats.columns]
qg_hist_stats = qg_hist_stats.drop(["batch"], axis=1) qg_hist_stats = qg_hist_stats.drop(["batch"], axis=1)
@@ -866,6 +866,33 @@ def _(clinicals, meta, pd, stats):
return (stats_meta,) return (stats_meta,)
@app.cell
def _(pd):
meta_qg = pd.read_excel(
"/home/darren/Documents/4_data/3_internal/2_lb/QC/lb2_metadata.xlsx",
index_col="fastq_id",
)
meta_qg
return (meta_qg,)
@app.cell
def _(meta_qg, pd, qg_hist_stats):
# merge stats and metadata and remove seracares
stats_meta_qg = pd.concat([qg_hist_stats, meta_qg], axis=1)
stats_meta_qg = stats_meta_qg.loc[
[
i
for i in stats_meta_qg.index
if "twist" not in i.lower() and "sera" not in i.lower()
],
:,
]
stats_meta_qg = stats_meta_qg.reset_index()
stats_meta_qg.head()
return (stats_meta_qg,)
@app.cell @app.cell
def _(mo, stats_meta): def _(mo, stats_meta):
var1 = mo.ui.dropdown(options=stats_meta.columns, label="Variable 1") var1 = mo.ui.dropdown(options=stats_meta.columns, label="Variable 1")
@@ -975,6 +1002,108 @@ def _(stats_meta):
return return
@app.cell
def _(stats_meta_melt):
stats_meta_melt["Flow cell #"].unique()
return
@app.cell
def _(qg_hist_stats):
qg_hist_stats
return
@app.cell
def _(pd, stats_meta, stats_meta_qg):
stats_meta["lab"] = "Serenomica"
stats_meta_qg["lab"] = "Quantgene"
all_stats_meta = pd.concat([stats_meta, stats_meta_qg], axis=0)
all_stats_meta.shape
return (all_stats_meta,)
@app.cell
def _(all_stats_meta):
all_stats_meta["ng_group"] = [
"< 30ng" if i < 30 else "> 30ng"
for i in all_stats_meta["Input DNA used (ng)"]
]
all_stats_meta[["lab", "ng_group"]].value_counts()
return
@app.cell
def _(all_stats_meta, pd):
all_stats_meta_melt = pd.melt(
all_stats_meta,
id_vars=["index", "sample_type", "ng_group", "Flow cell #", "lab"],
)
all_stats_meta_melt
return (all_stats_meta_melt,)
@app.cell
def _(all_stats_meta_melt, mo):
box_vars2 = mo.ui.dropdown(
options=all_stats_meta_melt["variable"].unique(), label="Variables to plot"
)
mo.hstack([box_vars2])
return (box_vars2,)
@app.cell
def _(all_stats_meta_melt, box_vars2, px):
fig7 = px.box(
data_frame=all_stats_meta_melt.loc[
(all_stats_meta_melt["variable"] == box_vars2.value)
],
color="ng_group",
y="value",
x="lab",
template="simple_white",
points="all",
labels={
"ng_group": "cfDNA Input for Library",
"value": box_vars2.value,
"lab": "Laboratory",
},
color_discrete_sequence=px.colors.qualitative.Dark2,
category_orders={"ng_group": ["< 30ng", "> 30ng"]},
hover_data=["index"],
width=800,
)
fig7.update_traces(
marker=dict(size=10, line=dict(width=1, color="DarkSlateGrey")),
selector=dict(type="points"),
)
fig7.update_xaxes(tickfont_size=14)
fig7.update_yaxes(showgrid=True, tickfont_size=14)
fig7.update_legends(font_size=14)
for trace2 in fig7.select_traces():
trace2.marker.update(
size=7, line=dict(width=1, color="DarkSlateGrey"), opacity=0.8
)
fig7.show()
return
@app.cell
def _(all_stats_meta):
all_stats_meta[["lab", "ng_group"]].value_counts()
return
@app.cell
def _(stats_meta):
stats_meta.sort_values("Input DNA used (ng)")[
["index", "Input DNA used (ng)", "Flow cell #"]
]
return
@app.cell @app.cell
def _(): def _():
return return

View File

@@ -0,0 +1,9 @@
marimo==0.16.4
matplotlib==3.10.3
matplotlib-inline==0.1.7
matplotlib-venn==1.1.2
openpyxl==3.1.5
pandas==2.3.1
plotly==6.2.0
pyyaml==6.0.3
seaborn==0.13.2

View File

@@ -10,7 +10,9 @@ logging.basicConfig(
) )
def main(s3_paths: list[str], bed_path: str, ref_path: str, cli_bin: str) -> None: def main(
s3_paths: list[str], bed_path: str, ref_path: str, cli_bin: str, tars: bool
) -> None:
logging.info("Script started...") logging.info("Script started...")
logging.info(f"Using reference: {ref_path}") logging.info(f"Using reference: {ref_path}")
logging.info(f"Using bed file: {bed_path}") logging.info(f"Using bed file: {bed_path}")
@@ -22,12 +24,21 @@ def main(s3_paths: list[str], bed_path: str, ref_path: str, cli_bin: str) -> Non
file_num = f"{idx+1}/{len(s3_paths)}" file_num = f"{idx+1}/{len(s3_paths)}"
logging.info(f"({file_num}) Downloading file: {name}") logging.info(f"({file_num}) Downloading file: {name}")
down_cmd = ( if tars:
f"{cli_bin} s3 cp {file} . --profile=se" down_unpack_cmd = (
if cli_bin == "aws" f"{cli_bin} s3 cp {file} . --profile=se"
else f"{cli_bin} get {file} ." if cli_bin == "aws"
) else f"{cli_bin} get {file} . && tar -xf {name} --strip-components=4 --wildcards --no-anchored '*.cram' && rm {name}"
run(down_cmd, shell=True, capture_output=False) )
run(down_unpack_cmd, shell=True, capture_output=False)
name = name.split(".")[0] + ".collapsed.cram"
else:
down_cmd = (
f"{cli_bin} s3 cp {file} . --profile=se"
if cli_bin == "aws"
else f"{cli_bin} get {file} ."
)
run(down_cmd, shell=True, capture_output=False)
sort_name = f"{name.split('.')[0]}.sorted.cram" sort_name = f"{name.split('.')[0]}.sorted.cram"
sort_cmd = f"samtools sort -@ 18 {name} --reference {ref_path} > {sort_name}" sort_cmd = f"samtools sort -@ 18 {name} --reference {ref_path} > {sort_name}"
@@ -47,40 +58,83 @@ def main(s3_paths: list[str], bed_path: str, ref_path: str, cli_bin: str) -> Non
if __name__ == "__main__": if __name__ == "__main__":
s3_paths = [ s3_paths = [
# "s3://serenomica-pipeline-archive/se1-prd-2.1.1/LB-25-0001-pa-21Mar2025_S33.collapsed.cram", # "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0087-pa-13Jun2023_S1.bams.tar",
"s3://serenomica-pipeline-archive/se1-prd-2.1.1/LB-25-0002-pa-21Mar2025_S34.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0089-pa-13Jun2023_S2.bams.tar",
"s3://serenomica-pipeline-archive/se1-prd-2.1.1/LB-25-0003-pa-21Mar2025_S35.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0090-pa-13Jun2023_S3.bams.tar",
"s3://serenomica-pipeline-archive/se1-prd-2.1.1/LB-25-0004-pa-21Mar2025_S36.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0091-pa-13Jun2023_S4.bams.tar",
"s3://serenomica-pipeline-archive/se1-prd-2.1.1/LB-25-0005-pa-21Mar2025_S37.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0092-pa-13Jun2023_S5.bams.tar",
"s3://serenomica-pipeline-archive/se1-prd-2.1.1/LB-25-0006-pa-21Mar2025_S38.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0096-pa-13Jun2023_S6.bams.tar",
"s3://serenomica-pipeline-archive/se1-prd-2.1.1/LB-SeraPlasma-0d125pc-1-28Mar2025_S41.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0098-pa-29Jun2023_S1.bams.tar",
"s3://serenomica-pipeline-archive/se1-prd-2.1.1/LB-SeraPlasma-0d125pc-2-28Mar2025_S42.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0099-pa-29Jun2023_S2.bams.tar",
"s3://serenomica-pipeline-archive/se1-prd-2.1.1/LB-SeraPlasma-0d25pc-1-28Mar2025_S43.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0100-pa-29Jun2023_S3.bams.tar",
"s3://serenomica-pipeline-archive/se1-prd-2.1.1/LB-SeraPlasma-0d25pc-2-28Mar2025_S44.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0103-pa-29Jun2023_S4.bams.tar",
"s3://serenomica-pipeline-archive/se1-prd-2.1.1/LB-SeraPlasma-0d5pc-1-28Mar2025_S45.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0105-pa-29Jun2023_S5.bams.tar",
"s3://serenomica-pipeline-archive/se1-prd-2.1.1/LB-SeraPlasma-0d5pc-2-28Mar2025_S46.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0107-pa-29Jun2023_S6.bams.tar",
"s3://serenomica-pipeline-archive/se1-prd-2.1.1/LB-SeraPlasma-0pc-1-28Mar2025_S39.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0112-pa-19Jul2023_S1.bams.tar",
"s3://serenomica-pipeline-archive/se1-prd-2.1.1/LB-SeraPlasma-0pc-2-28Mar2025_S40.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0113-pa-19Jul2023_S2.bams.tar",
"s3://serenomica-pipeline-archive/se1-prd-2.1.1/LB-SeraPlasma-1pc-1-28Mar2025_S48.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0119-pa-19Jul2023_S4.bams.tar",
"s3://serenomica-pipeline-archive/se1-prd-2.1.1/LB-SeraPlasma-1pc-2-28Mar2025_S47.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0122-pa-19Jul2023_S5.bams.tar",
"s3://serenomica-pipeline-archive/se2-lb-1/LB-25-0007-pa-1-23May2025_S20.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0123-pa-19Jul2023_S6.bams.tar",
"s3://serenomica-pipeline-archive/se2-lb-1/LB-25-0007-pa-2-23May2025_S21.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0124-pa-19Jul2023_S7.bams.tar",
"s3://serenomica-pipeline-archive/se2-lb-1/LB-25-0008-pa-1-23May2025_S22.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0125-pa-19Jul2023_S8.bams.tar",
"s3://serenomica-pipeline-archive/se2-lb-1/LB-25-0008-pa-2-23May2025_S23.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0126-pa-19Jul2023_S9.bams.tar",
"s3://serenomica-pipeline-archive/se2-lb-1/LB-25-0009-pa-1-23May2025_S24.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0128-pa-19Jul2023_S10.bams.tar",
"s3://serenomica-pipeline-archive/se2-lb-1/LB-25-0009-pa-2-23May2025_S25.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0129-pa-19Jul2023_S3.bams.tar",
"s3://serenomica-pipeline-archive/se2-lb-1/LB-25-0010-pa-1-23May2025_S26.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0130-pa-09Aug2023_S1.bams.tar",
"s3://serenomica-pipeline-archive/se2-lb-1/LB-25-0010-pa-2-23May2025_S27.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0132-pa-09Aug2023_S2.bams.tar",
"s3://serenomica-pipeline-archive/se2-lb-1/LB-25-0011-pa-1-23May2025_S32.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0133-pa-09Aug2023_S3.bams.tar",
"s3://serenomica-pipeline-archive/se2-lb-1/LB-25-0012-pa-1-23May2025_S28.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0134-pa-09Aug2023_S4.bams.tar",
"s3://serenomica-pipeline-archive/se2-lb-1/LB-25-0012-pa-2-23May2025_S29.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0135-pa-09Aug2023_S5.bams.tar",
"s3://serenomica-pipeline-archive/se2-lb-1/LB-25-0032-pa-1-23May2025_S30.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0136-pa-09Aug2023_S6.bams.tar",
"s3://serenomica-pipeline-archive/se2-lb-1/LB-25-0032-pa-2-23May2025_S31.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0137-pa-09Aug2023_S7.bams.tar",
"s3://serenomica-pipeline-archive/se2-lb-1/LB-SeraCare-0d25pc-1-21Mar2025_S17.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0138-pa-30Aug2023_S1.bams.tar",
"s3://serenomica-pipeline-archive/se2-lb-1/LB-SeraCare-0d25pc-2-21Mar2025_S18.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0140-pa-30Aug2023_S2.bams.tar",
"s3://serenomica-pipeline-archive/se2-lb-1/LB-SeraPlasma-0pc-3-28Mar2025_S19.collapsed.cram", "s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0141-pa-30Aug2023_S3.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0142-pa-30Aug2023_S4.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0144-pa-30Aug2023_S5.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0145-pa-30Aug2023_S6.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0146-pa-30Aug2023_S7.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0147-pa-30Aug2023_S8.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0153-pa-25Oct2023_S1.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0156-pa-25Oct2023_S2.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0159-pa-25Oct2023_S3.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0160-pa-25Oct2023_S4.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-23-0186-pa-14Feb2024_S1.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0001-pa-14Feb2024_S2.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0015-pa-14Feb2024_S3.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0019-pa-14Feb2024_S4.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0023-pa-14Feb2024_S5.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0024-pa-14Feb2024_S6.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0027-pa-14Feb2024_S7.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0037-pa-28Mar2024_S16.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0038-pa-28Mar2024_S17.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0044-pa-13May2024_S2.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0046-pa-13May2024_S3.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0047-pa-13May2024_S4.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0047-pa-23May2024_S1.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0048-pa-13May2024_S5.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0049-pa-13May2024_S6.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0049-pa-23May2024_S2.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0050-pa-13May2024_S7.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0050-pa-23May2024_S3.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0053-pa-13May2024_S8.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0053-pa-23May2024_S4.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0054-pa-13May2024_S9.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0055-pa-13May2024_S10.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0056-pa-13May2024_S11.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0057-pa-13May2024_S12.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0058-pa-13May2024_S13.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0059-pa-23May2024_S5.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0060-pa-23May2024_S6.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0063-pa-23May2024_S7.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0064-pa-23May2024_S8.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-24-0065-pa-23May2024_S9.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-GVA-HC-246-13Jun2023_S10.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-GVA-ME-339-13Jun2023_S9.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-GVA-PA-307-13Jun2023_S8.bams.tar",
"s3://serenomica-pipeline-archive/historical-qg-qc-samples/LB-GVA-PA-308-13Jun2023_S7.bams.tar",
] ]
# "/home/darren/Documents/4_data/3_internal/1_panels/TWIST/twist_LB_probes_plus_30bp_merged.sorted.bed" # "/home/darren/Documents/4_data/3_internal/1_panels/TWIST/twist_LB_probes_plus_30bp_merged.sorted.bed"
bed_path = "/home/darren/Documents/4_data/3_internal/1_panels/TWIST/twist_LB_probes_plus_30bp_merged.sorted.bed" bed_path = "/home/darren/Documents/4_data/3_internal/1_panels/Aglient/LB_3413441_Covered_50ext.sorted.bed"
ref_path = "/home/darren/Documents/4_data/1_genomes/human/hg38/Homo_sapiens_assembly38.fasta" ref_path = "/home/darren/Documents/4_data/1_genomes/human/hg38/Homo_sapiens_assembly38.fasta"
main(s3_paths, bed_path, ref_path, cli_bin="aws") main(s3_paths, bed_path, ref_path, cli_bin="s3cmd", tars=True)