Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
150 commits
Select commit Hold shift + click to select a range
2e352bf
update
mrvollger Jul 31, 2024
98cc544
update
mrvollger Jul 31, 2024
d3449ea
update
mrvollger Jul 31, 2024
e748559
update
mrvollger Jul 31, 2024
f237c3a
ylines
mrvollger Aug 12, 2024
181eb91
bigtools
mrvollger Aug 14, 2024
eba5937
bigtools
mrvollger Aug 14, 2024
ff82819
bigtools
mrvollger Aug 14, 2024
5b54142
bigtools
mrvollger Aug 14, 2024
a31671d
bigtools
mrvollger Aug 14, 2024
52a0ed2
bigtools
mrvollger Aug 14, 2024
f4b8c61
bigtools
mrvollger Aug 14, 2024
95c23ef
bigtools
mrvollger Aug 14, 2024
474a843
bigtools
mrvollger Aug 14, 2024
b2cd5a2
bigtools
mrvollger Aug 14, 2024
e9db2a6
bigtools
mrvollger Aug 14, 2024
951be2b
bigtools
mrvollger Aug 14, 2024
2a3625e
bigtools
mrvollger Aug 14, 2024
24ee367
bigtools
mrvollger Aug 14, 2024
302abc9
bigtools
mrvollger Aug 15, 2024
c81bae8
bigtools
mrvollger Aug 15, 2024
b03cc80
trakchub updates
mrvollger Aug 19, 2024
f3c77db
ref full path
mrvollger Aug 19, 2024
9367a11
ref full path
mrvollger Aug 19, 2024
b2f0188
description
mrvollger Aug 19, 2024
6e43acc
description
mrvollger Aug 19, 2024
5d7595c
description
mrvollger Aug 19, 2024
7403054
description
mrvollger Aug 19, 2024
3de3d95
description
mrvollger Aug 19, 2024
a7bfe0d
description
mrvollger Aug 19, 2024
7c6d032
description
mrvollger Aug 19, 2024
d01c642
description
mrvollger Aug 19, 2024
b92ceac
description
mrvollger Aug 19, 2024
b689768
description
mrvollger Aug 19, 2024
28c0271
description
mrvollger Aug 19, 2024
ec30dbd
empty file check
mrvollger Aug 21, 2024
c05680f
Add ft qc
mrvollger Sep 9, 2024
e52e1e3
Add ft qc
mrvollger Sep 10, 2024
a742a20
Add ft qc
mrvollger Sep 10, 2024
0489f86
Add ft qc
mrvollger Sep 10, 2024
3bdf4d1
Add ft qc
mrvollger Sep 10, 2024
e30cc5c
Add ft qc
mrvollger Sep 11, 2024
b489bc1
refactor to use ft pileup and replace zcat with bgzip
mrvollger Sep 13, 2024
95ea662
drop qual from cram to half the size of file
mrvollger Sep 13, 2024
5b4aeee
add sup filter by default, used to be within ft
mrvollger Sep 13, 2024
13830a0
add sup filter by default, used to be within ft
mrvollger Sep 14, 2024
1ec93cc
Major refactor to prepare for DSA
mrvollger Sep 20, 2024
383c7fc
Major refactor to prepare for DSA
mrvollger Sep 20, 2024
1275d73
Major refactor to prepare for DSA
mrvollger Sep 20, 2024
aee3cff
ruff check
mrvollger Sep 22, 2024
2c389a6
fmt
mrvollger Sep 29, 2024
cad8e7a
fmt
mrvollger Sep 29, 2024
61008bc
fmt
mrvollger Sep 29, 2024
e9d847c
fmt
mrvollger Sep 29, 2024
283f15e
fmt
mrvollger Sep 29, 2024
dc0bb20
fmt
mrvollger Sep 29, 2024
36fa569
fmt
mrvollger Sep 29, 2024
1186bd8
fmt
mrvollger Sep 29, 2024
193b2c1
fmt
mrvollger Sep 29, 2024
86c7de2
fmt
mrvollger Sep 30, 2024
13f6940
fmt
mrvollger Sep 30, 2024
c02b0ae
fmt
mrvollger Sep 30, 2024
bf1671f
more consistent formating
mrvollger Sep 30, 2024
5c2b206
more consistent formating
mrvollger Sep 30, 2024
adba0c5
more consistent formating
mrvollger Sep 30, 2024
7244ba3
more consistent formating
mrvollger Sep 30, 2024
9de3ff6
more consistent formating
mrvollger Sep 30, 2024
3a1ae66
more consistent formating
mrvollger Sep 30, 2024
a2a9ce6
more consistent formating
mrvollger Sep 30, 2024
f8adcef
more consistent formating
mrvollger Sep 30, 2024
991c4d1
more consistent formating
mrvollger Sep 30, 2024
e6bc9f2
more consistent formating
mrvollger Sep 30, 2024
988dec1
more consistent formating
mrvollger Sep 30, 2024
bb24076
more consistent formating
mrvollger Sep 30, 2024
e726723
more consistent formating
mrvollger Oct 1, 2024
89c9893
more name changes
mrvollger Oct 9, 2024
806ab3a
more name changes
mrvollger Oct 9, 2024
ed89b16
more name changes
mrvollger Oct 9, 2024
dc5130f
more name changes
mrvollger Oct 10, 2024
e53d299
more name changes
mrvollger Oct 10, 2024
c92b426
more name changes
mrvollger Oct 10, 2024
47b0ab3
more name changes
mrvollger Oct 10, 2024
b1bd08b
more name changes
mrvollger Oct 10, 2024
3ef814f
more name changes
mrvollger Oct 10, 2024
7e7133b
more name changes
mrvollger Oct 10, 2024
e5185ff
more name changes
mrvollger Oct 10, 2024
aa4b709
more name changes
mrvollger Oct 10, 2024
a71472b
more name changes
mrvollger Oct 10, 2024
eefdb33
place conda env first
mrvollger Oct 12, 2024
ba094e0
test runner
mrvollger Oct 12, 2024
8671b5d
test runner
mrvollger Oct 12, 2024
f4b5602
Merge branch 'main' into v0.0.8
mrvollger Oct 12, 2024
d05eb3f
test runner
mrvollger Oct 12, 2024
3cf8dd3
test runner
mrvollger Oct 12, 2024
2b57c36
test runner
mrvollger Oct 12, 2024
2bc0fc8
test runner
mrvollger Oct 12, 2024
9280204
test runner
mrvollger Oct 12, 2024
ac7e766
test runner
mrvollger Oct 12, 2024
d8fcba4
test runner
mrvollger Oct 13, 2024
8365849
remove runner
mrvollger Oct 13, 2024
b9ef1f1
Merge branch 'main' into v0.0.8
mrvollger Oct 13, 2024
dbb7852
remove runner
mrvollger Oct 13, 2024
2e9422a
ft update
Nov 14, 2024
1a495b1
switch back to bigtools and upgrade versions
Nov 18, 2024
ca3691f
Add pixi files to help inital install
Nov 18, 2024
801c2d8
switch back to bigtools and upgrade versions
Nov 18, 2024
54e0d92
add a test task to pixi file
Nov 21, 2024
651ea10
add some pixi install instructions
Nov 22, 2024
7a03409
add some pixi install instructions
Nov 22, 2024
15b68fb
add some pixi install instructions
Nov 22, 2024
0368104
format
Nov 22, 2024
a313b31
format
Nov 22, 2024
359681b
format
Nov 22, 2024
1b056b9
format
Nov 22, 2024
3df9a3b
format
Nov 22, 2024
2dbbf3b
format
Nov 22, 2024
5babca4
actions
Nov 22, 2024
78f125d
Merge branch 'main' into v0.0.8
mrvollger Nov 22, 2024
81e6d9b
actions
Nov 22, 2024
d9fd79a
actions
Nov 22, 2024
188a591
docs
Nov 22, 2024
befa347
docs
Nov 23, 2024
63b2dd3
add pixi version to outputs
Nov 23, 2024
a12d1ab
add pixi version to outputs
Nov 23, 2024
b68e020
add pixi version to outputs
Nov 23, 2024
b3284c4
add pixi version to outputs
Nov 25, 2024
eadfa4f
add pixi version to outputs
Nov 26, 2024
41e944c
gmt
Nov 26, 2024
ac184e3
gmt
Nov 26, 2024
174aea8
gmt
Nov 26, 2024
73a1b36
gmt
Nov 26, 2024
17ee205
fmt
Nov 26, 2024
05e45e7
fmt
Nov 27, 2024
1c2e964
fmt
Nov 27, 2024
db7f92f
fmt
Nov 27, 2024
5342a7e
fmt
Nov 27, 2024
45a8bf8
fmt
Nov 27, 2024
b33b44e
fmt
Nov 27, 2024
bb8edba
fmt
Nov 27, 2024
6087cba
fmt
Nov 27, 2024
74c3b4f
adj threads
Dec 3, 2024
e4e00e6
bigtools v
Dec 8, 2024
c3d8e1f
Merge branch 'main' into v0.0.8
mrvollger Dec 10, 2024
83ba801
Simplify application of ft fire
Dec 11, 2024
b4a68a9
update
Jan 6, 2025
d33d7c4
Merge branch 'main' into v0.0.8
mrvollger Jan 6, 2025
daa9272
update
Jan 6, 2025
5c1b884
error on empty csv
Jan 6, 2025
ea31e71
error on empty csv
Jan 6, 2025
3745dc1
error on empty csv
Jan 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# v0.1.0
# Change Log

All notable changes to this project will be documented in this file.

## v0.1.1

Added more informative error messages if an FDR distribution cannot be made or there is not enough coverage.

## v0.1.0

First major release of the FIRE pipeline. This release includes a refactor to reduce the computation by increased use of ft, changes to the output file names to include the fire version among other things, and finally a new launching method for the pipeline that uses pixi. Results are very similar to v0.0.7 of the pipeline; however, there are minor differences in the peak calls and the output names.
2 changes: 1 addition & 1 deletion pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ channels = ["conda-forge", "bioconda"]
description = "Add a short description here"
name = "FIRE"
platforms = ["osx-64", "linux-64"]
version = "0.1.0"
version = "0.1.1"

[tasks]
fmt = "ruff format . && taplo format pixi.toml && snakefmt workflow/"
Expand Down
6 changes: 4 additions & 2 deletions workflow/scripts/cov.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,12 @@ def polars_read():
print(f"\nmean coverage: {mean}", file=sys.stderr)
print(f"median coverage: {coverage}\n", file=sys.stderr)

if coverage <= 1:
if coverage < 5:
raise ValueError(
f"Median coverage is {coverage}! Did you use the correct reference, or is data missing from most of your genome. If so consider the keep_chromosomes parameter in config.yaml"
f"Median coverage is {coverage}! Did you use the correct reference, or is data missing from most of your genome. We recommend at least 10x coverage to use FIRE and require at least 5x."
"If you are only examining data from a subset of chromosomes, consider using the keep_chromosomes parameter in config.yaml"
)

open(snakemake.output.cov, "w").write(str(round(coverage)) + "\n")
open(snakemake.output.minimum, "w").write(str(round(min_coverage)) + "\n")
open(snakemake.output.maximum, "w").write(str(round(max_coverage)) + "\n")
31 changes: 26 additions & 5 deletions workflow/scripts/fdr-table.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import numpy as np
import polars.selectors as cs
import gzip
import sys

# from numba import njit
ROLLING_FIRE_SCORE_WINDOW_SIZE = 200
Expand All @@ -25,12 +26,26 @@ def find_nearest(array, value):
return idx


def my_read_csv(*args, **kwargs):
    """Read a CSV with ``pl.read_csv``, exiting cleanly if it has no data.

    All positional and keyword arguments are forwarded unchanged to
    ``pl.read_csv`` and its result is returned as-is.

    If ``pl.read_csv`` raises ``pl.exceptions.NoDataError``, an explanatory
    message followed by the exception itself is printed to stderr and the
    process is terminated with exit status 1 (via ``sys.exit``), so callers
    never see the exception.
    """
    try:
        return pl.read_csv(*args, **kwargs)
    except pl.exceptions.NoDataError as err:
        # Explain the likely cause before showing the raw polars error.
        hint = "No data is found in the input file. Check the input file and make sure it is not empty. It is likely that the input data was not generated correctly or that it was impossible to find peaks at the specified FDR value."
        print(hint, file=sys.stderr)
        print(err, file=sys.stderr)
        sys.exit(1)


# ['#chrom', 'start', 'end', 'coverage', 'fire_coverage', 'score', 'nuc_coverage', 'msp_coverage',
# 'coverage_H1', 'fire_coverage_H1', 'score_H1', 'nuc_coverage_H1', 'msp_coverage_H1',
# 'coverage_H2', 'fire_coverage_H2', 'score_H2', 'nuc_coverage_H2', 'msp_coverage_H2']
def read_pileup_file(infile, nrows):
# get the header from the first line of the file
header = pl.read_csv(infile, separator="\t", n_rows=1).columns
header = my_read_csv(infile, separator="\t", n_rows=1).columns

# check that there is at least two lines
open_infile = gzip.open if is_gzipped(infile) else open
Expand All @@ -51,7 +66,7 @@ def read_pileup_file(infile, nrows):
logging.info(f"Schema overrides for the pileup file:\n{schema_overrides}")

# read the file
pileup = pl.read_csv(
pileup = my_read_csv(
infile,
separator="\t",
has_header=False,
Expand Down Expand Up @@ -132,7 +147,7 @@ def fdr_table_from_scores(fire_scores):
return results


def make_fdr_table(infile, outfile, nrows, max_cov=None, min_cov=None):
def make_fdr_table(infile, outfile, nrows, max_cov=None, min_cov=None, max_fdr=0.05):
# read the pileup file
pileup = read_pileup_file(infile, nrows)
# filter on coverages if needed
Expand Down Expand Up @@ -172,11 +187,16 @@ def make_fdr_table(infile, outfile, nrows, max_cov=None, min_cov=None):
logging.info(f"Done aggregating pileup file:\n{fire_scores}")
fdr_table = fdr_table_from_scores(fire_scores)
fdr_table.to_csv(outfile, sep="\t", index=False)
# raise an error if no threshold below 0.05 is found
if fdr_table["FDR"].min() > max_fdr:
raise ValueError(
f"No FIRE score threshold has an FDR < {max_fdr}. Check the input Fiber-seq data with the QC pipeline and make sure you are using WGS Fiber-seq data."
)
return fdr_table


def read_fdr_table(infile):
fdr_table = pl.read_csv(infile, separator="\t").to_pandas()
fdr_table = my_read_csv(infile, separator="\t").to_pandas()
logging.info(f"Read FDR table:\n{fdr_table}")
return fdr_table

Expand Down Expand Up @@ -283,6 +303,7 @@ def main(
nrows: Optional[int] = None,
max_cov: Optional[int] = None,
min_cov: Optional[int] = None,
max_fdr: float = 0.05,
verbose: int = 0,
):
"""
Expand All @@ -303,7 +324,7 @@ def main(
apply_fdr_table(infile, outfile, fdr_table, nrows)
else:
fdr_table = make_fdr_table(
infile, outfile, nrows, min_cov=min_cov, max_cov=max_cov
infile, outfile, nrows, min_cov=min_cov, max_cov=max_cov, max_fdr=max_fdr
)
return 0

Expand Down
Loading