Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ lru_time_cache = "0.11.1"
num_cpus = "1.13.0"
rayon = "1.4.0"
rust-htslib = {version = "0.51", default-features = false, features = ["libdeflate"]}
seqair = { git = "https://tangled.org/softleif.se/seqair.git", rev = "28766a811596437f685f76aac753f963cde96f7f", optional = true }
seqair-types = { git = "https://tangled.org/softleif.se/seqair.git", rev = "28766a811596437f685f76aac753f963cde96f7f", optional = true }
seqair = { git = "https://github.com/sstadick/seqair", rev = "6d9251ccc7681c9ed6b26be24ae61cf59c0a8de9", optional = true }
seqair-types = { git = "https://github.com/sstadick/seqair", rev = "6d9251ccc7681c9ed6b26be24ae61cf59c0a8de9", optional = true }
rust-lapper = "1.0"
serde = { version = "1.0.116", features = ["derive"] }
smartstring = { version = "1.0.1", features = ["serde"] }
Expand Down
35 changes: 20 additions & 15 deletions src/commands/base_depth.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
use anyhow::Result;
use bio::io::fasta::IndexedReader;
use log::*;
#[cfg(feature = "seqair-pileup")]
use perbase_lib::position::pileup_position::SeqairPileupPositionAccumulator;
use perbase_lib::{
par_granges::{self, RegionProcessor},
position::{Position, mate_fix::MateResolutionStrategy, pileup_position::PileupPosition},
Expand Down Expand Up @@ -404,27 +406,30 @@ impl<F: ReadFilter> BaseProcessor<F> {
engine.set_max_depth(self.max_depth);

let mut result = Vec::new();
while let Some(column) = engine.pileups() {
let pileup_depth = u32::try_from(column.depth()).unwrap_or(u32::MAX);
let mut pos = if self.mate_fix {
PileupPosition::from_seqair_column_mate_aware(
if self.mate_fix {
while let Some(column) = engine.pileups() {
let pileup_depth = u32::try_from(column.depth()).unwrap_or(u32::MAX);
let mut pos = PileupPosition::from_seqair_column_mate_aware(
ref_name.clone(),
&column,
&self.read_filter,
self.min_base_quality_score,
self.mate_fix_strategy,
)
} else {
PileupPosition::from_seqair_column(
ref_name.clone(),
&column,
&self.read_filter,
self.min_base_quality_score,
)
};
);

self.finalize_pileup_position(&mut pos, pileup_depth);
result.push(pos);
self.finalize_pileup_position(&mut pos, pileup_depth);
result.push(pos);
}
} else {
let mut accumulator = SeqairPileupPositionAccumulator::new(
ref_name.clone(),
&self.read_filter,
self.min_base_quality_score,
);
while let Some(mut accumulated) = engine.pileup_with(&mut accumulator) {
self.finalize_pileup_position(&mut accumulated.position, accumulated.pileup_depth);
result.push(accumulated.position);
}
}

self.fill_zero_positions(result, ref_name, start, stop)
Expand Down
126 changes: 99 additions & 27 deletions src/commands/only_depth.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ pub struct OnlyDepth {
#[structopt(long, short = "x")]
fast_mode: bool,

/// Use the experimental seqair-backed reader with raw pre-filtering. Requires unmapped reads to be excluded (e.g. -F 4, or -F 3852 instead of -F 3848).
/// Use the experimental seqair-backed reader with raw pre-filtering.
#[cfg(feature = "seqair-pileup")]
#[structopt(long = "seqair")]
seqair: bool,
Expand Down Expand Up @@ -197,12 +197,6 @@ impl OnlyDepth {
"The experimental seqair only-depth path currently supports BAM input only"
);
}
#[cfg(feature = "seqair-pileup")]
if self.seqair && self.exclude_flags & 0x4 == 0 {
anyhow::bail!(
"The experimental seqair only-depth path requires unmapped reads to be excluded; add -F 4, or use -F 3852 if you would otherwise use -F 3848"
);
}
let cpus = utils::determine_allowed_cpus(self.threads)?;

let mut writer = utils::get_writer(
Expand Down Expand Up @@ -347,7 +341,16 @@ impl<F: ReadFilter> OnlyDepthProcessor<F> {
fn with_seqair(mut self, seqair: bool) -> Result<Self> {
self.seqair = seqair;
if seqair {
self.seqair_reader_template = Some(seqair::reader::IndexedReader::open(&self.reads)?);
let reader = match seqair::reader::IndexedReader::open(&self.reads)? {
// `only-depth` consumes raw fetched records (like htslib `view`), not seqair's
// pileup stream. Keep placed-unmapped BAM records in the fetch stream and let
// perbase's normal read-filter semantics decide whether `-F 4` excludes them.
seqair::reader::IndexedReader::Bam(reader) => {
seqair::reader::IndexedReader::Bam(reader.keep_unmapped(true))
}
reader => reader,
};
self.seqair_reader_template = Some(reader);
}
Ok(self)
}
Expand Down Expand Up @@ -689,6 +692,24 @@ impl<F: ReadFilter> OnlyDepthProcessor<F> {
}
}

#[cfg(feature = "seqair-pileup")]
#[inline(always)]
fn seqair_fast_reference_stop(
rec: &seqair::bam::record_store::SlimRecord,
cigar: &[seqair::bam::CigarOp],
) -> u32 {
// Match rust-htslib's `Record::reference_end()` behavior for placed-unmapped
// records: even if a CIGAR is present, fast-mode treats the record as a
// one-base placed interval. Normal `only-depth` still walks CIGAR blocks.
if rec.flags.is_unmapped() {
(*rec.pos)
.checked_add(1)
.expect("seqair unmapped end position overflow")
} else {
Self::seqair_reference_stop(rec, cigar)
}
}

#[cfg(feature = "seqair-pileup")]
fn merge_seqair_mate_intervals(
counter: &mut [i32],
Expand Down Expand Up @@ -716,11 +737,9 @@ impl<F: ReadFilter> OnlyDepthProcessor<F> {
) -> Vec<RangePositions> {
let mut counter: Vec<i32> = vec![0; (stop - start) as usize];

for idx in 0..store.len() {
let idx = u32::try_from(idx).expect("seqair record index");
let rec = store.record(idx);
for rec in store.records() {
let mut ref_pos = *rec.pos;
for op in store.cigar(idx) {
for op in rec.cigar(&store).expect("seqair CIGAR") {
let len = op.len();
match op.op_type() {
seqair::bam::cigar::CigarOpType::Match
Expand Down Expand Up @@ -773,14 +792,13 @@ impl<F: ReadFilter> OnlyDepthProcessor<F> {
let mut counter: Vec<i32> = vec![0; (stop - start) as usize];
let mut maties: HashMap<String, Vec<Interval<usize, bool>>> = HashMap::new();

for idx in 0..store.len() {
let idx = u32::try_from(idx).expect("seqair record index");
let rec = store.record(idx);
let cigar = store.cigar(idx);
for rec in store.records() {
let cigar = rec.cigar(&store).expect("seqair CIGAR");
let reference_stop = Self::seqair_reference_stop(rec, cigar);
let maybe_mate_key = if OnlyDepth::maybe_overlaps_mate_seqair(rec, reference_stop) {
Some(String::from(
std::str::from_utf8(store.qname(idx)).expect("Convert qname"),
std::str::from_utf8(rec.qname(&store).expect("seqair qname"))
.expect("Convert qname"),
))
} else {
None
Expand Down Expand Up @@ -848,10 +866,9 @@ impl<F: ReadFilter> OnlyDepthProcessor<F> {
) -> Vec<RangePositions> {
let mut counter: Vec<i32> = vec![0; (stop - start) as usize];

for idx in 0..store.len() {
let idx = u32::try_from(idx).expect("seqair record index");
let rec = store.record(idx);
let reference_stop = Self::seqair_reference_stop(rec, store.cigar(idx));
for rec in store.records() {
let reference_stop =
Self::seqair_fast_reference_stop(rec, rec.cigar(&store).expect("seqair CIGAR"));
if let Some((adjusted_start, adjusted_stop, dont_count_stop)) =
Self::adjusted_seqair_interval(counter.len(), *rec.pos, reference_stop, start, stop)
{
Expand All @@ -878,16 +895,17 @@ impl<F: ReadFilter> OnlyDepthProcessor<F> {
let mut counter: Vec<i32> = vec![0; (stop - start) as usize];
let mut maties: HashMap<String, Vec<Interval<usize, bool>>> = HashMap::new();

for idx in 0..store.len() {
let idx = u32::try_from(idx).expect("seqair record index");
let rec = store.record(idx);
let reference_stop = Self::seqair_reference_stop(rec, store.cigar(idx));
for rec in store.records() {
let reference_stop =
Self::seqair_fast_reference_stop(rec, rec.cigar(&store).expect("seqair CIGAR"));
if let Some((adjusted_start, adjusted_stop, dont_count_stop)) =
Self::adjusted_seqair_interval(counter.len(), *rec.pos, reference_stop, start, stop)
{
if OnlyDepth::maybe_overlaps_mate_seqair(rec, reference_stop) {
let qname =
String::from(std::str::from_utf8(store.qname(idx)).expect("Convert qname"));
let qname = String::from(
std::str::from_utf8(rec.qname(&store).expect("seqair qname"))
.expect("Convert qname"),
);
maties.entry(qname).or_default().push(Interval {
start: adjusted_start,
stop: adjusted_stop,
Expand Down Expand Up @@ -1385,6 +1403,60 @@ mod tests {
assert_eq!(htslib_positions, seqair_positions);
}

#[cfg(feature = "seqair-pileup")]
#[rstest(fast_mode => [false, true])]
fn seqair_only_depth_keeps_placed_unmapped_like_htslib_records(fast_mode: bool) {
let tempdir = tempdir().unwrap();
let bam_path = tempdir.path().join("placed_unmapped.bam");

let mut header = bam::header::Header::new();
let mut chr1 = bam::header::HeaderRecord::new(b"SQ");
chr1.push_tag(b"SN", &"chr1".to_owned());
chr1.push_tag(b"LN", &"100".to_owned());
header.push_record(&chr1);
let view = bam::HeaderView::from_header(&header);

let records = [
Record::from_sam(
&view,
b"MAPPED\t0\tchr1\t1\t40\t10M\t*\t0\t0\tAAAAAAAAAA\tIIIIIIIIII",
)
.unwrap(),
// Placed-unmapped records can be returned by htslib's raw record iterator after
// region fetch. seqair 0.1.0's `keep_unmapped(true)` lets perbase apply the same
// user read filters instead of dropping them before `filter_raw`.
Record::from_sam(
&view,
b"PLACED_UNMAPPED\t4\tchr1\t5\t0\t10M\t*\t0\t0\tCCCCCCCCCC\tIIIIIIIIII",
)
.unwrap(),
];

let mut writer = bam::Writer::from_path(&bam_path, &header, bam::Format::Bam).unwrap();
for record in &records {
writer.write(record).unwrap();
}
drop(writer);
bam::index::build(&bam_path, None, bam::index::Type::Bai, 1).unwrap();

let htslib_positions = collect_only_depth_tuples(
bam_path.clone(),
fast_mode,
false,
false,
DefaultReadFilter::new(0, 0, 0),
);
let seqair_positions = collect_only_depth_tuples(
bam_path,
fast_mode,
false,
true,
DefaultReadFilter::new(0, 0, 0),
);

assert_eq!(htslib_positions, seqair_positions);
}

#[rstest(
positions,
awareness_modifier,
Expand Down
Loading