Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions dorado/cli/cli_lib/demux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ std::shared_ptr<const dorado::demux::BarcodingInfo> get_barcoding_info(
}
result->barcode_both_ends = parser.get<bool>("--barcode-both-ends");
result->trim = !parser.get<bool>("--no-trim");
result->max_barcode_errors = parser.get<int>("--max-barcode-errors");
auto barcode_sample_sheet = parser.get<std::string>("--sample-sheet");
if (!barcode_sample_sheet.empty()) {
result->sample_sheet =
Expand Down Expand Up @@ -129,6 +130,12 @@ int demuxer(int argc, char* argv[]) {
.help("Path to file with custom barcode arrangement.");
parser.add_argument("--barcode-sequences")
.help("Path to file with custom barcode sequences.");
parser.add_argument("--max-barcode-errors")
.help("Enable fuzzy barcode matching using edit distance. Accepts barcodes "
"with up to this many errors (insertions, deletions, substitutions). "
"Bypasses flank-based scoring. Use -1 to disable (default).")
.default_value(-1)
.scan<'i', int>();
}
{
parser.add_group("Trimming arguments");
Expand Down
100 changes: 99 additions & 1 deletion dorado/demux/BarcodeClassifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,12 +189,110 @@ BarcodeClassifier::~BarcodeClassifier() = default;

// Classify a read sequence against the classifier's stored barcode candidates
// (m_barcode_candidates).
//
// seq                 read sequence to classify.
// barcode_both_ends   forwarded to find_best_barcode(); it is NOT forwarded on the
//                     fuzzy path below.
// allowed_barcodes    filter set forwarded to whichever matcher is selected.
// max_barcode_errors  when >= 0, selects edit-distance matching via barcode_fuzzy()
//                     with this value as the error limit; any negative value selects
//                     find_best_barcode().
BarcodeScoreResult BarcodeClassifier::barcode(const std::string& seq,
bool barcode_both_ends,
const BarcodeFilterSet& allowed_barcodes) const {
const BarcodeFilterSet& allowed_barcodes,
int max_barcode_errors) const {
// A non-negative error budget switches to the fuzzy matcher and returns immediately.
if (max_barcode_errors >= 0) {
return barcode_fuzzy(seq, m_barcode_candidates, max_barcode_errors, allowed_barcodes);
}
// Otherwise fall through to find_best_barcode().
auto best_barcode =
find_best_barcode(seq, m_barcode_candidates, barcode_both_ends, allowed_barcodes);
return best_barcode;
}

// Classify a read purely by edit distance between each permitted barcode and the read.
//
// For every barcode of the first candidate kit that passes `allowed_barcodes`, both the
// forward sequence (barcodes1) and the reverse sequence (barcodes1_rev) are aligned
// against the whole read with edlib in HW mode (the barcode may sit anywhere in the
// read). The orientation with the lower edit distance is kept; forward wins a tie.
//
// read_seq          read sequence to search.
// candidates        candidate kits; only candidates[0] is consulted.
// max_errors        highest edit distance at which the best barcode is still accepted.
// allowed_barcodes  filter applied to each barcode name via barcode_is_permitted().
//
// Returns the best-scoring barcode, or UNCLASSIFIED when:
//   * the read or the candidate list is empty,
//   * no permitted barcode produced a valid alignment,
//   * the best edit distance exceeds max_errors, or
//   * the runner-up is closer than m_scoring_params.min_barcode_penalty_dist to the best.
//
// NOTE(review): kits beyond candidates[0] are ignored here - confirm this is intended
// when more than one kit has been loaded.
BarcodeScoreResult BarcodeClassifier::barcode_fuzzy(
        const std::string& read_seq,
        const std::vector<BarcodeCandidateKit>& candidates,
        int max_errors,
        const BarcodeFilterSet& allowed_barcodes) const {
    if (read_seq.empty() || candidates.empty()) {
        return UNCLASSIFIED;
    }

    const BarcodeCandidateKit& candidate = candidates[0];

    auto edlib_config = edlibDefaultAlignConfig();
    edlib_config.mode = EDLIB_MODE_HW;  // Semi-global: find barcode anywhere in read
    edlib_config.task = EDLIB_TASK_LOC;

    // Outcome of aligning one barcode orientation against the read.
    struct Hit {
        bool valid;   // false when edlib failed or reported no alignment
        int penalty;  // edit distance; meaningful only when valid
        int start;    // first aligned read position, -1 if no location was reported
        int end;      // one past the last aligned read position, -1 if none
    };

    // Align one barcode sequence against the read. edlib signals failure through
    // `status` and "no alignment" through editDistance == -1; both must be rejected,
    // otherwise -1 would be ranked as a better-than-perfect match.
    const auto align = [&read_seq, &edlib_config](const auto& barcode_seq) {
        Hit hit{false, -1, -1, -1};
        auto aln = edlibAlign(barcode_seq.data(), int(barcode_seq.length()), read_seq.data(),
                              int(read_seq.length()), edlib_config);
        if (aln.status == EDLIB_STATUS_OK && aln.editDistance >= 0) {
            hit.valid = true;
            hit.penalty = aln.editDistance;
            if (aln.numLocations > 0) {
                hit.start = aln.startLocations[0];
                hit.end = aln.endLocations[0] + 1;
            }
        }
        edlibFreeAlignResult(aln);
        return hit;
    };

    // Only the best result and the runner-up's penalty are needed, so track them in a
    // single pass instead of storing and sorting every result. The first barcode seen
    // wins a tie, which keeps the outcome deterministic.
    bool have_best = false;
    bool have_second = false;
    int second_penalty = 0;
    BarcodeScoreResult best;

    for (size_t i = 0; i < candidate.barcodes1.size(); i++) {
        if (!barcode_is_permitted(allowed_barcodes, candidate.barcode_names[i])) {
            continue;
        }

        const auto& barcode_fwd = candidate.barcodes1[i];
        const auto& barcode_rev = candidate.barcodes1_rev[i];

        const Hit fwd = align(barcode_fwd);
        const Hit rev = align(barcode_rev);
        if (!fwd.valid && !rev.valid) {
            // Neither orientation produced a usable alignment; this barcode cannot score.
            continue;
        }

        const bool use_fwd = fwd.valid && (!rev.valid || fwd.penalty <= rev.penalty);
        const int penalty = use_fwd ? fwd.penalty : rev.penalty;

        if (have_best && penalty >= best.penalty) {
            // Not a new best: it can only tighten the runner-up penalty.
            if (!have_second || penalty < second_penalty) {
                second_penalty = penalty;
                have_second = true;
            }
            continue;
        }

        // New best: the previous best (if any) becomes the runner-up.
        if (have_best) {
            second_penalty = best.penalty;
            have_second = true;
        }

        BarcodeScoreResult res;
        res.barcode_name = candidate.barcode_names[i];
        res.kit = candidate.kit;
        res.barcode_kit = candidate.barcode_kit;
        res.penalty = penalty;
        // Fraction of the forward barcode's length that matched; 0 for an empty barcode.
        res.barcode_score = (barcode_fwd.length() > 0)
                                    ? 1.0f - float(penalty) / float(barcode_fwd.length())
                                    : 0.0f;
        res.flank_score = 1.0f;  // No flank scoring in fuzzy mode

        if (use_fwd) {
            res.top_barcode_pos = {fwd.start, fwd.end};
            res.top_penalty = fwd.penalty;
            res.top_barcode_score = res.barcode_score;
        } else {
            res.bottom_barcode_pos = {rev.start, rev.end};
            res.bottom_penalty = rev.penalty;
            res.bottom_barcode_score = res.barcode_score;
        }
        res.use_top = use_fwd;

        best = std::move(res);
        have_best = true;
    }

    if (!have_best) {
        return UNCLASSIFIED;
    }

    // Check if the best penalty is within the max errors threshold
    if (best.penalty > max_errors) {
        return UNCLASSIFIED;
    }

    // Require separation from second best to avoid ambiguous assignments
    if (have_second) {
        const int penalty_dist = second_penalty - best.penalty;
        if (penalty_dist < m_scoring_params.min_barcode_penalty_dist) {
            return UNCLASSIFIED;
        }
    }

    return best;
}

// Generate all possible barcode candidates. If kit name is passed
// limit the candidates generated to only the specified kits. This is done
// to frontload some of the computation, such as calculating flanks
Expand Down
7 changes: 6 additions & 1 deletion dorado/demux/include/demux/BarcodeClassifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ class BarcodeClassifier {

BarcodeScoreResult barcode(const std::string& seq,
bool barcode_both_ends,
const BarcodeFilterSet& allowed_barcodes) const;
const BarcodeFilterSet& allowed_barcodes,
int max_barcode_errors = -1) const;

private:
const KitInfoProvider m_kit_info_provider;
Expand Down Expand Up @@ -54,6 +55,10 @@ class BarcodeClassifier {
const std::vector<BarcodeCandidateKit>& adapter,
bool barcode_both_ends,
const BarcodeFilterSet& allowed_barcodes) const;
BarcodeScoreResult barcode_fuzzy(const std::string& read_seq,
const std::vector<BarcodeCandidateKit>& candidates,
int max_errors,
const BarcodeFilterSet& allowed_barcodes) const;
};

} // namespace demux
Expand Down
1 change: 1 addition & 0 deletions dorado/demux/include/demux/barcoding_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ struct BarcodingInfo {
std::string kit_name;
bool barcode_both_ends{false};
bool trim{false};
int max_barcode_errors{-1}; // -1 = use normal flank-based scoring; >= 0 = fuzzy edit-distance matching
BarcodeFilterSet allowed_barcodes;
std::shared_ptr<const utils::SampleSheet> sample_sheet;
};
Expand Down
6 changes: 4 additions & 2 deletions dorado/read_pipeline/nodes/BarcodeClassifierNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ void BarcodeClassifierNode::barcode(BamMessage& message,
}

auto bc_res = barcoder->barcode(seq, barcoding_info->barcode_both_ends,
barcoding_info->allowed_barcodes);
barcoding_info->allowed_barcodes,
barcoding_info->max_barcode_errors);
auto bc = generate_barcode_string(bc_res);
if (barcoding_info->sample_sheet) {
bc_res.alias = barcoding_info->sample_sheet->get_alias(bc);
Expand Down Expand Up @@ -207,7 +208,8 @@ void BarcodeClassifierNode::barcode(SimplexRead& read) {

// get the sequence to map from the record
auto bc_res = barcoder->barcode(read.read_common.seq, barcoding_info->barcode_both_ends,
barcoding_info->allowed_barcodes);
barcoding_info->allowed_barcodes,
barcoding_info->max_barcode_errors);
read.read_common.barcode = generate_barcode_string(bc_res);
utils::trace_log("Barcode for {} is {}", read.read_common.read_id, read.read_common.barcode);
{
Expand Down
3 changes: 3 additions & 0 deletions dorado/torch_utils/include/torch_utils/metal_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@

// Some NS types make use of tagged pointers which aren't aligned and trip up UBSan.
// NS::SharedPtr also incorrectly uses the no_sanitize attribute since it should be on the inner class.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wignored-attributes"
#pragma clang attribute push(__attribute__((no_sanitize("alignment"), no_sanitize("null"))), \
apply_to = function)
#include <Metal/Metal.hpp>
#pragma clang attribute pop
#pragma clang diagnostic pop

#include "utils/PostCondition.h"

Expand Down
2 changes: 1 addition & 1 deletion tests/BarcodeClassifierTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ std::shared_ptr<const demux::BarcodingInfo> create_barcoding_info(
return {};
}

auto result = demux::BarcodingInfo{kit_name, barcode_both_ends, trim_barcode,
auto result = demux::BarcodingInfo{kit_name, barcode_both_ends, trim_barcode, -1,
std::move(allowed_barcodes), nullptr};
return std::make_shared<demux::BarcodingInfo>(std::move(result));
}
Expand Down