Skip to content

Commit

Permalink
fix ref allele mismatch
Browse files Browse the repository at this point in the history
  • Loading branch information
tobiasrausch committed Sep 14, 2023
1 parent 32f1e6f commit 11ecbac
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 51 deletions.
2 changes: 1 addition & 1 deletion src/coverage.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ namespace torali {

// Set tag alleles
if (itSV->chr == refIndex) {
itSV->alleles = _addAlleles(boost::to_upper_copy(std::string(seq + itSV->svStart - 1, seq + itSV->svStart)), std::string(hdr->target_name[itSV->chr2]), *itSV, itSV->svt);
if (itSV->alleles.empty()) itSV->alleles = _addAlleles(boost::to_upper_copy(std::string(seq + itSV->svStart - 1, seq + itSV->svStart)), std::string(hdr->target_name[itSV->chr2]), *itSV, itSV->svt);
}
if (!itSV->precise) continue;

Expand Down
24 changes: 1 addition & 23 deletions src/genotype.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ namespace torali

// Set tag alleles
if (itSV->chr == refIndex) {
itSV->alleles = _addAlleles(boost::to_upper_copy(std::string(seq + itSV->svStart - 1, seq + itSV->svStart)), std::string(hdr->target_name[itSV->chr2]), *itSV, itSV->svt);
if (itSV->alleles.empty()) itSV->alleles = _addAlleles(boost::to_upper_copy(std::string(seq + itSV->svStart - 1, seq + itSV->svStart)), std::string(hdr->target_name[itSV->chr2]), *itSV, itSV->svt);
}
if (!itSV->precise) continue;

Expand Down Expand Up @@ -182,28 +182,6 @@ namespace torali
AlignDescriptor ad;
if (!_findSplit(c, itSV->consensus, svRefStr, align, ad, itSV->svt)) continue;

// Get exact alleles for INS and DEL
if (itSV->svEnd - itSV->svStart <= c.indelsize) {
if ((itSV->svt == 2) || (itSV->svt == 4)) {
std::string refVCF;
std::string altVCF;
int32_t cpos = 0;
bool inSV = false;
for(uint32_t j = 0; j<align.shape()[1]; ++j) {
if (align[0][j] != '-') {
++cpos;
if (cpos == ad.cStart) inSV = true;
else if (cpos == ad.cEnd) inSV = false;
}
if (inSV) {
if (align[0][j] != '-') altVCF += align[0][j];
if (align[1][j] != '-') refVCF += align[1][j];
}
}
itSV->alleles = _addAlleles(refVCF, altVCF);
}
}

// Debug consensus to reference alignment
//std::cerr << "svid:" << itSV->id << "," << itSV->svStart << "," << itSV->svEnd << "," << itSV->svt << ",consensus-to-reference-alignment" << std::endl;
//for(uint32_t i = 0; i<align.shape()[0]; ++i) {
Expand Down
73 changes: 46 additions & 27 deletions src/split.h
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ namespace torali

// Is this a valid split-read alignment?
if (!_validSRAlignment(ad.cStart, ad.cEnd, ad.rStart, ad.rEnd, svt)) return false;

// Check percent identity
_percentIdentity(align, gS, gE, ad.percId);
//std::cerr << ad.percId << ',' << c.flankQuality << std::endl;
Expand Down Expand Up @@ -645,7 +645,7 @@ namespace torali

template<typename TConfig>
inline bool
_alignConsensus(TConfig const& c, std::string& consensus, std::string& svRefStr, int32_t svt, AlignDescriptor& ad, bool const realign) {
_alignConsensus(TConfig const& c, std::string& consensus, std::string& svRefStr, StructuralVariantRecord& sv, Breakpoint& bp, bool const realign) {
// Realign?
if (realign) {
std::string revc = consensus;
Expand All @@ -665,7 +665,7 @@ namespace torali
typedef boost::multi_array<char, 2> TAlign;
TAlign align;
//std::cerr << "Consensus-to-Reference alignment" << std::endl;
if (!_consRefAlignment(consensus, svRefStr, align, svt)) return false;
if (!_consRefAlignment(consensus, svRefStr, align, sv.svt)) return false;

// Debug consensus to reference alignment
//for(uint32_t i = 0; i < align.shape()[0]; ++i) {
Expand All @@ -678,35 +678,14 @@ namespace torali
//std::cerr << std::endl;

// Check breakpoint
if (!_findSplit(c, consensus, svRefStr, align, ad, svt)) return false;

// All fine
return true;
}

template<typename TConfig>
inline bool
alignConsensus(TConfig const& c, bam_hdr_t* hdr, char const* seq, char const* sndSeq, StructuralVariantRecord& sv, bool const realign) {
if ( (int32_t) sv.consensus.size() < (2 * c.minimumFlankSize + sv.insLen)) return false;

// Get reference slice
Breakpoint bp(sv);
if (sv.svt ==4) {
int32_t bufferSpace = std::max((int32_t) ((sv.consensus.size() - sv.insLen) / 3), c.minimumFlankSize);
_initBreakpoint(hdr, bp, bufferSpace, sv.svt);
} else _initBreakpoint(hdr, bp, sv.consensus.size(), sv.svt);
if (bp.chr != bp.chr2) bp.part1 = _getSVRef(c, sndSeq, bp, bp.chr2, sv.svt);
std::string svRefStr = _getSVRef(c, seq, bp, bp.chr, sv.svt);

// Generate consensus alignment
AlignDescriptor ad;
if (!_alignConsensus(c, sv.consensus, svRefStr, sv.svt, ad, realign)) return false;
if (!_findSplit(c, consensus, svRefStr, align, ad, sv.svt)) return false;

// Get the start and end of the structural variant
// Transform coordinates
unsigned int finalGapStart = 0;
unsigned int finalGapEnd = 0;
if (!_coordTransform(c, svRefStr, bp, ad, finalGapStart, finalGapEnd, sv.svt)) return false;

if ((_translocation(sv.svt)) || (finalGapStart < finalGapEnd)) {
sv.precise=true;
sv.svStart=finalGapStart;
Expand All @@ -719,11 +698,51 @@ namespace torali
sv.ciposhigh = ci_wiggle;
sv.ciendlow = -ci_wiggle;
sv.ciendhigh = ci_wiggle;

// Get exact alleles for INS and DEL
if (sv.svEnd - sv.svStart <= c.indelsize) {
if ((sv.svt == 2) || (sv.svt == 4)) {
std::string refVCF;
std::string altVCF;
int32_t cpos = 0;
bool inSV = false;
for(uint32_t j = 0; j<align.shape()[1]; ++j) {
if (align[0][j] != '-') {
++cpos;
if (cpos == ad.cStart) inSV = true;
else if (cpos == ad.cEnd) inSV = false;
}
if (inSV) {
if (align[0][j] != '-') altVCF += align[0][j];
if (align[1][j] != '-') refVCF += align[1][j];
}
}
sv.alleles = _addAlleles(refVCF, altVCF);
}
}
return true;
} else {
return false;
}
}

template<typename TConfig>
inline bool
alignConsensus(TConfig const& c, bam_hdr_t* hdr, char const* seq, char const* sndSeq, StructuralVariantRecord& sv, bool const realign) {
if ( (int32_t) sv.consensus.size() < (2 * c.minimumFlankSize + sv.insLen)) return false;

// Get reference slice
Breakpoint bp(sv);
if (sv.svt ==4) {
int32_t bufferSpace = std::max((int32_t) ((sv.consensus.size() - sv.insLen) / 3), c.minimumFlankSize);
_initBreakpoint(hdr, bp, bufferSpace, sv.svt);
} else _initBreakpoint(hdr, bp, sv.consensus.size(), sv.svt);
if (bp.chr != bp.chr2) bp.part1 = _getSVRef(c, sndSeq, bp, bp.chr2, sv.svt);
std::string svRefStr = _getSVRef(c, seq, bp, bp.chr, sv.svt);

// Generate consensus alignment
return _alignConsensus(c, sv.consensus, svRefStr, sv, bp, realign);
}

template<typename TConfig>
inline bool
Expand Down

0 comments on commit 11ecbac

Please sign in to comment.