Skip to content

Commit

Permalink
minor changes to hash function, inconsequential to SBT operation
Browse files: browse the repository at this point in the history
  • Loading branch information
rsharris committed Jul 20, 2019
1 parent 9f3b8a0 commit 3edcb49
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 0 deletions.
14 changes: 14 additions & 0 deletions cmd_sabuhash_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ void SabuhashTestCommand::usage
s << " --strings hash strings instead of bits; applies only when input is" << endl;
s << " from sequence files" << endl;
s << " (by default kmers are hashed in 2-bit encoded form)" << endl;
s << " --negate negate hash values; replace values by their ones-complement" << endl;
s << " --modulus=<M> set the hash modulus" << endl;
s << " (by default, the hash values have no modulus)" << endl;
s << " --seed=<number> set the hash function's 32-bit seed" << endl;
Expand All @@ -75,6 +76,7 @@ void SabuhashTestCommand::parse

kmerSize = defaultKmerSize;
useStringKmers = false;
negateHash = false;
modulus = 0;
hashSeed = 0;
#ifdef useJellyHash
Expand Down Expand Up @@ -136,6 +138,11 @@ void SabuhashTestCommand::parse
if (arg == "--strings")
{ useStringKmers = true; continue; }

// --negate

if (arg == "--negate")
{ negateHash = true; continue; }

// --modulus=<M>

if ((is_prefix_of (arg, "--modulus="))
Expand Down Expand Up @@ -278,6 +285,13 @@ void SabuhashTestCommand::perform_hash_test
h64 = hasher->hash(seq);
h64r = hasher->hash(revCompSeq);
}

if (negateHash)
{
h64 = ~h64;
h64r = ~h64r;
}

if (modulus == 0)
isMatch = (h64 == h64r);
else
Expand Down
1 change: 1 addition & 0 deletions cmd_sabuhash_test.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class SabuhashTestCommand: public Command
std::vector<std::string> seqFilenames;
std::uint32_t kmerSize;
bool useStringKmers;
bool negateHash;
std::uint32_t modulus;
std::uint64_t hashSeed;
HashCanonical* hasher = nullptr;
Expand Down
8 changes: 8 additions & 0 deletions sabuhash.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ class SabuHash
unsigned int k;
std::uint64_t seed;
bool allowN;
bool validNt; // true => latest nucleotide was valid
unsigned int charsAccumulated;
std::uint64_t kernelTable[256], forwardByK[256], forwardByK2[4];
std::uint64_t hForward;
Expand All @@ -141,6 +142,7 @@ class SabuHash
: k(_k),
seed(_seed),
allowN(_allowN),
validNt(false),
charsAccumulated(0),
hForward(0)
{
Expand Down Expand Up @@ -258,11 +260,13 @@ class SabuHash
if (kernelTable[chIn] == 0)
{
// chIn not in {A,C,G,T}
validNt = false;
charsAccumulated = 0;
hForward = 0;
return 0;
}

validNt = true;
if (charsAccumulated < k)
{
hForward = forward(hForward) ^ kernelTable[chIn];
Expand Down Expand Up @@ -371,6 +375,7 @@ class SabuHashCanonical
unsigned int k;
std::uint64_t seed;
bool allowN;
bool validNt; // true => latest nucleotide was valid
unsigned int charsAccumulated;
std::uint64_t kernelTable[256], kernelTable_RC[256], kernelTable2_RC[4];
std::uint64_t forwardByK[256], forwardByK_RC[256], forwardByK2[4];
Expand All @@ -384,6 +389,7 @@ class SabuHashCanonical
: k(_k),
seed(_seed),
allowN(_allowN),
validNt(false),
charsAccumulated(0),
hForward(0),
hRevComp(0)
Expand Down Expand Up @@ -545,11 +551,13 @@ class SabuHashCanonical
if (kernelTable[chIn] == 0)
{
// chIn not in {A,C,G,T}
validNt = false;
charsAccumulated = 0;
hForward = hRevComp = 0;
return 0;
}

validNt = true;
if (charsAccumulated < k)
{
hForward = SabuHash::forward(hForward) ^ kernelTable[chIn];
Expand Down

0 comments on commit 3edcb49

Please sign in to comment.