Skip to content

Commit

Permalink
fix: protein hashing
Browse files Browse the repository at this point in the history
  • Loading branch information
chasemc committed Oct 20, 2024
1 parent 33be90c commit 4ee41be
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 1 deletion.
2 changes: 2 additions & 0 deletions socialgene/base/molbio.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ def _standardize_sequence(self):
amino acids, raising an error if an unknown character is found.
"""
self.sequence = self.sequence.upper()
self.sequence = self.sequence.replace(" ", "")
self.sequence = self.sequence.strip("*")
if not all([i in self._amino_acids for i in set(self.sequence)]):
log.error(self.sequence)
raise ValueError("Unknown character/letter in protein sequence")
Expand Down
2 changes: 1 addition & 1 deletion socialgene/hashing/hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def hash_aminos(input, **kwargs):
# make sure everything is uppercase before hashing
cleaned = input.upper()
# remove all space characters (tabs/newlines are not removed by str.replace(" ", ""))
cleaned = "".join(cleaned.split())
cleaned = cleaned.replace(" ", "")
# remove "*" if it's at the beginning or end of the string but not in the middle
cleaned = cleaned.strip("*")
return hasher(input=cleaned, **kwargs)
Expand Down

0 comments on commit 4ee41be

Please sign in to comment.