Skip to content
This repository has been archived by the owner on Sep 11, 2024. It is now read-only.

Commit

Permalink
Merge pull request #65 from climatepolicyradar/RND-490-text-block-hashable
Browse files Browse the repository at this point in the history

RND-490: making TextBlock objects hashable
  • Loading branch information
mpjuhasz authored Oct 23, 2023
2 parents e2baaa7 + 9565fa9 commit 81b0480
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 1 deletion.
7 changes: 6 additions & 1 deletion src/cpr_data_access/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,14 +196,19 @@ def to_string(self) -> str:
"""Return text in a clean format"""
return " ".join([line.strip() for line in self.text])

def __hash__(self) -> int:
    """
    Get hash of the text block, based on its text and its text_block_id.

    The hash combines the cleaned text returned by `to_string` with
    `text_block_id`, so two blocks with the same text and ID hash alike.
    Both values are read at call time, so changing either of them after the
    block has been placed in a set or used as a dict key changes its hash.

    :return int: hash of the (text, text_block_id) pair
    """
    # Hash the values as a tuple instead of interpolating encoded bytes into
    # an f-string: formatting bytes into a str hashes their repr, does
    # redundant encoding work, and emits BytesWarning under `python -b`.
    return hash((self.to_string(), self.text_block_id))

@cached_property
def text_hash(self) -> str:
"""
Get hash of text block text. If the text block has no text (although this shouldn't be the case), return an empty string.
:return str: md5sum + "__" + sha256, or empty string if the text block has no text
"""

if self.text == "":
return ""

Expand Down
18 changes: 18 additions & 0 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
CPRDocumentMetadata,
Span,
KnowledgeBaseIDs,
TextBlock,
)


Expand Down Expand Up @@ -433,3 +434,20 @@ def test_display_text_block(test_document, test_spans_valid):
assert isinstance(block_html, str)
assert len(block_html) > 0
assert block_html.startswith("<div")


def test_text_block_hashable(test_document):
    """Check that text blocks are hashable and that hashing agrees with equality."""
    first_block = test_document.text_blocks[0]

    # Building a set raises TypeError if any block is unhashable.
    set(test_document.text_blocks)

    assert isinstance(hash(first_block), int)

    # A block rebuilt from the same field values must compare and hash equal.
    comparison_block = TextBlock(**first_block.dict())

    assert comparison_block == first_block
    assert hash(comparison_block) == hash(first_block)

    # The ID is part of both equality and the hash, so changing it must
    # make the two blocks differ on both counts.
    comparison_block.text_block_id = "0"

    assert comparison_block != first_block
    assert hash(comparison_block) != hash(first_block)

0 comments on commit 81b0480

Please sign in to comment.