Skip to content

Commit 484590c

Browse files
authored
feat: BI-6560 reuse values in excel parser (#1232)
feat: BI-6560 reuse values in excel parser (#1232)
1 parent 3438b7d commit 484590c

File tree

3 files changed

+45
-11
lines changed

3 files changed

+45
-11
lines changed

lib/dl_file_secure_reader_lib/dl_file_secure_reader_lib/resources/reader.py

Lines changed: 41 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -3,21 +3,60 @@
33
import asyncio
44
import io
55
import logging
6-
from typing import BinaryIO
6+
from typing import (
7+
Any,
8+
BinaryIO,
9+
)
710

811
from aiohttp import web
912
from aiohttp.multipart import BodyPartReader
13+
import attr
14+
import frozendict
1015
import openpyxl
16+
import openpyxl.cell.cell
1117

1218
from dl_file_secure_reader_lib.settings import FileSecureReaderSettings
1319

1420

1521
LOGGER = logging.getLogger(__name__)
1622

1723

24+
@attr.s(frozen=True)
class CachedCellProcessor:
    """
    Convert cells into ``{"data_type": ..., "value": ...}`` mappings, reusing
    one immutable mapping per distinct cell content.

    When a workbook repeats the same values many times, every repetition gets
    the same wrapper object instead of a freshly allocated one, which keeps
    memory usage down.
    """

    # Maps (reported data type, concrete Python type of the value, value) to
    # the shared wrapper. The concrete type is part of the key because Python
    # treats True == 1 and False == 0 (with identical hashes): without it a
    # boolean cell and an integer cell would be handed each other's wrapper.
    cache_values: dict[tuple[str, type, Any], Any] = attr.ib(factory=dict, init=False)

    def process_cell(self, cell: openpyxl.cell.cell.Cell) -> dict:
        """
        Return the shared wrapper describing ``cell``.

        :param cell: object exposing ``value`` and ``data_type`` attributes.
        :return: immutable mapping with keys ``data_type`` and ``value``.
            ``value`` is ``str(cell.value)`` when ``cell.data_type == "d"``,
            otherwise ``cell.value`` unchanged. ``data_type`` is ``"i"`` when
            ``cell.value`` is an ``int`` (``bool`` included, as a subclass of
            ``int``), otherwise ``cell.data_type``.
        """
        raw_value = cell.value
        cell_value = str(raw_value) if cell.data_type == "d" else raw_value
        cell_type = "i" if isinstance(raw_value, int) else cell.data_type

        cache_key = (cell_type, type(cell_value), cell_value)

        try:
            return self.cache_values[cache_key]
        except KeyError:
            # First occurrence of this content: build and remember it below.
            pass
        except TypeError:
            # Unhashable value: it cannot be used as a cache key, so hand back
            # an uncached wrapper rather than failing the whole parse.
            return frozendict.frozendict(data_type=cell_type, value=cell_value)

        wrapper = frozendict.frozendict(
            data_type=cell_type,
            value=cell_value,
        )
        self.cache_values[cache_key] = wrapper
        return wrapper
53+
54+
1855
def parse_excel_data(data: BinaryIO, feature_excel_read_only: bool) -> list:
1956
result = []
2057

58+
cell_processor = CachedCellProcessor()
59+
2160
try:
2261
wb = openpyxl.load_workbook(
2362
data,
@@ -36,16 +75,7 @@ def parse_excel_data(data: BinaryIO, feature_excel_read_only: bool) -> list:
3675
result.append(
3776
{
3877
"sheetname": sheetname,
39-
"data": [
40-
[
41-
{
42-
"value": str(cell.value) if cell.data_type == "d" else cell.value,
43-
"data_type": "i" if isinstance(cell.value, int) else cell.data_type,
44-
}
45-
for cell in row
46-
]
47-
for row in sheet.rows
48-
],
78+
"data": [[cell_processor.process_cell(cell) for cell in row] for row in sheet.rows],
4979
}
5080
)
5181
return result

lib/dl_file_secure_reader_lib/pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ version = "0.1.0"
99

1010
[tool.poetry.dependencies]
1111
aiohttp = "*"
12+
attrs = "*"
1213
dl-settings = {path = "../dl_settings"}
14+
frozendict = "*"
1315
openpyxl = "*"
1416
pydantic = "*"
1517
python = ">=3.10, <3.13"

metapkg/poetry.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)