3
3
import asyncio
4
4
import io
5
5
import logging
6
- from typing import BinaryIO
6
+ from typing import (
7
+ Any ,
8
+ BinaryIO ,
9
+ )
7
10
8
11
from aiohttp import web
9
12
from aiohttp .multipart import BodyPartReader
13
+ import attr
14
+ import frozendict
10
15
import openpyxl
16
+ import openpyxl .cell .cell
11
17
12
18
from dl_file_secure_reader_lib .settings import FileSecureReaderSettings
13
19
14
20
15
21
LOGGER = logging .getLogger (__name__ )
16
22
17
23
24
@attr.s(frozen=True)
class CachedCellProcessor:
    """
    Cache wrappers for cell values to prevent excessive memory usage when an
    Excel file is made of duplicate values.

    Every distinct combination of reported data type, Python type and value
    is wrapped in one shared ``frozendict``; cells that repeat a combination
    receive that same object instead of a freshly built mapping.
    """

    # Maps (reported data type, Python type of the value, value) to the shared
    # wrapper. The instance is frozen, but this dict is mutated in place.
    cache_values: dict[tuple[str, type, Any], Any] = attr.ib(factory=dict, init=False)

    def process_cell(self, cell: openpyxl.cell.cell.Cell) -> dict:
        """
        Return the (possibly shared) wrapper describing ``cell``.

        The wrapper is a ``frozendict`` with two keys:

        * ``"value"`` - ``str(cell.value)`` when ``cell.data_type`` is ``"d"``,
          otherwise ``cell.value`` unchanged.
        * ``"data_type"`` - ``"i"`` when ``cell.value`` is an ``int`` instance
          (in Python this includes ``bool``), otherwise ``cell.data_type``.

        The value must be hashable because it becomes part of the cache key.
        """
        raw_value = cell.value
        cell_value = str(raw_value) if cell.data_type == "d" else raw_value
        cell_type = "i" if isinstance(raw_value, int) else cell.data_type

        # ``True == 1`` and ``hash(True) == hash(1)``, so a key built from the
        # value alone would hand a boolean cell the wrapper cached for an
        # integer cell (or vice versa). Including the concrete Python type
        # keeps such equal-but-different values apart.
        cache_key = (cell_type, type(cell_value), cell_value)

        wrapper = self.cache_values.get(cache_key)
        if wrapper is None:
            wrapper = frozendict.frozendict(
                data_type=cell_type,
                value=cell_value,
            )
            self.cache_values[cache_key] = wrapper

        return wrapper
53
+
54
+
18
55
def parse_excel_data (data : BinaryIO , feature_excel_read_only : bool ) -> list :
19
56
result = []
20
57
58
+ cell_processor = CachedCellProcessor ()
59
+
21
60
try :
22
61
wb = openpyxl .load_workbook (
23
62
data ,
@@ -36,16 +75,7 @@ def parse_excel_data(data: BinaryIO, feature_excel_read_only: bool) -> list:
36
75
result .append (
37
76
{
38
77
"sheetname" : sheetname ,
39
- "data" : [
40
- [
41
- {
42
- "value" : str (cell .value ) if cell .data_type == "d" else cell .value ,
43
- "data_type" : "i" if isinstance (cell .value , int ) else cell .data_type ,
44
- }
45
- for cell in row
46
- ]
47
- for row in sheet .rows
48
- ],
78
+ "data" : [[cell_processor .process_cell (cell ) for cell in row ] for row in sheet .rows ],
49
79
}
50
80
)
51
81
return result
0 commit comments