Skip to content

Commit 8a3c708

Browse files
authored
Merge pull request #285 from google/features_extraction_tests_suite
Features extraction tests suite
2 parents b0868af + 7e9a019 commit 8a3c708

File tree

2 files changed

+40
-2
lines changed

2 files changed

+40
-2
lines changed

python/tests/test_features_extraction.py

+40-2
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,20 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import base64
16+
import gzip
1517
import json
1618
import math
1719
import random
1820
import string
1921
import tempfile
20-
from dataclasses import dataclass
22+
from dataclasses import asdict, dataclass
2123
from pathlib import Path
2224
from typing import List, Tuple
2325

2426
from magika import Magika
2527
from magika.seekable import Buffer
28+
from tests.utils import get_tests_data_dir
2629

2730
random.seed(42)
2831

@@ -196,5 +199,40 @@ def _generate_pattern(size: int) -> bytearray:
196199
return pattern
197200

198201

202+
def generate_reference_features_extraction():
    """Regenerate the reference data for the features extraction tests.

    Every case produced by ``get_features_extraction_test_suite`` is run
    through both ``Magika._extract_features_from_seekable`` and
    ``Magika._extract_features_from_seekable_v2``. The test metadata, the
    base64-encoded content and both extraction results are collected and
    written, as gzip-compressed JSON, to
    ``features_extraction/reference.json.gz`` under the directory returned by
    ``get_tests_data_dir()``.
    """
    features_size = 512
    padding_token = 256
    block_size = 1024

    # Both extractors receive the same trailing arguments; ``features_size``
    # is deliberately passed for each of the three size parameters.
    extraction_args = (
        features_size,
        features_size,
        features_size,
        padding_token,
        block_size,
    )

    test_suite = get_features_extraction_test_suite(features_size, block_size)

    reference_cases = []
    for info, content in test_suite:
        # One Buffer per test case, shared by the v1 and v2 extraction calls.
        seekable = Buffer(content)
        extracted_v1 = Magika._extract_features_from_seekable(
            seekable, *extraction_args
        )
        extracted_v2 = Magika._extract_features_from_seekable_v2(
            seekable, *extraction_args
        )
        reference_cases.append(
            {
                "test_info": asdict(info),
                # Raw bytes are not JSON-serializable, hence the base64 text.
                "content": base64.b64encode(content).decode("ascii"),
                "features_v1": asdict(extracted_v1),
                "features_v2": asdict(extracted_v2),
            }
        )

    output_path = get_tests_data_dir() / "features_extraction" / "reference.json.gz"
    output_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(reference_cases).encode("ascii")
    output_path.write_bytes(gzip.compress(serialized))
235+
236+
199237
if __name__ == "__main__":
200-
test_features_extraction(debug=False)
238+
generate_reference_features_extraction()
Binary file not shown.

0 commit comments

Comments
 (0)