|
12 | 12 | # See the License for the specific language governing permissions and
|
13 | 13 | # limitations under the License.
|
14 | 14 |
|
| 15 | +import base64 |
| 16 | +import gzip |
15 | 17 | import json
|
16 | 18 | import math
|
17 | 19 | import random
|
18 | 20 | import string
|
19 | 21 | import tempfile
|
20 |
| -from dataclasses import dataclass |
| 22 | +from dataclasses import asdict, dataclass |
21 | 23 | from pathlib import Path
|
22 | 24 | from typing import List, Tuple
|
23 | 25 |
|
24 | 26 | from magika import Magika
|
25 | 27 | from magika.seekable import Buffer
|
| 28 | +from tests.utils import get_tests_data_dir |
26 | 29 |
|
27 | 30 | random.seed(42)
|
28 | 31 |
|
@@ -196,5 +199,40 @@ def _generate_pattern(size: int) -> bytearray:
|
196 | 199 | return pattern
|
197 | 200 |
|
198 | 201 |
|
def generate_reference_features_extraction():
    """Regenerate the reference data for the features-extraction tests.

    Runs both ``Magika._extract_features_from_seekable`` and
    ``Magika._extract_features_from_seekable_v2`` over every case returned by
    ``get_features_extraction_test_suite`` and records, for each case, the
    test metadata, the base64-encoded content and the two extracted feature
    sets. The collected list is written as gzip-compressed JSON to
    ``<tests data dir>/features_extraction/reference.json.gz``; missing parent
    directories are created.
    """
    features_size = 512
    padding_token = 256
    block_size = 1024

    test_suite = get_features_extraction_test_suite(features_size, block_size)

    ref_features_extraction_tests = []

    for test_info, test_content in test_suite:
        # The same Buffer instance is handed to both extractors.
        s = Buffer(test_content)
        # features_size is passed three times; presumably these are the
        # beg/mid/end sizes -- confirm against the extractor signatures.
        features_v1 = Magika._extract_features_from_seekable(
            s, features_size, features_size, features_size, padding_token, block_size
        )
        features_v2 = Magika._extract_features_from_seekable_v2(
            s, features_size, features_size, features_size, padding_token, block_size
        )

        test_case = {
            "test_info": asdict(test_info),
            # Raw bytes are not JSON-serializable, so store them as base64 text.
            "content": base64.b64encode(test_content).decode("ascii"),
            "features_v1": asdict(features_v1),
            "features_v2": asdict(features_v2),
        }
        ref_features_extraction_tests.append(test_case)

    ref_features_extraction_tests_path = (
        get_tests_data_dir() / "features_extraction" / "reference.json.gz"
    )
    ref_features_extraction_tests_path.parent.mkdir(parents=True, exist_ok=True)

    # json.dumps escapes non-ASCII characters by default, so the ASCII
    # encoding below cannot fail.
    payload = json.dumps(ref_features_extraction_tests).encode("ascii")
    # The gzip header embeds a timestamp; pinning it to 0 makes the output
    # byte-for-byte reproducible, so regenerating from identical data yields
    # an identical file.
    ref_features_extraction_tests_path.write_bytes(gzip.compress(payload, mtime=0))
| 235 | + |
| 236 | + |
if __name__ == "__main__":
    # Running this module as a script regenerates the reference file used by
    # the features-extraction tests.
    generate_reference_features_extraction()
0 commit comments