|
1 | 1 | """ |
2 | | -Assumption: |
3 | | - - The values to compress are assumed to be comparable, |
4 | | - values can be sorted and compared with '<' and '>' operators. |
| 2 | +Coordinate Compression Utility |
| 3 | +------------------------------ |
| 4 | +
|
| 5 | +Fix for Issue #13226: Handles missing or invalid values (None, NaN) |
| 6 | +to ensure consistent compression behavior. |
| 7 | +
|
| 8 | +This module provides a `CoordinateCompressor` class that safely compresses |
| 9 | +and decompresses values from a list by mapping each unique valid value |
| 10 | +to a unique integer index. |
| 11 | +
|
| 12 | +Missing or invalid values (None or float NaN) are left out of the |
| 13 | +compression mapping; compressing such a value returns -1. |
5 | 14 | """ |
6 | 15 |
|
| 16 | +from __future__ import annotations |
| 17 | + |
| 18 | +import math |
| 19 | +from typing import Any |
| 20 | + |
7 | 21 |
|
8 | 22 | class CoordinateCompressor: |
9 | 23 | """ |
10 | | - A class for coordinate compression. |
11 | | -
|
12 | | - This class allows you to compress and decompress a list of values. |
13 | | -
|
14 | | - Mapping: |
15 | | - In addition to compression and decompression, this class maintains a mapping |
16 | | - between original values and their compressed counterparts using two data |
17 | | - structures: a dictionary `coordinate_map` and a list `reverse_map`: |
18 | | - - `coordinate_map`: A dictionary that maps original values to their compressed |
19 | | - coordinates. Keys are original values, and values are compressed coordinates. |
20 | | - - `reverse_map`: A list used for reverse mapping, where each index corresponds |
21 | | - to a compressed coordinate, and the value at that index is the original value. |
22 | | -
|
23 | | - Example of mapping: |
24 | | - Original: 10, Compressed: 0 |
25 | | - Original: 52, Compressed: 1 |
26 | | - Original: 83, Compressed: 2 |
27 | | - Original: 100, Compressed: 3 |
28 | | -
|
29 | | - This mapping allows for efficient compression and decompression of values within |
30 | | - the list. |
| 24 | + CoordinateCompressor compresses comparable values to integer ranks. |
| 25 | +
|
| 26 | + Example: |
| 27 | + >>> arr = [100, 10, 52, 83] |
| 28 | + >>> cc = CoordinateCompressor(arr) |
| 29 | + >>> cc.compress(100) |
| 30 | + 3 |
| 31 | + >>> cc.compress(52) |
| 32 | + 1 |
| 33 | + >>> cc.decompress(1) |
| 34 | + 52 |
| 35 | + >>> cc.compress(None) |
| 36 | + -1 |
31 | 37 | """ |
32 | 38 |
|
33 | | - def __init__(self, arr: list[int | float | str]) -> None: |
| 39 | + def __init__(self, arr: list[Any]) -> None: |
34 | 40 | """ |
35 | 41 | Initialize the CoordinateCompressor with a list. |
36 | 42 |
|
37 | 43 | Args: |
38 | | - arr: The list of values to be compressed. |
39 | | -
|
40 | | - >>> arr = [100, 10, 52, 83] |
41 | | - >>> cc = CoordinateCompressor(arr) |
42 | | - >>> cc.compress(100) |
43 | | - 3 |
44 | | - >>> cc.compress(52) |
45 | | - 1 |
46 | | - >>> cc.decompress(1) |
47 | | - 52 |
48 | | - """ |
49 | | - |
50 | | - # A dictionary to store compressed coordinates |
51 | | - self.coordinate_map: dict[int | float | str, int] = {} |
52 | | - |
53 | | - # A list to store reverse mapping |
54 | | - self.reverse_map: list[int | float | str] = [-1] * len(arr) |
| 44 | + arr: The list of values to be compressed. |
55 | 45 |
|
56 | | - self.arr = sorted(arr) # The input list |
57 | | - self.n = len(arr) # The length of the input list |
58 | | - self.compress_coordinates() |
| 46 | + Invalid or missing values (None, NaN) are skipped when building |
| 47 | + the mapping, ensuring consistent compression behavior. |
59 | 48 |
|
60 | | - def compress_coordinates(self) -> None: |
61 | | - """ |
62 | | - Compress the coordinates in the input list. |
63 | | -
|
64 | | - >>> arr = [100, 10, 52, 83] |
| 49 | + >>> arr = [100, None, 52, 83, float("nan")] |
65 | 50 | >>> cc = CoordinateCompressor(arr) |
66 | | - >>> cc.coordinate_map[83] |
| 51 | + >>> cc.compress(100) |
67 | 52 | 2 |
68 | | - >>> cc.coordinate_map[80] # Value not in the original list |
69 | | - Traceback (most recent call last): |
70 | | - ... |
71 | | - KeyError: 80 |
72 | | - >>> cc.reverse_map[2] |
73 | | - 83 |
| 53 | + >>> cc.compress(None) |
| 54 | + -1 |
| 55 | + >>> cc.compress(float("nan")) |
| 56 | + -1 |
74 | 57 | """ |
75 | | - key = 0 |
76 | | - for val in self.arr: |
77 | | - if val not in self.coordinate_map: |
78 | | - self.coordinate_map[val] = key |
79 | | - self.reverse_map[key] = val |
80 | | - key += 1 |
81 | | - |
82 | | - def compress(self, original: float | str) -> int: |
| 58 | + # Store the original list |
| 59 | + self.original = list(arr) |
| 60 | + |
| 61 | + # Keep every value except None and float NaN; the rest are assumed mutually comparable |
| 62 | + valid_values = [ |
| 63 | + x |
| 64 | + for x in arr |
| 65 | + if x is not None and not (isinstance(x, float) and math.isnan(x)) |
| 66 | + ] |
| 67 | + |
| 68 | + # Remove duplicates with dict.fromkeys, then sort so indices follow ascending value order |
| 69 | + unique_sorted = sorted(dict.fromkeys(valid_values)) |
| 70 | + |
| 71 | + # Create mappings |
| 72 | + self.coordinate_map: dict[Any, int] = { |
| 73 | + v: i for i, v in enumerate(unique_sorted) |
| 74 | + } |
| 75 | + self.reverse_map: list[Any] = unique_sorted.copy() |
| 76 | + |
| 77 | + # Track invalid values (for reference, not essential) |
| 78 | + self.invalid_values: list[Any] = [ |
| 79 | + x |
| 80 | + for x in arr |
| 81 | + if x is None or (isinstance(x, float) and math.isnan(x)) |
| 82 | + ] |
| 83 | + |
| 84 | + def compress(self, original: Any) -> int: |
83 | 85 | """ |
84 | | - Compress a single value. |
85 | | -
|
86 | | - Args: |
87 | | - original: The value to compress. |
| 86 | + Compress a single value to its coordinate index. |
88 | 87 |
|
89 | 88 | Returns: |
90 | | - The compressed integer, or -1 if not found in the original list. |
| 89 | + int: The compressed index, or -1 if invalid or not found. |
91 | 90 |
|
92 | 91 | >>> arr = [100, 10, 52, 83] |
93 | 92 | >>> cc = CoordinateCompressor(arr) |
94 | | - >>> cc.compress(100) |
95 | | - 3 |
96 | | - >>> cc.compress(7) # Value not in the original list |
| 93 | + >>> cc.compress(10) |
| 94 | + 0 |
| 95 | + >>> cc.compress(7) |
| 96 | + -1 |
| 97 | + >>> cc.compress(None) |
97 | 98 | -1 |
98 | 99 | """ |
| 100 | + # Handle invalid or missing values |
| 101 | + if original is None: |
| 102 | + return -1 |
| 103 | + if isinstance(original, float) and math.isnan(original): |
| 104 | + return -1 |
99 | 105 | return self.coordinate_map.get(original, -1) |
100 | 106 |
|
101 | | - def decompress(self, num: int) -> int | float | str: |
| 107 | + def decompress(self, num: int) -> Any: |
102 | 108 | """ |
103 | | - Decompress a single integer. |
| 109 | + Decompress an integer coordinate back to its original value. |
104 | 110 |
|
105 | 111 | Args: |
106 | | - num: The compressed integer to decompress. |
| 112 | + num: Compressed index to decompress. |
107 | 113 |
|
108 | 114 | Returns: |
109 | | - The original value. |
| 115 | + The original value for valid indices, otherwise -1. |
110 | 116 |
|
111 | 117 | >>> arr = [100, 10, 52, 83] |
112 | 118 | >>> cc = CoordinateCompressor(arr) |
113 | 119 | >>> cc.decompress(0) |
114 | 120 | 10 |
115 | | - >>> cc.decompress(5) # Compressed coordinate out of range |
| 121 | + >>> cc.decompress(5) |
116 | 122 | -1 |
117 | 123 | """ |
118 | | - return self.reverse_map[num] if 0 <= num < len(self.reverse_map) else -1 |
| 124 | + if 0 <= num < len(self.reverse_map): |
| 125 | + return self.reverse_map[num] |
| 126 | + return -1 |
119 | 127 |
|
120 | 128 |
|
121 | 129 | if __name__ == "__main__": |
122 | 130 | from doctest import testmod |
123 | 131 |
|
124 | 132 | testmod() |
125 | 133 |
|
126 | | - arr: list[int | float | str] = [100, 10, 52, 83] |
| 134 | + arr: list[Any] = [100, 10, 52, 83, None, float("nan")] |
127 | 135 | cc = CoordinateCompressor(arr) |
128 | 136 |
|
| 137 | + print("Coordinate Compression Demo:\n") |
129 | 138 | for original in arr: |
130 | 139 | compressed = cc.compress(original) |
131 | 140 | decompressed = cc.decompress(compressed) |
132 | | - print(f"Original: {decompressed}, Compressed: {compressed}") |
| 141 | + print( |
| 142 | + f"Original: {original!r:>6} | " |
| 143 | + f"Compressed: {compressed:>2} | " |
| 144 | + f"Decompressed: {decompressed!r}" |
| 145 | + ) |
| 146 | + |
0 commit comments