-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Numpy-based RLE compression for bitmasks (#809)
* Speed up bitmask operations
- Loading branch information
1 parent
8a90571
commit 1bab699
Showing
7 changed files
with
255 additions
and
125 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
try: | ||
from encord.common.bitmask_operations.bitmask_operations_numpy import ( | ||
_mask_to_rle, | ||
_rle_to_mask, | ||
_rle_to_string, | ||
_string_to_rle, | ||
deserialise_bitmask, | ||
serialise_bitmask, | ||
transpose_bytearray, | ||
) | ||
except ImportError: | ||
from encord.common.bitmask_operations.bitmask_operations import ( | ||
_mask_to_rle, | ||
_rle_to_mask, | ||
_rle_to_string, | ||
_string_to_rle, | ||
deserialise_bitmask, | ||
serialise_bitmask, | ||
transpose_bytearray, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
from itertools import groupby | ||
from typing import List, Sequence, Tuple | ||
|
||
|
||
def _string_to_rle(mask_string: str) -> List[int]: | ||
""" | ||
COCO-compatible string to RLE-encoded mask de-serialisation | ||
""" | ||
cnts: List[int] = [] | ||
p = 0 | ||
|
||
while p < len(mask_string): | ||
x = 0 | ||
k = 0 | ||
more = 1 | ||
|
||
while more and p < len(mask_string): | ||
c = ord(mask_string[p]) - 48 | ||
x |= (c & 0x1F) << (5 * k) | ||
more = c & 0x20 | ||
p += 1 | ||
k += 1 | ||
|
||
if not more and (c & 0x10): | ||
x |= -1 << (5 * k) | ||
|
||
if len(cnts) > 2: | ||
x += cnts[-2] | ||
|
||
cnts.append(x) | ||
|
||
return cnts | ||
|
||
|
||
def _rle_to_string(rle: Sequence[int]) -> str: | ||
""" | ||
COCO-compatible RLE-encoded mask to string serialisation | ||
""" | ||
rle_string = "" | ||
for i, x in enumerate(rle): | ||
if i > 2: | ||
x -= rle[i - 2] | ||
|
||
more = 1 | ||
while more: | ||
c = x & 0x1F | ||
x >>= 5 | ||
|
||
if c & 0x10: | ||
more = x != -1 | ||
else: | ||
more = x != 0 | ||
|
||
if more: | ||
c |= 0x20 | ||
|
||
c += 48 | ||
rle_string += chr(c) | ||
|
||
return rle_string | ||
|
||
|
||
def _mask_to_rle(mask: bytes) -> List[int]: | ||
""" | ||
COCO-compatible raw bitmask to COCO-compatible RLE | ||
""" | ||
return [len(list(group)) for _, group in groupby(mask)] | ||
|
||
|
||
def _rle_to_mask(rle: List[int], size: int) -> bytes: | ||
""" | ||
COCO-compatible RLE to bitmask | ||
""" | ||
res = bytearray(size) | ||
offset = 0 | ||
|
||
for i, c in enumerate(rle): | ||
v = i % 2 | ||
while c > 0: | ||
res[offset] = v | ||
offset += 1 | ||
c -= 1 | ||
|
||
return bytes(res) | ||
|
||
|
||
def serialise_bitmask(bitmask: bytes) -> str:
    """
    Serialise a raw bitmask to a string.

    The mask is first run-length encoded with ``_mask_to_rle`` and the
    resulting counts are then turned into a string by ``_rle_to_string``.
    """
    return _rle_to_string(_mask_to_rle(bitmask))
|
||
|
||
def deserialise_bitmask(serialised_bitmask: str, length: int) -> bytes:
    """
    Rebuild a raw bitmask of ``length`` bytes from its serialised string.

    The string is decoded into run lengths by ``_string_to_rle`` and the
    runs are expanded into bytes by ``_rle_to_mask``.
    """
    return _rle_to_mask(_string_to_rle(serialised_bitmask), length)
|
||
|
||
def transpose_bytearray(byte_data: bytes, shape: Tuple[int, int]) -> bytes:
    """
    Transpose a row-major ``(rows, cols)`` byte matrix stored in a flat buffer.

    ``shape`` is ``(rows, cols)`` of the input. The element at
    ``(row, col)`` of the input is written to offset ``col * rows + row`` of
    the result, which has the same length as ``byte_data``. The result is
    returned as a ``bytearray``.
    """
    n_rows, n_cols = shape
    result = bytearray(len(byte_data))

    for r in range(n_rows):
        row_start = r * n_cols
        for c in range(n_cols):
            result[c * n_rows + r] = byte_data[row_start + c]

    return result
32 changes: 32 additions & 0 deletions
32
encord/common/bitmask_operations/bitmask_operations_numpy.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
from typing import List, Sequence, Tuple | ||
|
||
import numpy as np | ||
|
||
# Import the pure-Python implementations of the functions that do not have a numpy implementation | ||
from .bitmask_operations import _rle_to_mask, _rle_to_string, _string_to_rle | ||
|
||
|
||
def _mask_to_rle(mask: bytes) -> List[int]: | ||
""" | ||
COCO-compatible raw bitmask to COCO-compatible RLE | ||
""" | ||
mask_buffer = np.frombuffer(mask, dtype=np.bool_) | ||
changes = np.diff(mask_buffer, prepend=mask_buffer[0], append=mask_buffer[-1]) | ||
change_indices = np.flatnonzero(changes != 0) | ||
run_lengths = np.diff(np.concatenate(([0], change_indices, [len(mask_buffer)]))) | ||
return run_lengths.tolist() | ||
|
||
|
||
def serialise_bitmask(bitmask: bytes) -> str:
    """
    Serialise a raw bitmask to a string.

    The mask is first run-length encoded with ``_mask_to_rle`` and the
    resulting counts are then turned into a string by ``_rle_to_string``.
    """
    return _rle_to_string(_mask_to_rle(bitmask))
|
||
|
||
def deserialise_bitmask(serialised_bitmask: str, length: int) -> bytes:
    """
    Rebuild a raw bitmask of ``length`` bytes from its serialised string.

    The string is decoded into run lengths by ``_string_to_rle`` and the
    runs are expanded into bytes by ``_rle_to_mask``.
    """
    return _rle_to_mask(_string_to_rle(serialised_bitmask), length)
|
||
|
||
def transpose_bytearray(byte_data: bytes, shape: Tuple[int, int]) -> bytes:
    """
    Transpose a byte matrix stored in a flat buffer, using numpy.

    ``byte_data`` is viewed as an array of the given ``shape``, transposed,
    and written back out as bytes. The result is returned as a ``bytearray``.
    """
    matrix = np.frombuffer(byte_data, dtype=np.int8)
    matrix = matrix.reshape(shape)
    return bytearray(np.transpose(matrix).tobytes())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.