Skip to content

Fix flushing behaviour in threaded mode. #59

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 24, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -9,6 +9,8 @@ Changelog

version 0.5.1-dev
-----------------
+ Fix a bug where flushing in threaded mode did not write the data to the
output file.
+ Threaded reading and writing no longer block exiting when an exception
occurs in the main thread.

49 changes: 25 additions & 24 deletions src/zlib_ng/gzip_ng_threaded.py
Original file line number Diff line number Diff line change
@@ -60,7 +60,7 @@ def open(filename, mode="rb", compresslevel=gzip_ng._COMPRESS_LEVEL_TRADEOFF,
gzip_file = io.BufferedReader(
_ThreadedGzipReader(filename, block_size=block_size))
else:
gzip_file = io.BufferedWriter(
gzip_file = FlushableBufferedWriter(
_ThreadedGzipWriter(
filename,
mode.replace("t", "b"),
@@ -167,6 +167,12 @@ def closed(self) -> bool:
return self._closed


class FlushableBufferedWriter(io.BufferedWriter):
    """A ``BufferedWriter`` whose ``flush()`` also flushes the raw stream."""

    def flush(self):
        """Drain the write buffer into ``self.raw``, then flush ``self.raw``.

        The buffer is emptied first so the raw stream has received all
        pending bytes before it is asked to flush.
        """
        super().flush()
        raw_stream = self.raw
        raw_stream.flush()


class _ThreadedGzipWriter(io.RawIOBase):
"""
Write a gzip file using multiple threads.
@@ -315,30 +321,35 @@ def write(self, b) -> int:
self.input_queues[worker_index].put((data, zdict))
return len(data)

def flush(self):
def _end_gzip_stream(self):
    """Terminate the gzip data written so far and flush it to ``self.raw``.

    Waits until every input queue and every output queue has been fully
    processed, then appends an empty final deflate block followed by the
    trailer built from ``self._crc`` and ``self._size`` (size truncated to
    32 bits). Afterwards both counters are reset to zero and ``self.raw``
    is flushed.
    """
    self._check_closed()
    # Order matters: input queues first (all data compressed), then the
    # output queues (all compressed data written).
    for pending in (*self.input_queues, *self.output_queues):
        pending.join()
    # Compressing nothing as raw deflate yields an empty block that is
    # marked as the last block of the stream.
    self.raw.write(zlib_ng.compress(b"", wbits=-15))
    self.raw.write(struct.pack("<II", self._crc, self._size & 0xFFFFFFFF))
    # Start the counters afresh for whatever is written after this point.
    self._crc = 0
    self._size = 0
    self.raw.flush()

def flush(self):
    """Finish the current gzip stream and prepare for further writes.

    ``_end_gzip_stream()`` completes and flushes what has been written so
    far; ``_write_gzip_header()`` is then called, presumably to open a new
    gzip member for subsequent writes (its body is not shown here).
    """
    # The stream must be ended before a new header is emitted.
    self._end_gzip_stream()
    self._write_gzip_header()

def close(self) -> None:
    """Finish the gzip stream, stop the writer and release the raw file.

    Calling ``close()`` on an already closed writer does nothing.

    The stream is terminated exactly once via ``_end_gzip_stream()``, which
    writes the final deflate block and the trailer. ``flush()`` must not be
    used here because it goes on to write a new gzip header, and writing a
    second deflate block and trailer afterwards would leave header-less
    trailing bytes in the file.

    Raises:
        The exception stored in ``self.exception``, if any. In that case
        ``self.raw`` is closed unconditionally before raising.
    """
    if self._closed:
        return
    self._end_gzip_stream()
    self.stop()
    if self.exception:
        # Mark the writer closed before raising so a repeated close()
        # does not try to end the stream again.
        self.raw.close()
        self._closed = True
        raise self.exception
    # Only close the underlying file when closefd is set.
    if self.closefd:
        self.raw.close()
    self._closed = True
@@ -371,41 +382,31 @@ def _compress(self, index: int):
def _write(self):
    """Take compressed chunks from the output queues and write them out.

    The output queues are visited in round-robin order, one chunk per
    queue per turn. For every chunk the running ``self._crc`` and
    ``self._size`` are updated in place, so that ``_end_gzip_stream()``
    can read (and reset) them at any time after joining the queues; no
    separate local totals are kept. Each chunk is written to ``self.raw``
    exactly once.

    The loop ends when the calling thread is no longer alive, or when a
    queue stays empty for the polling interval while ``self.running`` is
    false.
    """
    index = 0
    output_queues = self.output_queues
    while self._calling_thread.is_alive():
        output_queue = output_queues[index % self.threads]
        try:
            # Short timeout so the loop conditions are re-checked regularly
            # instead of blocking forever on an empty queue.
            compressed, crc, data_length = output_queue.get(timeout=0.05)
        except queue.Empty:
            if not self.running:
                return
            continue
        self._crc = zlib_ng.crc32_combine(self._crc, crc, data_length)
        self._size += data_length
        self.raw.write(compressed)
        # Signal completion only after the chunk has been written, so a
        # join() on this queue implies the data reached self.raw.
        output_queue.task_done()
        index += 1

def _compress_and_write(self):
if not self.threads == 1:
raise SystemError("Compress_and_write is for one thread only")
fp = self.raw
total_crc = 0
size = 0
in_queue = self.input_queues[0]
compressor = self.compressors[0]
while self._calling_thread.is_alive():
try:
data, zdict = in_queue.get(timeout=0.05)
except queue.Empty:
if not self.running:
self._crc = total_crc
self._size = size
return
continue
try:
@@ -415,9 +416,9 @@ def _compress_and_write(self):
self._set_error_and_empty_queue(e, in_queue)
return
data_length = len(data)
total_crc = zlib_ng.crc32_combine(total_crc, crc, data_length)
size += data_length
fp.write(compressed)
self._crc = zlib_ng.crc32_combine(self._crc, crc, data_length)
self._size += data_length
self.raw.write(compressed)
in_queue.task_done()

def _set_error_and_empty_queue(self, error, q):
16 changes: 16 additions & 0 deletions tests/test_gzip_ng_threaded.py
Original file line number Diff line number Diff line change
@@ -230,3 +230,19 @@ def test_threaded_program_can_exit_on_error(tmp_path, mode, threads):
)
f.write("raise Exception('Error')\n")
subprocess.run([sys.executable, str(program)])


@pytest.mark.parametrize("threads", [1, 2])
def test_flush(tmp_path, threads):
    """After every flush() the file on disk must decompress to all data so far."""
    test_file = tmp_path / "output.gz"
    chunks_and_expected = (
        (b"1", b"1"),
        (b"2", b"12"),
        (b"3", b"123"),
    )
    with gzip_ng_threaded.open(test_file, "wb", threads=threads) as f:
        for chunk, expected in chunks_and_expected:
            f.write(chunk)
            f.flush()
            # Read the file back while the writer is still open.
            assert gzip.decompress(test_file.read_bytes()) == expected
    # Checked once more after the writer has been closed.
    assert gzip.decompress(test_file.read_bytes()) == b"123"