Skip to content

Commit 1963763

Browse files
authored
Merge pull request #61 from pycompression/release_0.5.1
Release 0.5.1
2 parents ce8a54d + 1449b99 commit 1963763

File tree

6 files changed

+82
-41
lines changed

6 files changed

+82
-41
lines changed

CHANGELOG.rst

+7
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,13 @@ Changelog
77
.. This document is user facing. Please word the changes in such a way
88
.. that users understand how the changes affect the new version.
99
10+
version 0.5.1
11+
-----------------
12+
+ Fix a bug where flushing in threaded mode did not write the data to the
13+
output file.
14+
+ Threaded reading and writing no longer block exiting when an exception
15+
occurs in the main thread.
16+
1017
version 0.5.0
1118
-----------------
1219
+ Wheels are now built for macOS arm64 architectures.

docs/conf.py

+4-11
Original file line numberDiff line numberDiff line change
@@ -4,26 +4,19 @@
44
# list see the documentation:
55
# https://www.sphinx-doc.org/en/master/usage/configuration.html
66

7-
from distutils.dist import DistributionMetadata
8-
from pathlib import Path
9-
10-
import pkg_resources
7+
import zlib_ng
118

129
# -- Project information -----------------------------------------------------
1310

14-
# Get package information from the installed package.
15-
package = pkg_resources.get_distribution("zlib-ng")
16-
metadata_file = Path(package.egg_info) / Path(package.PKG_INFO)
17-
metadata = DistributionMetadata(path=str(metadata_file))
18-
1911
project = 'python-zlib-ng'
2012
copyright = '2023, Leiden University Medical Center'
2113
author = 'Leiden University Medical Center'
2214

15+
2316
# The short X.Y version
24-
version = package.parsed_version.base_version
17+
version = zlib_ng.__version__
2518
# The full version, including alpha/beta/rc tags
26-
release = package.version
19+
release = zlib_ng.__version__
2720

2821

2922
# -- General configuration ---------------------------------------------------

requirements-docs.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
sphinx
22
setuptools
3-
sphinx-rtd-theme>=1.2.0rc3,<1.3
3+
sphinx-rtd-theme
44
sphinx-argparse

setup.py

+1
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ def build_zlib_ng():
151151
"Programming Language :: Python :: 3.10",
152152
"Programming Language :: Python :: 3.11",
153153
"Programming Language :: Python :: 3.12",
154+
"Programming Language :: Python :: 3.13",
154155
"Programming Language :: Python :: Implementation :: CPython",
155156
"Programming Language :: Python :: Implementation :: PyPy",
156157
"Programming Language :: C",

src/zlib_ng/gzip_ng_threaded.py

+32-29
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def open(filename, mode="rb", compresslevel=gzip_ng._COMPRESS_LEVEL_TRADEOFF,
6060
gzip_file = io.BufferedReader(
6161
_ThreadedGzipReader(filename, block_size=block_size))
6262
else:
63-
gzip_file = io.BufferedWriter(
63+
gzip_file = FlushableBufferedWriter(
6464
_ThreadedGzipWriter(
6565
filename,
6666
mode.replace("t", "b"),
@@ -101,6 +101,7 @@ def __init__(self, filename, queue_size=2, block_size=1024 * 1024):
101101
self.worker = threading.Thread(target=self._decompress)
102102
self._closed = False
103103
self.running = True
104+
self._calling_thread = threading.current_thread()
104105
self.worker.start()
105106

106107
def _check_closed(self, msg=None):
@@ -110,15 +111,15 @@ def _check_closed(self, msg=None):
110111
def _decompress(self):
111112
block_size = self.block_size
112113
block_queue = self.queue
113-
while self.running:
114+
while self.running and self._calling_thread.is_alive():
114115
try:
115116
data = self.fileobj.read(block_size)
116117
except Exception as e:
117118
self.exception = e
118119
return
119120
if not data:
120121
return
121-
while self.running:
122+
while self.running and self._calling_thread.is_alive():
122123
try:
123124
block_queue.put(data, timeout=0.05)
124125
break
@@ -166,6 +167,12 @@ def closed(self) -> bool:
166167
return self._closed
167168

168169

170+
class FlushableBufferedWriter(io.BufferedWriter):
171+
def flush(self):
172+
super().flush()
173+
self.raw.flush()
174+
175+
169176
class _ThreadedGzipWriter(io.RawIOBase):
170177
"""
171178
Write a gzip file using multiple threads.
@@ -215,6 +222,7 @@ def __init__(self,
215222
if "b" not in mode:
216223
mode += "b"
217224
self.lock = threading.Lock()
225+
self._calling_thread = threading.current_thread()
218226
self.exception: Optional[Exception] = None
219227
self.level = level
220228
self.previous_block = b""
@@ -313,30 +321,35 @@ def write(self, b) -> int:
313321
self.input_queues[worker_index].put((data, zdict))
314322
return len(data)
315323

316-
def flush(self):
324+
def _end_gzip_stream(self):
317325
self._check_closed()
318326
# Wait for all data to be compressed
319327
for in_q in self.input_queues:
320328
in_q.join()
321329
# Wait for all data to be written
322330
for out_q in self.output_queues:
323331
out_q.join()
332+
# Write an empty deflate block with a last block marker.
333+
self.raw.write(zlib_ng.compress(b"", wbits=-15))
334+
trailer = struct.pack("<II", self._crc, self._size & 0xFFFFFFFF)
335+
self.raw.write(trailer)
336+
self._crc = 0
337+
self._size = 0
324338
self.raw.flush()
325339

340+
def flush(self):
341+
self._end_gzip_stream()
342+
self._write_gzip_header()
343+
326344
def close(self) -> None:
327345
if self._closed:
328346
return
329-
self.flush()
347+
self._end_gzip_stream()
330348
self.stop()
331349
if self.exception:
332350
self.raw.close()
333351
self._closed = True
334352
raise self.exception
335-
# Write an empty deflate block with a last block marker.
336-
self.raw.write(zlib_ng.compress(b"", wbits=-15))
337-
trailer = struct.pack("<II", self._crc, self._size & 0xFFFFFFFF)
338-
self.raw.write(trailer)
339-
self.raw.flush()
340353
if self.closefd:
341354
self.raw.close()
342355
self._closed = True
@@ -353,7 +366,7 @@ def _compress(self, index: int):
353366
try:
354367
data, zdict = in_queue.get(timeout=0.05)
355368
except queue.Empty:
356-
if not self.running:
369+
if not (self.running and self._calling_thread.is_alive()):
357370
return
358371
continue
359372
try:
@@ -369,41 +382,31 @@ def _compress(self, index: int):
369382
def _write(self):
370383
index = 0
371384
output_queues = self.output_queues
372-
fp = self.raw
373-
total_crc = 0
374-
size = 0
375385
while True:
376386
out_index = index % self.threads
377387
output_queue = output_queues[out_index]
378388
try:
379389
compressed, crc, data_length = output_queue.get(timeout=0.05)
380390
except queue.Empty:
381-
if not self.running:
382-
self._crc = total_crc
383-
self._size = size
391+
if not (self.running and self._calling_thread.is_alive()):
384392
return
385393
continue
386-
total_crc = zlib_ng.crc32_combine(total_crc, crc, data_length)
387-
size += data_length
388-
fp.write(compressed)
394+
self._crc = zlib_ng.crc32_combine(self._crc, crc, data_length)
395+
self._size += data_length
396+
self.raw.write(compressed)
389397
output_queue.task_done()
390398
index += 1
391399

392400
def _compress_and_write(self):
393401
if not self.threads == 1:
394402
raise SystemError("Compress_and_write is for one thread only")
395-
fp = self.raw
396-
total_crc = 0
397-
size = 0
398403
in_queue = self.input_queues[0]
399404
compressor = self.compressors[0]
400405
while True:
401406
try:
402407
data, zdict = in_queue.get(timeout=0.05)
403408
except queue.Empty:
404-
if not self.running:
405-
self._crc = total_crc
406-
self._size = size
409+
if not (self.running and self._calling_thread.is_alive()):
407410
return
408411
continue
409412
try:
@@ -413,9 +416,9 @@ def _compress_and_write(self):
413416
self._set_error_and_empty_queue(e, in_queue)
414417
return
415418
data_length = len(data)
416-
total_crc = zlib_ng.crc32_combine(total_crc, crc, data_length)
417-
size += data_length
418-
fp.write(compressed)
419+
self._crc = zlib_ng.crc32_combine(self._crc, crc, data_length)
420+
self._size += data_length
421+
self.raw.write(compressed)
419422
in_queue.task_done()
420423

421424
def _set_error_and_empty_queue(self, error, q):

tests/test_gzip_ng_threaded.py

+37
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
import io
1010
import itertools
1111
import os
12+
import subprocess
13+
import sys
1214
import tempfile
1315
from pathlib import Path
1416

@@ -209,3 +211,38 @@ def test_threaded_writer_does_not_close_stream():
209211
assert not test_stream.closed
210212
test_stream.seek(0)
211213
assert gzip.decompress(test_stream.read()) == b"thisisatest"
214+
215+
216+
@pytest.mark.timeout(5)
217+
@pytest.mark.parametrize(
218+
["mode", "threads"], itertools.product(["rb", "wb"], [1, 2]))
219+
def test_threaded_program_can_exit_on_error(tmp_path, mode, threads):
220+
program = tmp_path / "no_context_manager.py"
221+
test_file = tmp_path / "output.gz"
222+
# Write 40 MiB of input data to saturate read buffer. Because of the repetitive
223+
# nature the resulting gzip file is very small (~40 KiB).
224+
test_file.write_bytes(gzip.compress(b"test" * (10 * 1024 * 1024)))
225+
with open(program, "wt") as f:
226+
f.write("from zlib_ng import gzip_ng_threaded\n")
227+
f.write(
228+
f"f = gzip_ng_threaded.open('{test_file}', "
229+
f"mode='{mode}', threads={threads})\n"
230+
)
231+
f.write("raise Exception('Error')\n")
232+
subprocess.run([sys.executable, str(program)])
233+
234+
235+
@pytest.mark.parametrize("threads", [1, 2])
236+
def test_flush(tmp_path, threads):
237+
test_file = tmp_path / "output.gz"
238+
with gzip_ng_threaded.open(test_file, "wb", threads=threads) as f:
239+
f.write(b"1")
240+
f.flush()
241+
assert gzip.decompress(test_file.read_bytes()) == b"1"
242+
f.write(b"2")
243+
f.flush()
244+
assert gzip.decompress(test_file.read_bytes()) == b"12"
245+
f.write(b"3")
246+
f.flush()
247+
assert gzip.decompress(test_file.read_bytes()) == b"123"
248+
assert gzip.decompress(test_file.read_bytes()) == b"123"

0 commit comments

Comments
 (0)