From 3967defafc3046c526f8d94366fd4c7cec2d4306 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 27 Mar 2024 14:48:47 +0100 Subject: [PATCH 1/8] Test stdin and stdout --- tests/test_xopen.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/test_xopen.py b/tests/test_xopen.py index 6bfb764..9a2e046 100644 --- a/tests/test_xopen.py +++ b/tests/test_xopen.py @@ -3,6 +3,7 @@ """ import bz2 import sys +import tempfile from contextlib import contextmanager import functools import gzip @@ -634,3 +635,21 @@ def test_pass_bytesio_for_reading_and_writing(ext, threads): filelike.seek(0) with xopen(filelike, "rb", format=format, threads=threads) as fh: assert fh.readline() == first_line + + +def test_xopen_stdin(monkeypatch): + with open(TEST_DIR / "file.txt") as in_file: + monkeypatch.setattr("sys.stdin", in_file) + with xopen("-", "rt") as f: + data = f.read() + assert data == CONTENT + + +def test_xopen_stdout(monkeypatch): + with tempfile.TemporaryFile(mode="w+t") as raw: + monkeypatch.setattr("sys.stdout", raw) + with xopen("-", "wt") as f: + f.write("Hello world!") + raw.seek(0) + data = raw.read() + assert data == "Hello world!" From a2430ded62b054318b73026c2301e3fc0aec39df Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 27 Mar 2024 15:58:34 +0100 Subject: [PATCH 2/8] Fix stdin and pipe reading --- src/xopen/__init__.py | 20 +++++++++++++++++--- tests/test_xopen.py | 9 ++++++--- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/xopen/__init__.py b/src/xopen/__init__.py index d777641..7da6746 100644 --- a/src/xopen/__init__.py +++ b/src/xopen/__init__.py @@ -10,6 +10,7 @@ import dataclasses import gzip +import stat import sys import io import os @@ -701,8 +702,6 @@ def _file_or_path_to_binary_stream( file_or_path: FileOrPath, binary_mode: str ) -> Tuple[BinaryIO, bool]: assert binary_mode in ("rb", "wb", "ab") - if file_or_path == "-": - return _open_stdin_or_out(binary_mode), False if isinstance(file_or_path, (str, bytes)) or hasattr(file_or_path, "__fspath__"): return open(os.fspath(file_or_path), binary_mode), True # type: ignore if isinstance(file_or_path, io.TextIOWrapper): @@ -726,6 +725,14 @@ def _filepath_from_path_or_filelike(fileorpath: FileOrPath) -> str: return "" +def _file_is_a_socket_or_pipe(filepath): + try: + mode = os.stat(filepath).st_mode + return not (stat.S_ISFIFO(mode) or stat.S_ISSOCK(mode) or stat.S_ISPORT(mode)) + except (OSError, TypeError): # Type error for unexpected types in stat. + return False + + @overload def xopen( filename: FileOrPath, @@ -756,7 +763,7 @@ def xopen( ... -def xopen( +def xopen( # noqa: C901 filename: FileOrPath, mode: Literal["r", "w", "a", "rt", "rb", "wt", "wb", "at", "ab"] = "r", compresslevel: Optional[int] = None, @@ -819,6 +826,13 @@ def xopen( binary_mode = mode[0] + "b" filepath = _filepath_from_path_or_filelike(filename) + # Open non-regular files such as pipes and sockets here to force opening + # them once. + if filename == "-": + filename = _open_stdin_or_out(binary_mode) + elif _file_is_a_socket_or_pipe(filename): + filename = open(filename, binary_mode) + if format not in (None, "gz", "xz", "bz2", "zst"): raise ValueError( f"Format not supported: {format}. " diff --git a/tests/test_xopen.py b/tests/test_xopen.py index 9a2e046..7412863 100644 --- a/tests/test_xopen.py +++ b/tests/test_xopen.py @@ -637,10 +637,13 @@ def test_pass_bytesio_for_reading_and_writing(ext, threads): assert fh.readline() == first_line -def test_xopen_stdin(monkeypatch): - with open(TEST_DIR / "file.txt") as in_file: +@pytest.mark.parametrize("threads", (0, 1)) +def test_xopen_stdin(monkeypatch, ext, threads): + if ext == ".zst" and zstandard is None: + return + with open(TEST_DIR / f"file.txt{ext}") as in_file: monkeypatch.setattr("sys.stdin", in_file) - with xopen("-", "rt") as f: + with xopen("-", "rt", threads=threads) as f: data = f.read() assert data == CONTENT From a6f4b9eddea688975065dd5ed19a275ab49f1de2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 27 Mar 2024 16:31:19 +0100 Subject: [PATCH 3/8] Add extensive tests to read from pipes --- src/xopen/__init__.py | 8 ++++++-- tests/test_xopen.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/xopen/__init__.py b/src/xopen/__init__.py index 7da6746..bd5b967 100644 --- a/src/xopen/__init__.py +++ b/src/xopen/__init__.py @@ -721,7 +721,11 @@ def _filepath_from_path_or_filelike(fileorpath: FileOrPath) -> str: except TypeError: pass if hasattr(fileorpath, "name"): - return fileorpath.name + name = fileorpath.name + if isinstance(name, str): + return name + elif isinstance(name, bytes): + return name.decode() return "" @@ -831,7 +835,7 @@ def xopen( # noqa: C901 if filename == "-": filename = _open_stdin_or_out(binary_mode) elif _file_is_a_socket_or_pipe(filename): - filename = open(filename, binary_mode) + filename = open(filename, binary_mode) # type: ignore if format not in (None, "gz", "xz", "bz2", "zst"): raise ValueError( diff --git a/tests/test_xopen.py b/tests/test_xopen.py index 7412863..d51d3cc 100644 --- a/tests/test_xopen.py +++ b/tests/test_xopen.py @@ -2,6 +2,7 @@ Tests for the xopen.xopen function """ import bz2 +import subprocess import sys import tempfile from contextlib import contextmanager @@ -656,3 +657,32 @@ def test_xopen_stdout(monkeypatch): raw.seek(0) data = raw.read() assert data == "Hello world!" + + +@pytest.mark.parametrize("threads", (0, 1)) +def test_xopen_read_from_pipe(ext, threads): + if ext == ".zst" and zstandard is None: + return + in_file = TEST_DIR / f"file.txt{ext}" + process = subprocess.Popen(("cat", str(in_file)), stdout=subprocess.PIPE) + with xopen(process.stdout, "rt", threads=threads) as f: + data = f.read() + process.wait() + assert data == CONTENT + + +@pytest.mark.parametrize("threads", (0, 1)) +def test_xopen_write_to_pipe(threads, ext): + if ext == ".zst" and zstandard is None: + return + format = ext.lstrip(".") + if format == "": + format = None + process = subprocess.Popen(("cat",), stdout=subprocess.PIPE, stdin=subprocess.PIPE) + with xopen(process.stdin, "wt", threads=threads, format=format) as f: + f.write(CONTENT) + process.stdin.close() + with xopen(process.stdout, "rt", threads=threads) as f: + data = f.read() + process.wait() + assert data == CONTENT From 1ad041dd08a2ae9539fbcb7d45d965c772b9f621 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 27 Mar 2024 16:54:22 +0100 Subject: [PATCH 4/8] Fix wrong condition check --- src/xopen/__init__.py | 2 +- tests/test_xopen.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/xopen/__init__.py b/src/xopen/__init__.py index bd5b967..873ae40 100644 --- a/src/xopen/__init__.py +++ b/src/xopen/__init__.py @@ -732,7 +732,7 @@ def _filepath_from_path_or_filelike(fileorpath: FileOrPath) -> str: def _file_is_a_socket_or_pipe(filepath): try: mode = os.stat(filepath).st_mode - return not (stat.S_ISFIFO(mode) or stat.S_ISSOCK(mode) or stat.S_ISPORT(mode)) + return stat.S_ISFIFO(mode) or stat.S_ISSOCK(mode) or stat.S_ISPORT(mode) except (OSError, TypeError): # Type error for unexpected types in stat. return False diff --git a/tests/test_xopen.py b/tests/test_xopen.py index d51d3cc..c6e0409 100644 --- a/tests/test_xopen.py +++ b/tests/test_xopen.py @@ -642,7 +642,7 @@ def test_pass_bytesio_for_reading_and_writing(ext, threads): def test_xopen_stdin(monkeypatch, ext, threads): if ext == ".zst" and zstandard is None: return - with open(TEST_DIR / f"file.txt{ext}") as in_file: + with open(TEST_DIR / f"file.txt{ext}", "rt") as in_file: monkeypatch.setattr("sys.stdin", in_file) with xopen("-", "rt", threads=threads) as f: data = f.read() From aa854982a4d1be33d89cc7e29463cc518686f4a0 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 27 Mar 2024 17:00:00 +0100 Subject: [PATCH 5/8] Fix resource and encodingwarnings caused by test code --- tests/test_xopen.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_xopen.py b/tests/test_xopen.py index c6e0409..47974a1 100644 --- a/tests/test_xopen.py +++ b/tests/test_xopen.py @@ -642,7 +642,8 @@ def test_pass_bytesio_for_reading_and_writing(ext, threads): def test_xopen_stdin(monkeypatch, ext, threads): if ext == ".zst" and zstandard is None: return - with open(TEST_DIR / f"file.txt{ext}", "rt") as in_file: + # Add encoding to suppress encoding warnings + with open(TEST_DIR / f"file.txt{ext}", "rt", encoding="latin-1") as in_file: monkeypatch.setattr("sys.stdin", in_file) with xopen("-", "rt", threads=threads) as f: data = f.read() @@ -650,7 +651,8 @@ def test_xopen_stdin(monkeypatch, ext, threads): def test_xopen_stdout(monkeypatch): - with tempfile.TemporaryFile(mode="w+t") as raw: + # Add encoding to suppress encoding warnings + with tempfile.TemporaryFile(mode="w+t", encoding="latin-1") as raw: monkeypatch.setattr("sys.stdout", raw) with xopen("-", "wt") as f: f.write("Hello world!") @@ -668,6 +670,7 @@ def test_xopen_read_from_pipe(ext, threads): with xopen(process.stdout, "rt", threads=threads) as f: data = f.read() process.wait() + process.stdout.close() assert data == CONTENT @@ -685,4 +688,5 @@ def test_xopen_write_to_pipe(threads, ext): with xopen(process.stdout, "rt", threads=threads) as f: data = f.read() process.wait() + process.stdout.close() assert data == CONTENT From 7060bf34b9f10c7bc989024073a8ab6bdefc8772 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 27 Mar 2024 17:06:11 +0100 Subject: [PATCH 6/8] Add a changelog entry for the stdin bugfix --- README.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.rst b/README.rst index 3e43dc7..eb117ac 100644 --- a/README.rst +++ b/README.rst @@ -184,6 +184,13 @@ To ensure that you get the correct ``zstandard`` version, you can specify the `` Changelog --------- +in-development +~~~~~~~~~~~~~~~~~~~ ++ #158: Fixed a bug where reading from stdin and other pipes would discard the + first bytes from the input. ++ #156: Zstd files compressed with the ``--long=31`` files can now be opened + without throwing errors. + v2.0.0 (2024-03-26) ~~~~~~~~~~~~~~~~~~~ From 72f4d4dbfa85fb7b6611b0d59ab7be60a2fa3617 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 28 Mar 2024 06:51:41 +0100 Subject: [PATCH 7/8] Add a test for /dev/stdin --- tests/test_xopen.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/test_xopen.py b/tests/test_xopen.py index 47974a1..86234bc 100644 --- a/tests/test_xopen.py +++ b/tests/test_xopen.py @@ -690,3 +690,21 @@ def test_xopen_write_to_pipe(threads, ext): process.wait() process.stdout.close() assert data == CONTENT + + +@pytest.mark.skipif( + not os.path.exists("/dev/stdin"), reason="/dev/stdin does not exist" +) +@pytest.mark.parametrize("threads", (0, 1)) +def test_xopen_dev_stdin_read(threads, ext): + if ext == ".zst" and zstandard is None: + return + file = str(Path(__file__).parent / f"file.txt{ext}") + result = subprocess.run( + f"cat {file} | python -c 'import xopen; " + f'f=xopen.xopen("/dev/stdin", "rt", threads={threads});print(f.read())\'', + shell=True, + stdout=subprocess.PIPE, + encoding="ascii", + ) + assert result.stdout == CONTENT + "\n" From 77778a396afad1355b2c6e8c1af3588303cd51e9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 28 Mar 2024 07:47:04 +0100 Subject: [PATCH 8/8] Use the stat.S_ISREG check rather than try to cover all corner cases --- src/xopen/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/xopen/__init__.py b/src/xopen/__init__.py index 873ae40..9cc7c69 100644 --- a/src/xopen/__init__.py +++ b/src/xopen/__init__.py @@ -732,7 +732,8 @@ def _filepath_from_path_or_filelike(fileorpath: FileOrPath) -> str: def _file_is_a_socket_or_pipe(filepath): try: mode = os.stat(filepath).st_mode - return stat.S_ISFIFO(mode) or stat.S_ISSOCK(mode) or stat.S_ISPORT(mode) + # Treat anything that is not a regular file as special + return not stat.S_ISREG(mode) except (OSError, TypeError): # Type error for unexpected types in stat. return False