diff --git a/README.rst b/README.rst index 3e43dc7..eb117ac 100644 --- a/README.rst +++ b/README.rst @@ -184,6 +184,13 @@ To ensure that you get the correct ``zstandard`` version, you can specify the `` Changelog --------- +in-development +~~~~~~~~~~~~~~~~~~~ ++ #158: Fixed a bug where reading from stdin and other pipes would discard the + first bytes from the input. ++ #156: Zstd files compressed with the ``--long=31`` flag can now be opened + without throwing errors. + v2.0.0 (2024-03-26) ~~~~~~~~~~~~~~~~~~~ diff --git a/src/xopen/__init__.py b/src/xopen/__init__.py index d777641..9cc7c69 100644 --- a/src/xopen/__init__.py +++ b/src/xopen/__init__.py @@ -10,6 +10,7 @@ import dataclasses import gzip +import stat import sys import io import os @@ -701,8 +702,6 @@ def _file_or_path_to_binary_stream( file_or_path: FileOrPath, binary_mode: str ) -> Tuple[BinaryIO, bool]: assert binary_mode in ("rb", "wb", "ab") - if file_or_path == "-": - return _open_stdin_or_out(binary_mode), False if isinstance(file_or_path, (str, bytes)) or hasattr(file_or_path, "__fspath__"): return open(os.fspath(file_or_path), binary_mode), True # type: ignore if isinstance(file_or_path, io.TextIOWrapper): @@ -722,10 +721,23 @@ def _filepath_from_path_or_filelike(fileorpath: FileOrPath) -> str: except TypeError: pass if hasattr(fileorpath, "name"): - return fileorpath.name + name = fileorpath.name + if isinstance(name, str): + return name + elif isinstance(name, bytes): + return name.decode() return "" +def _file_is_a_socket_or_pipe(filepath): + try: + mode = os.stat(filepath).st_mode + # Treat anything that is not a regular file as special + return not stat.S_ISREG(mode) + except (OSError, TypeError): # Type error for unexpected types in stat. + return False + + @overload def xopen( filename: FileOrPath, @@ -756,7 +768,7 @@ def xopen( ... 
-def xopen( +def xopen( # noqa: C901 filename: FileOrPath, mode: Literal["r", "w", "a", "rt", "rb", "wt", "wb", "at", "ab"] = "r", compresslevel: Optional[int] = None, @@ -819,6 +831,13 @@ def xopen( binary_mode = mode[0] + "b" filepath = _filepath_from_path_or_filelike(filename) + # Open non-regular files such as pipes and sockets here to force opening + # them once. + if filename == "-": + filename = _open_stdin_or_out(binary_mode) + elif _file_is_a_socket_or_pipe(filename): + filename = open(filename, binary_mode) # type: ignore + if format not in (None, "gz", "xz", "bz2", "zst"): raise ValueError( f"Format not supported: {format}. " diff --git a/tests/test_xopen.py b/tests/test_xopen.py index 6bfb764..86234bc 100644 --- a/tests/test_xopen.py +++ b/tests/test_xopen.py @@ -2,7 +2,9 @@ Tests for the xopen.xopen function """ import bz2 +import subprocess import sys +import tempfile from contextlib import contextmanager import functools import gzip @@ -634,3 +636,75 @@ def test_pass_bytesio_for_reading_and_writing(ext, threads): filelike.seek(0) with xopen(filelike, "rb", format=format, threads=threads) as fh: assert fh.readline() == first_line + + +@pytest.mark.parametrize("threads", (0, 1)) +def test_xopen_stdin(monkeypatch, ext, threads): + if ext == ".zst" and zstandard is None: + return + # Add encoding to suppress encoding warnings + with open(TEST_DIR / f"file.txt{ext}", "rt", encoding="latin-1") as in_file: + monkeypatch.setattr("sys.stdin", in_file) + with xopen("-", "rt", threads=threads) as f: + data = f.read() + assert data == CONTENT + + +def test_xopen_stdout(monkeypatch): + # Add encoding to suppress encoding warnings + with tempfile.TemporaryFile(mode="w+t", encoding="latin-1") as raw: + monkeypatch.setattr("sys.stdout", raw) + with xopen("-", "wt") as f: + f.write("Hello world!") + raw.seek(0) + data = raw.read() + assert data == "Hello world!" 
+ + +@pytest.mark.parametrize("threads", (0, 1)) +def test_xopen_read_from_pipe(ext, threads): + if ext == ".zst" and zstandard is None: + return + in_file = TEST_DIR / f"file.txt{ext}" + process = subprocess.Popen(("cat", str(in_file)), stdout=subprocess.PIPE) + with xopen(process.stdout, "rt", threads=threads) as f: + data = f.read() + process.wait() + process.stdout.close() + assert data == CONTENT + + +@pytest.mark.parametrize("threads", (0, 1)) +def test_xopen_write_to_pipe(threads, ext): + if ext == ".zst" and zstandard is None: + return + format = ext.lstrip(".") + if format == "": + format = None + process = subprocess.Popen(("cat",), stdout=subprocess.PIPE, stdin=subprocess.PIPE) + with xopen(process.stdin, "wt", threads=threads, format=format) as f: + f.write(CONTENT) + process.stdin.close() + with xopen(process.stdout, "rt", threads=threads) as f: + data = f.read() + process.wait() + process.stdout.close() + assert data == CONTENT + + +@pytest.mark.skipif( + not os.path.exists("/dev/stdin"), reason="/dev/stdin does not exist" +) +@pytest.mark.parametrize("threads", (0, 1)) +def test_xopen_dev_stdin_read(threads, ext): + if ext == ".zst" and zstandard is None: + return + file = str(Path(__file__).parent / f"file.txt{ext}") + result = subprocess.run( + f"cat {file} | python -c 'import xopen; " + f'f=xopen.xopen("/dev/stdin", "rt", threads={threads});print(f.read())\'', + shell=True, + stdout=subprocess.PIPE, + encoding="ascii", + ) + assert result.stdout == CONTENT + "\n"