Skip to content

Commit

Permalink
Merge pull request #158 from pycompression/fixbug
Browse files Browse the repository at this point in the history
Fix stdin bug
  • Loading branch information
rhpvorderman authored Mar 28, 2024
2 parents 63a10ff + 77778a3 commit 3306269
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 4 deletions.
7 changes: 7 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,13 @@ To ensure that you get the correct ``zstandard`` version, you can specify the ``
Changelog
---------

in-development
~~~~~~~~~~~~~~~~~~~
+ #158: Fixed a bug where reading from stdin and other pipes would discard the
first bytes from the input.
+ #156: Zstd files compressed with the ``--long=31`` flag can now be opened
without throwing errors.

v2.0.0 (2024-03-26)
~~~~~~~~~~~~~~~~~~~

Expand Down
27 changes: 23 additions & 4 deletions src/xopen/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import dataclasses
import gzip
import stat
import sys
import io
import os
Expand Down Expand Up @@ -701,8 +702,6 @@ def _file_or_path_to_binary_stream(
file_or_path: FileOrPath, binary_mode: str
) -> Tuple[BinaryIO, bool]:
assert binary_mode in ("rb", "wb", "ab")
if file_or_path == "-":
return _open_stdin_or_out(binary_mode), False
if isinstance(file_or_path, (str, bytes)) or hasattr(file_or_path, "__fspath__"):
return open(os.fspath(file_or_path), binary_mode), True # type: ignore
if isinstance(file_or_path, io.TextIOWrapper):
Expand All @@ -722,10 +721,23 @@ def _filepath_from_path_or_filelike(fileorpath: FileOrPath) -> str:
except TypeError:
pass
if hasattr(fileorpath, "name"):
return fileorpath.name
name = fileorpath.name
if isinstance(name, str):
return name
elif isinstance(name, bytes):
return name.decode()
return ""


def _file_is_a_socket_or_pipe(filepath):
    """Return True when *filepath* exists and is not a regular file.

    Anything that ``os.stat`` reports as something other than a regular file
    (pipe, socket, device, directory, ...) counts as special. Values that
    cannot be inspected at all yield False instead of raising.

    :param filepath: Any value; only values accepted by ``os.stat`` can
        produce True.
    :return: True for an existing non-regular file, False otherwise.
    """
    try:
        mode = os.stat(filepath).st_mode
    except (OSError, TypeError, ValueError):
        # OSError: the path does not exist or is not accessible.
        # TypeError: unexpected types, such as file-like objects.
        # ValueError: a path with an embedded null byte or a string that
        # cannot be encoded for the file system.
        return False
    # Treat anything that is not a regular file as special
    return not stat.S_ISREG(mode)


@overload
def xopen(
filename: FileOrPath,
Expand Down Expand Up @@ -756,7 +768,7 @@ def xopen(
...


def xopen(
def xopen( # noqa: C901
filename: FileOrPath,
mode: Literal["r", "w", "a", "rt", "rb", "wt", "wb", "at", "ab"] = "r",
compresslevel: Optional[int] = None,
Expand Down Expand Up @@ -819,6 +831,13 @@ def xopen(
binary_mode = mode[0] + "b"
filepath = _filepath_from_path_or_filelike(filename)

# Open non-regular files such as pipes and sockets here to force opening
# them once.
if filename == "-":
filename = _open_stdin_or_out(binary_mode)
elif _file_is_a_socket_or_pipe(filename):
filename = open(filename, binary_mode) # type: ignore

if format not in (None, "gz", "xz", "bz2", "zst"):
raise ValueError(
f"Format not supported: {format}. "
Expand Down
74 changes: 74 additions & 0 deletions tests/test_xopen.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
Tests for the xopen.xopen function
"""
import bz2
import subprocess
import sys
import tempfile
from contextlib import contextmanager
import functools
import gzip
Expand Down Expand Up @@ -634,3 +636,75 @@ def test_pass_bytesio_for_reading_and_writing(ext, threads):
filelike.seek(0)
with xopen(filelike, "rb", format=format, threads=threads) as fh:
assert fh.readline() == first_line


@pytest.mark.parametrize("threads", (0, 1))
def test_xopen_stdin(monkeypatch, ext, threads):
    """Check that ``xopen("-", "rt")`` reads CONTENT from a replaced sys.stdin.

    The test returns early without checking anything when the ``.zst``
    variant is requested while ``zstandard`` is None.
    """
    zstd_requested = ext == ".zst"
    if zstd_requested and zstandard is None:
        return
    source_path = TEST_DIR / f"file.txt{ext}"
    # An explicit encoding is passed to suppress encoding warnings
    with open(source_path, "rt", encoding="latin-1") as fake_stdin:
        monkeypatch.setattr("sys.stdin", fake_stdin)
        with xopen("-", "rt", threads=threads) as reader:
            assert reader.read() == CONTENT


def test_xopen_stdout(monkeypatch):
    """Check that text written through ``xopen("-", "wt")`` lands in sys.stdout.

    sys.stdout is replaced by a temporary file so that the written text can be
    read back and compared.
    """
    # An explicit encoding is passed to suppress encoding warnings
    capture = tempfile.TemporaryFile(mode="w+t", encoding="latin-1")
    with capture:
        monkeypatch.setattr("sys.stdout", capture)
        with xopen("-", "wt") as writer:
            writer.write("Hello world!")
        # Rewind only after the xopen handle has been closed.
        capture.seek(0)
        assert capture.read() == "Hello world!"


@pytest.mark.parametrize("threads", (0, 1))
def test_xopen_read_from_pipe(ext, threads):
    """Check that xopen reads CONTENT from the stdout pipe of a ``cat`` child.

    The test returns early without checking anything when the ``.zst``
    variant is requested while ``zstandard`` is None.
    """
    if ext == ".zst" and zstandard is None:
        return
    in_file = TEST_DIR / f"file.txt{ext}"
    # Popen as a context manager closes the pipe and waits for the child even
    # when opening or reading raises, so a failing test leaks no process.
    with subprocess.Popen(("cat", str(in_file)), stdout=subprocess.PIPE) as process:
        with xopen(process.stdout, "rt", threads=threads) as f:
            data = f.read()
    assert data == CONTENT


@pytest.mark.parametrize("threads", (0, 1))
def test_xopen_write_to_pipe(threads, ext):
    """Check a round trip of CONTENT through the pipes of a ``cat`` child.

    CONTENT is written with xopen to the child's stdin using the format
    derived from *ext*, then read back with xopen from the child's stdout.
    The test returns early without checking anything when the ``.zst``
    variant is requested while ``zstandard`` is None.
    """
    if ext == ".zst" and zstandard is None:
        return
    # An empty extension means no explicit format is passed.
    fmt = ext.lstrip(".") or None
    # Popen as a context manager closes the pipes and waits for the child even
    # when writing or reading raises, so a failing test leaks no process.
    with subprocess.Popen(
        ("cat",), stdout=subprocess.PIPE, stdin=subprocess.PIPE
    ) as process:
        with xopen(process.stdin, "wt", threads=threads, format=fmt) as f:
            f.write(CONTENT)
        # Close the write end so that cat sees end-of-file and finishes.
        process.stdin.close()
        with xopen(process.stdout, "rt", threads=threads) as f:
            data = f.read()
    assert data == CONTENT


@pytest.mark.skipif(
    not os.path.exists("/dev/stdin"), reason="/dev/stdin does not exist"
)
@pytest.mark.parametrize("threads", (0, 1))
def test_xopen_dev_stdin_read(threads, ext):
    """Check that a child interpreter can read piped data via ``/dev/stdin``.

    A ``cat`` process feeds the test file into the stdin of a Python child
    that opens ``/dev/stdin`` with xopen and prints what it read. The test
    returns early without checking anything when the ``.zst`` variant is
    requested while ``zstandard`` is None.
    """
    if ext == ".zst" and zstandard is None:
        return
    in_file = str(Path(__file__).parent / f"file.txt{ext}")
    script = (
        "import xopen; "
        f'f=xopen.xopen("/dev/stdin", "rt", threads={threads});print(f.read())'
    )
    # Build the pipeline from argument lists instead of a shell string, so a
    # path with spaces or shell metacharacters cannot break the command.
    # sys.executable makes the child use the interpreter running the tests
    # rather than whichever "python" happens to be first on PATH.
    with subprocess.Popen(("cat", in_file), stdout=subprocess.PIPE) as producer:
        result = subprocess.run(
            (sys.executable, "-c", script),
            stdin=producer.stdout,
            stdout=subprocess.PIPE,
            encoding="ascii",
        )
    assert result.stdout == CONTENT + "\n"

0 comments on commit 3306269

Please sign in to comment.