Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix stdin bug #158

Merged
merged 8 commits into from
Mar 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,13 @@ To ensure that you get the correct ``zstandard`` version, you can specify the ``
Changelog
---------

in-development
~~~~~~~~~~~~~~~~~~~
+ #158: Fixed a bug where reading from stdin and other pipes would discard the
first bytes from the input.
+ #156: Zstd files compressed with the ``--long=31`` option can now be opened
without throwing errors.

v2.0.0 (2024-03-26)
~~~~~~~~~~~~~~~~~~~

Expand Down
27 changes: 23 additions & 4 deletions src/xopen/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import dataclasses
import gzip
import stat
import sys
import io
import os
Expand Down Expand Up @@ -701,8 +702,6 @@ def _file_or_path_to_binary_stream(
file_or_path: FileOrPath, binary_mode: str
) -> Tuple[BinaryIO, bool]:
assert binary_mode in ("rb", "wb", "ab")
if file_or_path == "-":
return _open_stdin_or_out(binary_mode), False
if isinstance(file_or_path, (str, bytes)) or hasattr(file_or_path, "__fspath__"):
return open(os.fspath(file_or_path), binary_mode), True # type: ignore
if isinstance(file_or_path, io.TextIOWrapper):
Expand All @@ -722,10 +721,23 @@ def _filepath_from_path_or_filelike(fileorpath: FileOrPath) -> str:
except TypeError:
pass
if hasattr(fileorpath, "name"):
return fileorpath.name
name = fileorpath.name
if isinstance(name, str):
return name
elif isinstance(name, bytes):
return name.decode()
return ""


def _file_is_a_socket_or_pipe(filepath):
try:
mode = os.stat(filepath).st_mode
# Treat anything that is not a regular file as special
return not stat.S_ISREG(mode)
except (OSError, TypeError): # Type error for unexpected types in stat.
return False


@overload
def xopen(
filename: FileOrPath,
Expand Down Expand Up @@ -756,7 +768,7 @@ def xopen(
...


def xopen(
def xopen( # noqa: C901
filename: FileOrPath,
mode: Literal["r", "w", "a", "rt", "rb", "wt", "wb", "at", "ab"] = "r",
compresslevel: Optional[int] = None,
Expand Down Expand Up @@ -819,6 +831,13 @@ def xopen(
binary_mode = mode[0] + "b"
filepath = _filepath_from_path_or_filelike(filename)

# Open non-regular files such as pipes and sockets here to force opening
# them once.
if filename == "-":
filename = _open_stdin_or_out(binary_mode)
elif _file_is_a_socket_or_pipe(filename):
filename = open(filename, binary_mode) # type: ignore

if format not in (None, "gz", "xz", "bz2", "zst"):
raise ValueError(
f"Format not supported: {format}. "
Expand Down
74 changes: 74 additions & 0 deletions tests/test_xopen.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
Tests for the xopen.xopen function
"""
import bz2
import subprocess
import sys
import tempfile
from contextlib import contextmanager
import functools
import gzip
Expand Down Expand Up @@ -634,3 +636,75 @@ def test_pass_bytesio_for_reading_and_writing(ext, threads):
filelike.seek(0)
with xopen(filelike, "rb", format=format, threads=threads) as fh:
assert fh.readline() == first_line


@pytest.mark.parametrize("threads", (0, 1))
def test_xopen_stdin(monkeypatch, ext, threads):
    """Opening "-" for reading yields the data of the file set as sys.stdin."""
    if zstandard is None and ext == ".zst":
        return
    source_path = TEST_DIR / f"file.txt{ext}"
    # An explicit encoding is given to suppress encoding warnings.
    with open(source_path, "rt", encoding="latin-1") as fake_stdin:
        monkeypatch.setattr("sys.stdin", fake_stdin)
        with xopen("-", "rt", threads=threads) as reader:
            text = reader.read()
        assert text == CONTENT


def test_xopen_stdout(monkeypatch):
    """Text written to "-" ends up in the file set as sys.stdout."""
    # An explicit encoding is given to suppress encoding warnings.
    with tempfile.TemporaryFile(mode="w+t", encoding="latin-1") as fake_stdout:
        monkeypatch.setattr("sys.stdout", fake_stdout)
        with xopen("-", "wt") as writer:
            writer.write("Hello world!")
        # Rewind only after the xopen handle has been closed.
        fake_stdout.seek(0)
        written = fake_stdout.read()
        assert written == "Hello world!"


@pytest.mark.parametrize("threads", (0, 1))
def test_xopen_read_from_pipe(ext, threads):
    """xopen reads the full content from the stdout pipe of a ``cat`` process."""
    if zstandard is None and ext == ".zst":
        return
    command = ("cat", str(TEST_DIR / f"file.txt{ext}"))
    producer = subprocess.Popen(command, stdout=subprocess.PIPE)
    with xopen(producer.stdout, "rt", threads=threads) as reader:
        text = reader.read()
    producer.wait()
    producer.stdout.close()
    assert text == CONTENT


@pytest.mark.parametrize("threads", (0, 1))
def test_xopen_write_to_pipe(threads, ext):
    """Data written through xopen into ``cat``'s stdin reads back from its stdout."""
    if zstandard is None and ext == ".zst":
        return
    # An empty extension means "no compression", which xopen expects as None.
    compression = ext.lstrip(".") or None
    relay = subprocess.Popen(
        ("cat",), stdout=subprocess.PIPE, stdin=subprocess.PIPE
    )
    with xopen(relay.stdin, "wt", threads=threads, format=compression) as writer:
        writer.write(CONTENT)
    relay.stdin.close()
    with xopen(relay.stdout, "rt", threads=threads) as reader:
        echoed = reader.read()
    relay.wait()
    relay.stdout.close()
    assert echoed == CONTENT


@pytest.mark.skipif(
    not os.path.exists("/dev/stdin"), reason="/dev/stdin does not exist"
)
@pytest.mark.parametrize("threads", (0, 1))
def test_xopen_dev_stdin_read(threads, ext):
    """xopen("/dev/stdin") in a child process reads everything piped to it.

    The child is started with ``sys.executable`` and an argument list rather
    than a shell command line, so the test runs under the same interpreter as
    the test suite and is not affected by special characters in the path of
    the test data. Passing ``input=`` makes the child's stdin a pipe.
    """
    if ext == ".zst" and zstandard is None:
        return
    payload = (Path(__file__).parent / f"file.txt{ext}").read_bytes()
    child_code = (
        "import xopen; "
        f'f=xopen.xopen("/dev/stdin", "rt", threads={threads});print(f.read())'
    )
    result = subprocess.run(
        [sys.executable, "-c", child_code],
        input=payload,
        stdout=subprocess.PIPE,
    )
    # print() in the child appends one newline to the file content.
    assert result.stdout.decode("ascii") == CONTENT + "\n"
Loading