Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ability to read from IOBase objects #37

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions libarchive/adapters/archive_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,46 @@ def opener(archive_res):
*args,
**kwargs)

def stream_enumerator(io, buffer_size=1048576, *args, **kwargs):
    """Return an enumerator that reads an archive from a Python stream.

    libarchive pulls its input through three ctypes callbacks that wrap
    `io`: one to read the next chunk, one to seek and one to close.

    :param io: Binary stream exposing `readinto()`, `seek()` and `close()`.
               The stream is closed when libarchive invokes the close
               callback.
    :param buffer_size: Size in bytes of the single buffer that is reused
                        for every read.
    :param args: Extra positional arguments forwarded to `_enumerator`.
    :param kwargs: Extra keyword arguments forwarded to `_enumerator`;
                   `entry_cls` defaults to `_ArchiveEntryItReadable`.
    :return: Whatever `_enumerator` returns for the stream-backed opener.
    """

    buf = ctypes.create_string_buffer(buffer_size)

    # NOTE: each callback is an FFI boundary. An exception that escapes a
    # ctypes callback cannot propagate to the caller (ctypes only reports it
    # as ignored) and the C side then receives an unspecified result, so
    # every failure is converted to ARCHIVE_FATAL here instead.

    def archive_read(archive, client_data, out):
        try:
            bytes_read = io.readinto(buf)

            if bytes_read is None:
                # A non-blocking stream with no data available. Returning 0
                # would be taken as end-of-archive, so report a failure.
                _LOGGER.debug("Stream read returned no data (non-blocking "
                              "stream?)")
                return libarchive.constants.archive.ARCHIVE_FATAL

            # Point libarchive at the buffer that was just filled.
            out.contents.contents = buf
            return bytes_read
        except (IOError, OSError) as e:
            _LOGGER.debug("Exception during stream read: %s", e)
            return libarchive.constants.archive.ARCHIVE_FATAL
        except Exception:
            # e.g. ValueError raised by an already-closed stream.
            _LOGGER.exception("Unexpected error during stream read")
            return libarchive.constants.archive.ARCHIVE_FATAL

    def archive_seek(archive, client_data, offset, whence):
        try:
            return io.seek(offset, whence)
        except (IOError, OSError) as e:
            _LOGGER.debug("Exception during stream seek: %s", e)
            return libarchive.constants.archive.ARCHIVE_FATAL
        except Exception:
            _LOGGER.exception("Unexpected error during stream seek")
            return libarchive.constants.archive.ARCHIVE_FATAL

    def archive_close(archive, client_data):
        try:
            io.close()
            return libarchive.constants.archive.ARCHIVE_OK
        except (IOError, OSError) as e:
            _LOGGER.debug("Exception during stream close: %s", e)
            return libarchive.constants.archive.ARCHIVE_FATAL
        except Exception:
            _LOGGER.exception("Unexpected error during stream close")
            return libarchive.constants.archive.ARCHIVE_FATAL

    # The wrapped callbacks are captured by `opener` below, which keeps them
    # referenced for as long as the opener itself is alive.
    read_func = libarchive.calls.archive_read.c_archive_read_func(archive_read)
    seek_func = libarchive.calls.archive_read.c_archive_seek_func(archive_seek)
    close_func = libarchive.calls.archive_read.c_archive_close_func(archive_close)

    def opener(archive_res):
        # The seek callback is registered before the archive is opened.
        libarchive.calls.archive_read.c_archive_read_set_seek_callback(
            archive_res, seek_func)
        # No client data and no open callback are supplied.
        libarchive.calls.archive_read.c_archive_read_open(
            archive_res, None, None, read_func, close_func)

    if 'entry_cls' not in kwargs:
        kwargs['entry_cls'] = _ArchiveEntryItReadable

    return _enumerator(opener, *args, **kwargs)

def file_reader(*args, **kwargs):
"""Return an enumerator that knows how to read the data for entries from a
physical file.
Expand All @@ -345,6 +385,12 @@ def memory_reader(*args, **kwargs):
entry_cls=_ArchiveEntryItReadable,
**kwargs)

def stream_reader(io, *args, **kwargs):
    """Build an entry enumerator over an already-open Python IOBase stream.

    Every argument is handed to `stream_enumerator` unchanged and its result
    is returned as-is.
    """

    enumerator = stream_enumerator(io, *args, **kwargs)
    return enumerator

def _pour(opener, flags=0, *args, **kwargs):
"""A flexible pouring facility that knows how to enumerate entry data."""

Expand Down
20 changes: 20 additions & 0 deletions libarchive/calls/archive_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ def _check_zero_success(value):
c_archive_read_support_format_all.argtypes = [c_void_p]
c_archive_read_support_format_all.restype = _check_zero_success

# ctypes prototypes for the client callbacks handed to libarchive. In each
# one the first two c_void_p arguments are opaque pointers (presumably the
# archive handle and the client data -- confirm against the libarchive docs).
# The read callback additionally receives a pointer through which it
# publishes the address of the data it produced.
c_archive_read_func = CFUNCTYPE(c_ssize_t, c_void_p, c_void_p, POINTER(POINTER(c_char)))
c_archive_seek_func = CFUNCTYPE(c_longlong, c_void_p, c_void_p, c_longlong, c_int)
c_archive_close_func = CFUNCTYPE(c_int, c_void_p, c_void_p)

# Callback-driven open. The third argument is declared as a bare pointer
# rather than a callback prototype, so callers can pass None for it.
# The status code is validated with _check_zero_success, exactly like
# c_archive_read_open_filename, so a failed open is not silently ignored.
c_archive_read_open = libarchive.archive_read_open
c_archive_read_open.argtypes = [c_void_p, c_void_p, c_void_p, c_archive_read_func, c_archive_close_func]
c_archive_read_open.restype = _check_zero_success

c_archive_read_open_filename = libarchive.archive_read_open_filename
c_archive_read_open_filename.argtypes = [c_void_p, c_char_p, c_size_t]
c_archive_read_open_filename.restype = _check_zero_success
Expand Down Expand Up @@ -84,3 +92,15 @@ def _check_zero_success(value):
c_archive_read_data_block = libarchive.archive_read_data_block
c_archive_read_data_block.argtypes = [c_void_p, POINTER(c_void_p), POINTER(c_size_t), POINTER(c_longlong)]
c_archive_read_data_block.restype = c_int

# Binding for libarchive's archive_read_set_read_callback. Takes an opaque
# pointer (presumably the archive handle -- confirm) and a
# c_archive_read_func. The C int status is returned as-is; this binding does
# not validate it.
c_archive_read_set_read_callback = libarchive.archive_read_set_read_callback
c_archive_read_set_read_callback.argtypes = [c_void_p, c_archive_read_func]
c_archive_read_set_read_callback.restype = c_int

# Binding for libarchive's archive_read_set_seek_callback. Same shape as
# above, but takes a c_archive_seek_func. The status is returned unvalidated.
c_archive_read_set_seek_callback = libarchive.archive_read_set_seek_callback
c_archive_read_set_seek_callback.argtypes = [c_void_p, c_archive_seek_func]
c_archive_read_set_seek_callback.restype = c_int

# Binding for libarchive's archive_read_set_close_callback. Same shape as
# above, but takes a c_archive_close_func. The status is returned unvalidated.
c_archive_read_set_close_callback = libarchive.archive_read_set_close_callback
c_archive_read_set_close_callback.argtypes = [c_void_p, c_archive_close_func]
c_archive_read_set_close_callback.restype = c_int
8 changes: 5 additions & 3 deletions libarchive/public.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from libarchive.adapters.archive_read import \
file_enumerator, file_reader, file_pour, \
memory_enumerator, memory_reader, memory_pour
# Re-export the read entry points. stream_reader is included alongside
# stream_enumerator because the README documents
# libarchive.public.stream_reader.
from libarchive.adapters.archive_read import (
    file_enumerator, file_reader, file_pour,
    memory_enumerator, memory_reader, memory_pour,
    stream_enumerator, stream_reader
)

from libarchive.adapters.archive_write import \
create_file, create_generic
17 changes: 17 additions & 0 deletions libarchive/resources/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Done Task
===== =================================================
X Read entries from physical file
X Read entries from archive hosted in memory buffer
X Read entries from a Python IOBase stream
X Write physical files from archive
X Load memory buffer from archive
X Populate physical archive from physical files
Expand Down Expand Up @@ -87,6 +88,22 @@ To read files from a physical archive::
for block in entry.get_blocks():
f.write(block)

To read files from an IOBase stream::

import libarchive.public

with open('test.7z', 'rb') as io:
with libarchive.public.stream_reader(io) as e:
for entry in e:
with open('/tmp/' + str(entry), 'wb') as f:
for block in entry.get_blocks():
f.write(block)

If the archive format is such that it requires seeking during load, then the stream must be seekable.

Also note that, in the absence of filename information, libarchive's automatic detection only really works with archive formats. So, when streaming decompression of a non-archive format such as tar, it may be necessary to specify the format explicitly, as detailed below.


To read files from memory::

import libarchive.public
Expand Down
15 changes: 13 additions & 2 deletions libarchive/test_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,13 @@ def chdir(path):
os.chdir(original_path)

@contextlib.contextmanager
def test_archive():
def test_files():
"""
Returns a tuple containing all input test file paths and the output archive path.

:return: Tuple (input_file_paths, output_archive_path)
"""

with chdir(_APP_PATH):
temp_path = tempfile.mkdtemp()

Expand Down Expand Up @@ -70,9 +76,14 @@ def test_archive():
os.path.exists(output_filepath) is True, \
"Test archive was not created correctly."

yield output_filepath
yield (files, output_filepath)
finally:
try:
shutil.rmtree(temp_path)
except:
pass

@contextlib.contextmanager
def test_archive():
    """Yield only the path of the generated test archive.

    Thin wrapper around `test_files` that discards the list of input files.
    """

    with test_files() as fixture:
        _input_paths, output_archive = fixture
        yield output_archive
21 changes: 21 additions & 0 deletions tests/adapters/test_archive_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,24 @@ def test_read_symlinks(self):
}

self.assertEquals(index, expected)

def test_read_from_stream(self):
    """Entries enumerated from an open stream must match the source files."""

    with libarchive.test_support.test_files() as (infiles, archivepath):
        # Index every entry by pathname, keeping its type and data blocks.
        entries = {}
        with open(archivepath, 'rb') as io_in:
            with libarchive.adapters.archive_read.stream_enumerator(io_in) as e:
                for entry in e:
                    entries[entry.pathname] = (
                        entry.filetype,
                        list(entry.get_blocks()),
                    )

        for path in infiles:
            # At some point during compression, the root separator is
            # stripped from absolute paths
            archived_name = path.lstrip('/')
            self.assertIn(archived_name, entries)

            filetype, blocks = entries[archived_name]
            filedata = bytes().join(blocks)

            if filetype.IFLNK:
                # Symlink entries are expected to carry no data.
                self.assertEqual(filedata, bytes())
                continue

            with open(path, 'rb') as io_in:
                self.assertEqual(filedata, io_in.read())