Skip to content

Commit

Permalink
Merge pull request #15 from PoroCYon/master
Browse files Browse the repository at this point in the history
Enhance CLI interface
  • Loading branch information
anjakefala committed Sep 12, 2023
2 parents fced018 + 7d11d3d commit a0d20b5
Showing 1 changed file with 84 additions and 23 deletions.
107 changes: 84 additions & 23 deletions unzip-http
Original file line number Diff line number Diff line change
@@ -1,20 +1,35 @@
#!/usr/bin/env python3

'''
Usage:
unzip-http <url.zip> <filenames..>
"""
usage: unzip-http [-h] [-l] [-f] [-o] url [files ...]
Extract <filenames> from a remote .zip at <url> to stdout.
If no filenames given, displays .zip contents (filenames and sizes).
Each filename can be a wildcard glob; all matching files are concatenated and sent to stdout in zipfile order.
Extract individual files from .zip files over http without downloading the
entire archive. HTTP server must send `Accept-Ranges: bytes` and
`Content-Length` in headers.
HTTP server must send `Accept-Ranges: bytes` and `Content-Length` in headers.
'''
positional arguments:
url URL of the remote zip file
files Files to extract. If no filenames given, displays .zip
contents (filenames and sizes). Each filename can be a
wildcard glob.
options:
-h, --help show this help message and exit
-l, --list List files in the remote zip file
-f, --full-filepaths Recreate folder structure from zip file when extracting
(instead of extracting the files to the current
directory)
-o, --stdout Write files to stdout (if multiple files: concatenate
them to stdout, in zipfile order)
"""

import sys
import io
import math
import time
import fnmatch
import argparse
import pathlib

import unzip_http

Expand Down Expand Up @@ -44,19 +59,65 @@ class StreamProgress:
return r


def main(url, *globs):
rzf = unzip_http.RemoteZipFile(url)
def list_files(rzf):
def safelog(x):
    """Return the number of characters `x` occupies when formatted with `%d`.

    The result is used as a minimum field width for the size columns of the
    listing, so it has to be the real printed length of the value.
    """
    # Measure the formatted text instead of using ceil(log10(x)): the
    # logarithm is one short for exact powers of ten (log10(1000) == 3.0,
    # yet "1000" is four characters) and yields 0 for x == 1.
    return len('%d' % x)

digits_compr = max(safelog(f.compress_size) for f in rzf.infolist())
digits_plain = max(safelog(f.file_size ) for f in rzf.infolist())
fmtstr = f'%{digits_compr}d -> %{digits_plain}d\t%s'
for f in rzf.infolist():
if not globs:
print(f'{f.compress_size} -> {f.file_size} {f.filename}')
elif any(fnmatch.fnmatch(f.filename, g) for g in globs):
fp = StreamProgress(rzf.open(f), name=f.filename, total=f.compress_size)
while r := fp.read(2**18):
sys.stdout.buffer.write(r)


args = sys.argv[1:]
if not args:
print(__doc__, file=sys.stderr)
else:
main(*args)
print(fmtstr % (f.compress_size, f.file_size, f.filename), file=sys.stderr)


def extract_one(outfile, rzf, f, ofname):
    """Copy one archive member from the remote zip into `outfile`.

    Args:
        outfile: writable binary file-like object receiving the data.
        rzf: remote zip object; `rzf.open(f)` supplies the member stream.
        f: member entry; its `filename` and `compress_size` are read.
        ofname: destination name, used only in the message on stderr.
    """
    print(f'Extracting {f.filename} to {ofname}...', file=sys.stderr)

    chunk_size = 2**18
    stream = StreamProgress(rzf.open(f), name=f.filename, total=f.compress_size)
    while True:
        chunk = stream.read(chunk_size)
        if not chunk:
            # An empty/falsy read marks the end of the member.
            break
        outfile.write(chunk)


def _safe_relative_path(filename):
    """Return `filename` as a relative path without root, drive or `..` parts.

    Member names come from the remote archive, so they must not be able to
    point outside the directory the user is extracting into. Returns None
    when no usable component is left (e.g. the name was just `..` or `/`).
    """
    raw = pathlib.Path(filename)
    kept = [part for part in raw.parts if part not in (raw.anchor, '..')]
    return pathlib.Path(*kept) if kept else None


def download_file(f, rzf, args):
    """Extract archive member `f` if its name matches one of `args.files`.

    Args:
        f: member entry from `rzf.infolist()`; `f.filename` is matched
            against the globs and used to derive the output path.
        rzf: remote zip object, passed through to `extract_one`.
        args: parsed command line; reads `files` (glob patterns), `stdout`
            (write to standard output instead of a file) and
            `full_filepaths` (recreate the archive's directory structure
            instead of writing into the current directory).

    Directory entries (names ending in `/`) carry no data: they are created
    as directories when `full_filepaths` is set and skipped otherwise.
    Absolute paths and `..` components in member names are stripped so that
    nothing is written outside the current directory.
    """
    if not any(fnmatch.fnmatch(f.filename, g) for g in args.files):
        return

    is_dir = f.filename.endswith('/')

    if args.stdout:
        if not is_dir:
            extract_one(sys.stdout.buffer, rzf, f, "stdout")
        return

    path = _safe_relative_path(f.filename)
    if path is None:
        print(f'Skipping {f.filename}: no usable output path', file=sys.stderr)
        return

    if is_dir:
        # Opening a directory entry as a file would leave a stray empty file
        # that later blocks mkdir() for the members stored inside it.
        if args.full_filepaths:
            path.mkdir(parents=True, exist_ok=True)
        return

    if args.full_filepaths:
        path.parent.mkdir(parents=True, exist_ok=True)
    else:
        # Flatten: keep only the final component, written to the cwd.
        path = pathlib.Path(path.name)

    with open(str(path), 'wb') as of:
        extract_one(of, rzf, f, str(path))


def main(args):
    """Run the action selected on the command line.

    Opens the remote zip at `args.url[0]`. When `args.list` is set or no
    file patterns were given, the archive contents are listed; otherwise
    every member is handed to `download_file`, which applies the patterns.
    """
    remote_zip = unzip_http.RemoteZipFile(args.url[0])

    if args.list or not args.files:
        list_files(remote_zip)
        return

    for member in remote_zip.infolist():
        download_file(member, remote_zip, args)


if __name__ == '__main__':
    # Build the command-line interface, parse sys.argv and hand over to main().
    parser = argparse.ArgumentParser(
        prog='unzip-http',
        description="Extract individual files from .zip files over http without downloading the entire archive. HTTP server must send `Accept-Ranges: bytes` and `Content-Length` in headers.",
    )

    # Boolean switches, registered in the order they appear in --help.
    switches = (
        ('-l', '--list',
         "List files in the remote zip file"),
        ('-f', '--full-filepaths',
         "Recreate folder structure from zip file when extracting (instead of extracting the files to the current directory)"),
        ('-o', '--stdout',
         "Write files to stdout (if multiple files: concatenate them to stdout, in zipfile order)"),
    )
    for short_opt, long_opt, help_text in switches:
        parser.add_argument(short_opt, long_opt, action='store_true',
                            default=False, help=help_text)

    # Positionals: nargs=1 stores the URL as a one-element list.
    parser.add_argument("url", nargs=1, help="URL of the remote zip file")
    parser.add_argument("files", nargs='*', help="Files to extract. If no filenames given, displays .zip contents (filenames and sizes). Each filename can be a wildcard glob.")

    main(parser.parse_args())

0 comments on commit a0d20b5

Please sign in to comment.