unzip_http: fold the unzip-http launcher into the module and add a console script

Moves the command-line front end (StreamProgress, list_files, extract_one,
download_file, main) out of the separate `unzip-http` script and into
unzip_http.py, makes unzip_http.py executable, registers an `unzip_http`
console-script entry point in setup.py, bumps the version from 0.5.1 to 0.6,
and updates the README and the CI smoke test to the new command names.

Review amendments to the code added in unzip_http.py:

  * StreamProgress.read: the transfer rate was divided by the elapsed time,
    which is zero when the first read falls in the same clock tick as
    construction; the rate is now reported as 0.00 in that case instead of
    raising ZeroDivisionError.
  * list_files: column widths came from ceil(log10(x)), which is one digit
    short for exact powers of ten (and zero for 1); they now use len(str(x)).
    An archive with no members no longer raises ValueError from max(), and
    infolist() is evaluated once instead of three times.
  * download_file: with --full-filepaths, member names that are anchored or
    contain `..` are skipped with a warning instead of being written outside
    the current directory.
  * main: argparse prog is `unzip_http`, matching the usage text in the module
    docstring and the console-script name.
  * One-line docstrings on the moved definitions; the last hunk's line count
    is adjusted accordingly.

NOTE(review): this patch reached review with its line breaks and indentation
collapsed, so the whitespace below is reconstructed from syntax and from the
hunk line counts. Indentation of context lines (main.yml, README.md, setup.py,
the `read` method at the top of the last hunk) and of the `### script start`
comment is a best guess -- confirm against the tree before applying. The
README context line "at `` to stdout" looks like it lost an angle-bracket
placeholder in transit and is left as received. The `index` lines are carried
over unchanged and no longer describe the amended unzip_http.py blob.

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 204321d..8a3b783 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -28,4 +28,4 @@ jobs:
         run: |
           python setup.py develop
       - name: Run basic test
-        run: unzip-http http://psa.download.navigation.com/automotive/PSA/RT6-SMEGx/M49RG20-Q0420-2001.ZIP DATA/CURR_VERS_NAVI.TXT
+        run: unzip_http.py http://psa.download.navigation.com/automotive/PSA/RT6-SMEGx/M49RG20-Q0420-2001.ZIP DATA/CURR_VERS_NAVI.TXT
diff --git a/README.md b/README.md
index 9ea716d..a94f316 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ Extract individual files from .zip files over http without downloading the entir
 
 ## Usage
 
-    unzip-http [-l] [-f] [-o]
+    unzip_http [-l] [-f] [-o]
 
 Extract from a remote .zip at `` to stdout.
 
diff --git a/setup.py b/setup.py
index 5196804..f0456e9 100644
--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,7 @@ def requirements():
 
 setup(
     name="unzip-http",
-    version="0.5.1",
+    version="0.6",
     description="extract files from .zip files over http without downloading entire archive",
     long_description=readme(),
     long_description_content_type="text/markdown",
@@ -27,6 +27,9 @@ def requirements():
     url="https://github.com/saulpw/unzip-http",
     python_requires=">=3.8",
     py_modules=["unzip_http"],
-    scripts=["unzip-http"],
+    scripts=["unzip_http.py"],
+    entry_points={
+        "console_scripts": ["unzip_http=unzip_http:main"],
+    },
     install_requires=requirements(),
 )
diff --git a/unzip-http b/unzip-http
deleted file mode 100755
index e903307..0000000
--- a/unzip-http
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-usage: unzip-http [-h] [-l] [-f] [-o] url [files ...]
-
-Extract individual files from .zip files over http without downloading the
-entire archive. HTTP server must send `Accept-Ranges: bytes` and
-`Content-Length` in headers.
-
-positional arguments:
-  url                   URL of the remote zip file
-  files                 Files to extract. If no filenames given, displays .zip
-                        contents (filenames and sizes). Each filename can be a
-                        wildcard glob.
-
-options:
-  -h, --help            show this help message and exit
-  -l, --list            List files in the remote zip file
-  -f, --full-filepaths  Recreate folder structure from zip file when extracting
-                        (instead of extracting the files to the current
-                        directory)
-  -o, --stdout          Write files to stdout (if multiple files: concatenate
-                        them to stdout, in zipfile order)
-"""
-
-import sys
-import io
-import math
-import time
-import fnmatch
-import argparse
-import pathlib
-
-import unzip_http
-
-
-class StreamProgress:
-    def __init__(self, fp, name='', total=0):
-        self.name = name
-        self.fp = fp
-        self.total = total
-        self.start_time = time.time()
-        self.last_update = 0
-        self.amtread = 0
-
-    def read(self, n):
-        r = self.fp.read(n)
-        self.amtread += len(r)
-        now = time.time()
-        if now - self.last_update > 0.1:
-            self.last_update = now
-
-            elapsed_s = now - self.start_time
-            sys.stderr.write(f'\r{elapsed_s:.0f}s {self.amtread/10**6:.02f}/{self.total/10**6:.02f}MB ({self.amtread/10**6/elapsed_s:.02f} MB/s) {self.name}')
-
-        if not r:
-            sys.stderr.write('\n')
-
-        return r
-
-
-def list_files(rzf):
-    def safelog(x):
-        return 1 if x == 0 else math.ceil(math.log10(x))
-
-    digits_compr = max(safelog(f.compress_size) for f in rzf.infolist())
-    digits_plain = max(safelog(f.file_size    ) for f in rzf.infolist())
-    fmtstr = f'%{digits_compr}d -> %{digits_plain}d\t%s'
-    for f in rzf.infolist():
-        print(fmtstr % (f.compress_size, f.file_size, f.filename), file=sys.stderr)
-
-
-def extract_one(outfile, rzf, f, ofname):
-    print(f'Extracting {f.filename} to {ofname}...', file=sys.stderr)
-
-    fp = StreamProgress(rzf.open(f), name=f.filename, total=f.compress_size)
-    while r := fp.read(2**18):
-        outfile.write(r)
-
-
-def download_file(f, rzf, args):
-    if not any(fnmatch.fnmatch(f.filename, g) for g in args.files):
-        return
-
-    if args.stdout:
-        extract_one(sys.stdout.buffer, rzf, f, "stdout")
-    else:
-        path = pathlib.Path(f.filename)
-        if args.full_filepaths:
-            path.parent.mkdir(parents=True, exist_ok=True)
-        else:
-            path = path.name
-
-        with open(str(path), 'wb') as of:
-            extract_one(of, rzf, f, str(path))
-
-
-def main(args):
-    rzf = unzip_http.RemoteZipFile(args.url[0])
-    if args.list or len(args.files) == 0:
-        list_files(rzf)
-    else:
-        for f in rzf.infolist():
-            download_file(f, rzf, args)
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(prog='unzip-http', \
-        description="Extract individual files from .zip files over http without downloading the entire archive. HTTP server must send `Accept-Ranges: bytes` and `Content-Length` in headers.")
-
-    parser.add_argument('-l', '--list', action='store_true', default=False,
-                        help="List files in the remote zip file")
-    parser.add_argument('-f', '--full-filepaths', action='store_true', default=False,
-                        help="Recreate folder structure from zip file when extracting (instead of extracting the files to the current directory)")
-    parser.add_argument('-o', '--stdout', action='store_true', default=False,
-                        help="Write files to stdout (if multiple files: concatenate them to stdout, in zipfile order)")
-
-    parser.add_argument("url", nargs=1, help="URL of the remote zip file")
-    parser.add_argument("files", nargs='*', help="Files to extract. If no filenames given, displays .zip contents (filenames and sizes). Each filename can be a wildcard glob.")
-
-    args = parser.parse_args()
-    main(args)
-
diff --git a/unzip_http.py b/unzip_http.py
old mode 100644
new mode 100755
index 875f65a..6ff1852
--- a/unzip_http.py
+++ b/unzip_http.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
 # Copyright (c) 2022 Saul Pwanson
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -18,17 +20,43 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+"""
+usage: unzip_http [-h] [-l] [-f] [-o] url [files ...]
+
+Extract individual files from .zip files over http without downloading the
+entire archive. HTTP server must send `Accept-Ranges: bytes` and
+`Content-Length` in headers.
+
+positional arguments:
+  url                   URL of the remote zip file
+  files                 Files to extract. If no filenames given, displays .zip
+                        contents (filenames and sizes). Each filename can be a
+                        wildcard glob.
+
+options:
+  -h, --help            show this help message and exit
+  -l, --list            List files in the remote zip file
+  -f, --full-filepaths  Recreate folder structure from zip file when extracting
+                        (instead of extracting the files to the current
+                        directory)
+  -o, --stdout          Write files to stdout (if multiple files: concatenate
+                        them to stdout, in zipfile order)
+"""
+
 import sys
 import os
 import io
+import math
+import time
 import zlib
 import struct
 import fnmatch
+import argparse
 import pathlib
 import urllib.parse
 
 
-__version__ = '0.5.1'
+__version__ = '0.6'
 
 
 def error(s):
@@ -263,3 +291,110 @@ def read(self, n):
         self._buffer = self._buffer[n:]
 
         return ret
+
+
+    ### script start
+
+class StreamProgress:
+    """Wrap a readable stream and report read progress on stderr."""
+
+    def __init__(self, fp, name='', total=0):
+        self.name = name
+        self.fp = fp
+        self.total = total
+        self.start_time = time.time()
+        self.last_update = 0
+        self.amtread = 0
+
+    def read(self, n):
+        """Read up to n bytes from the wrapped stream, updating the progress line."""
+        r = self.fp.read(n)
+        self.amtread += len(r)
+        now = time.time()
+        if now - self.last_update > 0.1:
+            self.last_update = now
+
+            elapsed_s = now - self.start_time
+            # The first read can land in the same clock tick as __init__.
+            mb_per_s = self.amtread/10**6/elapsed_s if elapsed_s > 0 else 0.0
+            sys.stderr.write(f'\r{elapsed_s:.0f}s {self.amtread/10**6:.02f}/{self.total/10**6:.02f}MB ({mb_per_s:.02f} MB/s) {self.name}')
+
+        if not r:
+            sys.stderr.write('\n')
+
+        return r
+
+
+def list_files(rzf):
+    """Print compressed size, plain size and name of every archive member to stderr."""
+    def ndigits(x):
+        return len(str(x))
+
+    infos = list(rzf.infolist())
+    digits_compr = max((ndigits(f.compress_size) for f in infos), default=1)
+    digits_plain = max((ndigits(f.file_size) for f in infos), default=1)
+    fmtstr = f'%{digits_compr}d -> %{digits_plain}d\t%s'
+    for f in infos:
+        print(fmtstr % (f.compress_size, f.file_size, f.filename), file=sys.stderr)
+
+
+def extract_one(outfile, rzf, f, ofname):
+    """Stream archive member f from rzf into outfile in 256 KiB chunks."""
+    print(f'Extracting {f.filename} to {ofname}...', file=sys.stderr)
+
+    fp = StreamProgress(rzf.open(f), name=f.filename, total=f.compress_size)
+    while r := fp.read(2**18):
+        outfile.write(r)
+
+
+def download_file(f, rzf, args):
+    """Extract member f if its name matches one of the globs in args.files."""
+    if not any(fnmatch.fnmatch(f.filename, g) for g in args.files):
+        return
+
+    if args.stdout:
+        extract_one(sys.stdout.buffer, rzf, f, "stdout")
+    else:
+        path = pathlib.Path(f.filename)
+        if args.full_filepaths:
+            # Member names come from the remote archive; refuse any that
+            # would land outside the current directory.
+            if path.anchor or '..' in path.parts:
+                print(f'Skipping {f.filename}: unsafe path', file=sys.stderr)
+                return
+            path.parent.mkdir(parents=True, exist_ok=True)
+        else:
+            path = path.name
+
+        with open(str(path), 'wb') as of:
+            extract_one(of, rzf, f, str(path))
+
+
+def main():
+    """Command-line entry point: list or extract members of a remote .zip."""
+    parser = argparse.ArgumentParser(prog='unzip_http', \
+        description="Extract individual files from .zip files over http without downloading the entire archive. HTTP server must send `Accept-Ranges: bytes` and `Content-Length` in headers.")
+
+    parser.add_argument('-l', '--list', action='store_true', default=False,
+                        help="List files in the remote zip file")
+    parser.add_argument('-f', '--full-filepaths', action='store_true', default=False,
+                        help="Recreate folder structure from zip file when extracting (instead of extracting the files to the current directory)")
+    parser.add_argument('-o', '--stdout', action='store_true', default=False,
+                        help="Write files to stdout (if multiple files: concatenate them to stdout, in zipfile order)")
+
+    parser.add_argument("url", nargs=1, help="URL of the remote zip file")
+    parser.add_argument("files", nargs='*', help="Files to extract. If no filenames given, displays .zip contents (filenames and sizes). Each filename can be a wildcard glob.")
+
+    args = parser.parse_args()
+
+    rzf = RemoteZipFile(args.url[0])
+    if args.list or len(args.files) == 0:
+        list_files(rzf)
+    else:
+        for f in rzf.infolist():
+            download_file(f, rzf, args)
+
+
+
+if __name__ == '__main__':
+    main()