Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[build] add support for windows #17 #18

Merged
merged 2 commits into from
Jul 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@ jobs:
run: |
python setup.py develop
- name: Run basic test
run: unzip-http http://psa.download.navigation.com/automotive/PSA/RT6-SMEGx/M49RG20-Q0420-2001.ZIP DATA/CURR_VERS_NAVI.TXT
run: unzip_http.py http://psa.download.navigation.com/automotive/PSA/RT6-SMEGx/M49RG20-Q0420-2001.ZIP DATA/CURR_VERS_NAVI.TXT
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Extract individual files from .zip files over http without downloading the entir

## Usage

unzip-http [-l] [-f] [-o] <url> <filenames..>
unzip_http [-l] [-f] [-o] <url> <filenames..>

Extract <filenames> from a remote .zip at `<url>` to stdout.

Expand Down
7 changes: 5 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def requirements():

setup(
name="unzip-http",
version="0.5.1",
version="0.6",
description="extract files from .zip files over http without downloading entire archive",
long_description=readme(),
long_description_content_type="text/markdown",
Expand All @@ -27,6 +27,9 @@ def requirements():
url="https://github.com/saulpw/unzip-http",
python_requires=">=3.8",
py_modules=["unzip_http"],
scripts=["unzip-http"],
scripts=["unzip_http.py"],
entry_points={
"console_scripts": ["unzip_http=unzip_http:main"],
},
install_requires=requirements(),
)
123 changes: 0 additions & 123 deletions unzip-http

This file was deleted.

122 changes: 121 additions & 1 deletion unzip_http.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/usr/bin/env python3

# Copyright (c) 2022 Saul Pwanson
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
Expand All @@ -18,17 +20,43 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""
usage: unzip_http [-h] [-l] [-f] [-o] url [files ...]

Extract individual files from .zip files over http without downloading the
entire archive. HTTP server must send `Accept-Ranges: bytes` and
`Content-Length` in headers.

positional arguments:
url URL of the remote zip file
files Files to extract. If no filenames given, displays .zip
contents (filenames and sizes). Each filename can be a
wildcard glob.

options:
-h, --help show this help message and exit
-l, --list List files in the remote zip file
-f, --full-filepaths Recreate folder structure from zip file when extracting
(instead of extracting the files to the current
directory)
-o, --stdout Write files to stdout (if multiple files: concatenate
them to stdout, in zipfile order)
"""

import sys
import os
import io
import math
import time
import zlib
import struct
import fnmatch
import argparse
import pathlib
import urllib.parse


__version__ = '0.5.1'
__version__ = '0.6'


def error(s):
Expand Down Expand Up @@ -263,3 +291,95 @@ def read(self, n):
self._buffer = self._buffer[n:]

return ret


### script start

class StreamProgress:
    """Wrap a readable stream and report read progress on stderr.

    Every call to :meth:`read` is forwarded to the wrapped stream. At most
    once every 0.1 seconds a one-line status (elapsed time, megabytes read
    out of ``total``, throughput, and ``name``) is rewritten in place on
    stderr using a carriage return.
    """

    def __init__(self, fp, name='', total=0):
        """Create a progress wrapper.

        fp:    object with a ``read(n)`` method returning a sized chunk.
        name:  label shown at the end of the status line.
        total: expected size in bytes, shown as the denominator.
        """
        self.name = name
        self.fp = fp
        self.total = total
        self.start_time = time.time()
        self.last_update = 0  # timestamp of the last status line written
        self.amtread = 0      # running count of bytes returned by read()

    def read(self, n):
        """Read up to ``n`` bytes from the wrapped stream and update progress.

        Returns whatever the wrapped ``read`` returned. When that result is
        empty (end of stream), a newline is written to stderr so the status
        line is terminated.
        """
        r = self.fp.read(n)
        self.amtread += len(r)
        now = time.time()
        if now - self.last_update > 0.1:
            self.last_update = now

            elapsed_s = now - self.start_time
            mb_read = self.amtread / 10**6
            # The first read can land on the same clock tick as construction
            # (coarse timers, notably on Windows), and the wall clock can even
            # step backwards; report a zero rate instead of dividing by zero.
            rate = mb_read / elapsed_s if elapsed_s > 0 else 0.0
            sys.stderr.write(f'\r{elapsed_s:.0f}s {mb_read:.02f}/{self.total/10**6:.02f}MB ({rate:.02f} MB/s) {self.name}')

        if not r:
            sys.stderr.write('\n')

        return r


def list_files(rzf):
    """Print a listing of the archive members to stderr.

    rzf: object whose ``infolist()`` yields entries exposing
         ``compress_size``, ``file_size`` and ``filename``.

    One line is written per member in the form
    ``<compressed> -> <uncompressed>\\t<filename>``, with both size columns
    right-aligned to the widest value. Nothing is printed for an archive
    with no members.
    """
    # Materialize once: the listing is traversed several times and the
    # source may be a one-shot iterable or expensive to recompute.
    entries = list(rzf.infolist())
    if not entries:
        return

    # Column width is the number of decimal digits of the largest value.
    # len(str(x)) is exact for 0, 1 and exact powers of ten, unlike
    # ceil(log10(x)).
    width_compr = max(len(str(e.compress_size)) for e in entries)
    width_plain = max(len(str(e.file_size)) for e in entries)

    fmtstr = f'%{width_compr}d -> %{width_plain}d\t%s'
    for e in entries:
        print(fmtstr % (e.compress_size, e.file_size, e.filename), file=sys.stderr)


def extract_one(outfile, rzf, f, ofname):
    """Copy one archive member into an already-open binary output.

    outfile: writable object; receives each chunk via ``write``.
    rzf:     archive object; ``rzf.open(f)`` supplies the member's stream.
    f:       member entry providing ``filename`` and ``compress_size``.
    ofname:  destination label used only in the announcement on stderr.
    """
    print(f'Extracting {f.filename} to {ofname}...', file=sys.stderr)

    # NOTE(review): the progress total is the member's compressed size, while
    # the bytes counted are whatever rzf.open(f) yields -- confirm both are
    # measured in the same units.
    progress = StreamProgress(rzf.open(f), name=f.filename, total=f.compress_size)

    chunk_size = 2**18
    while True:
        chunk = progress.read(chunk_size)
        if not chunk:
            break
        outfile.write(chunk)


def _safe_member_path(filename):
    """Return *filename* as a relative path confined to the current directory.

    Any drive/root anchor and every ``..`` component is dropped, so a member
    name taken from a remote archive cannot point outside the extraction
    directory. Returns ``None`` when no usable component remains.
    """
    raw = pathlib.Path(filename)
    kept = [part for part in raw.parts if part != raw.anchor and part != '..']
    if not kept:
        return None
    return pathlib.Path(*kept)


def download_file(f, rzf, args):
    """Extract member *f* if its name matches one of the requested globs.

    f:    member entry providing ``filename``.
    rzf:  archive object, passed through to ``extract_one``.
    args: parsed options; reads ``files`` (list of glob patterns),
          ``stdout`` (write member data to stdout) and ``full_filepaths``
          (recreate the member's directory structure instead of writing
          into the current directory).

    Members that match no pattern are ignored. Directory entries (names
    ending in ``/``) are created as directories with ``full_filepaths`` and
    skipped otherwise, never opened as regular files.
    """
    if not any(fnmatch.fnmatch(f.filename, g) for g in args.files):
        return

    if args.stdout:
        extract_one(sys.stdout.buffer, rzf, f, "stdout")
        return

    # Member names come from the remote archive and are untrusted: strip
    # absolute anchors and '..' so nothing is written outside the cwd.
    target = _safe_member_path(f.filename)
    if target is None:
        print(f'Skipping {f.filename}: no usable file name', file=sys.stderr)
        return

    is_directory = f.filename.endswith('/')
    if args.full_filepaths:
        if is_directory:
            target.mkdir(parents=True, exist_ok=True)
            return
        target.parent.mkdir(parents=True, exist_ok=True)
    else:
        if is_directory:
            # Flat extraction has no place for a directory entry.
            return
        target = pathlib.Path(target.name)

    with open(str(target), 'wb') as of:
        extract_one(of, rzf, f, str(target))


def main(argv=None):
    """Command-line entry point.

    argv: optional list of argument strings; when ``None`` argparse falls
          back to ``sys.argv[1:]``, so the console-script and ``__main__``
          callers that pass nothing behave as before.

    Lists the archive contents when ``--list`` is given or no file patterns
    are supplied; otherwise passes every archive member to
    ``download_file``, which extracts the ones matching the patterns.
    """
    # prog matches the installed command name used in the module usage text.
    parser = argparse.ArgumentParser(
        prog='unzip_http',
        description="Extract individual files from .zip files over http without downloading the entire archive. HTTP server must send `Accept-Ranges: bytes` and `Content-Length` in headers.")

    parser.add_argument('-l', '--list', action='store_true', default=False,
                        help="List files in the remote zip file")
    parser.add_argument('-f', '--full-filepaths', action='store_true', default=False,
                        help="Recreate folder structure from zip file when extracting (instead of extracting the files to the current directory)")
    parser.add_argument('-o', '--stdout', action='store_true', default=False,
                        help="Write files to stdout (if multiple files: concatenate them to stdout, in zipfile order)")

    # nargs=1 stores the URL as a one-element list; it is unwrapped below.
    parser.add_argument("url", nargs=1, help="URL of the remote zip file")
    parser.add_argument("files", nargs='*', help="Files to extract. If no filenames given, displays .zip contents (filenames and sizes). Each filename can be a wildcard glob.")

    args = parser.parse_args(argv)

    rzf = RemoteZipFile(args.url[0])
    if args.list or not args.files:
        list_files(rzf)
    else:
        for f in rzf.infolist():
            download_file(f, rzf, args)



# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Loading