Skip to content

Commit 8318c73

Browse files
authored
Merge pull request #18 from saulpw/kef/17
[build] add support for windows #17
2 parents 9b48e93 + 50dbdba commit 8318c73

File tree

5 files changed

+128
-128
lines changed

5 files changed

+128
-128
lines changed

.github/workflows/main.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,4 @@ jobs:
2828
run: |
2929
python setup.py develop
3030
- name: Run basic test
31-
run: unzip-http http://psa.download.navigation.com/automotive/PSA/RT6-SMEGx/M49RG20-Q0420-2001.ZIP DATA/CURR_VERS_NAVI.TXT
31+
run: unzip_http.py http://psa.download.navigation.com/automotive/PSA/RT6-SMEGx/M49RG20-Q0420-2001.ZIP DATA/CURR_VERS_NAVI.TXT

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Extract individual files from .zip files over http without downloading the entir
88

99
## Usage
1010

11-
unzip-http [-l] [-f] [-o] <url> <filenames..>
11+
unzip_http [-l] [-f] [-o] <url> <filenames..>
1212

1313
Extract <filenames> from a remote .zip at `<url>` to stdout.
1414

setup.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def requirements():
1414

1515
setup(
1616
name="unzip-http",
17-
version="0.5.1",
17+
version="0.6",
1818
description="extract files from .zip files over http without downloading entire archive",
1919
long_description=readme(),
2020
long_description_content_type="text/markdown",
@@ -27,6 +27,9 @@ def requirements():
2727
url="https://github.com/saulpw/unzip-http",
2828
python_requires=">=3.8",
2929
py_modules=["unzip_http"],
30-
scripts=["unzip-http"],
30+
scripts=["unzip_http.py"],
31+
entry_points={
32+
"console_scripts": ["unzip_http=unzip_http:main"],
33+
},
3134
install_requires=requirements(),
3235
)

unzip-http

-123
This file was deleted.

unzip_http.py

100644100755
+121-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#!/usr/bin/env python3
2+
13
# Copyright (c) 2022 Saul Pwanson
24
#
35
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -18,17 +20,43 @@
1820
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1921
# SOFTWARE.
2022

23+
"""
24+
usage: unzip_http [-h] [-l] [-f] [-o] url [files ...]
25+
26+
Extract individual files from .zip files over http without downloading the
27+
entire archive. HTTP server must send `Accept-Ranges: bytes` and
28+
`Content-Length` in headers.
29+
30+
positional arguments:
31+
url URL of the remote zip file
32+
files Files to extract. If no filenames given, displays .zip
33+
contents (filenames and sizes). Each filename can be a
34+
wildcard glob.
35+
36+
options:
37+
-h, --help show this help message and exit
38+
-l, --list List files in the remote zip file
39+
-f, --full-filepaths Recreate folder structure from zip file when extracting
40+
(instead of extracting the files to the current
41+
directory)
42+
-o, --stdout Write files to stdout (if multiple files: concatenate
43+
them to stdout, in zipfile order)
44+
"""
45+
2146
import sys
2247
import os
2348
import io
49+
import math
50+
import time
2451
import zlib
2552
import struct
2653
import fnmatch
54+
import argparse
2755
import pathlib
2856
import urllib.parse
2957

3058

31-
__version__ = '0.5.1'
59+
__version__ = '0.6'
3260

3361

3462
def error(s):
@@ -263,3 +291,95 @@ def read(self, n):
263291
self._buffer = self._buffer[n:]
264292

265293
return ret
294+
295+
296+
### script start
297+
298+
class StreamProgress:
    """Readable wrapper that reports transfer progress on stderr.

    Wraps any object exposing ``read(n)`` and forwards reads to it while
    keeping a running byte count. At most once every 0.1 seconds a
    carriage-return-prefixed status line (elapsed time, megabytes read,
    expected total, throughput, name) is rewritten on stderr.
    """

    def __init__(self, fp, name='', total=0):
        """Start timing a transfer.

        fp:    underlying stream; only its ``read(n)`` method is used.
        name:  label appended to the status line.
        total: expected number of bytes, shown as the "out of" figure.
        """
        self.name = name
        self.fp = fp
        self.total = total
        self.start_time = time.time()
        self.last_update = 0  # epoch 0, so the first read always prints a status line
        self.amtread = 0

    def read(self, n):
        """Read up to *n* bytes from the wrapped stream and update the status line.

        Returns whatever the wrapped stream returned. An empty result also
        terminates the status line with a newline.
        """
        r = self.fp.read(n)
        self.amtread += len(r)
        now = time.time()
        if now - self.last_update > 0.1:
            self.last_update = now

            elapsed_s = now - self.start_time
            read_mb = self.amtread / 10**6
            # The first read can finish within a single clock tick (coarse
            # timers report the same timestamp twice), and the wall clock may
            # even step backwards; report a zero rate instead of dividing by
            # a non-positive elapsed time.
            rate = read_mb / elapsed_s if elapsed_s > 0 else 0.0
            sys.stderr.write(f'\r{elapsed_s:.0f}s {read_mb:.02f}/{self.total/10**6:.02f}MB ({rate:.02f} MB/s) {self.name}')

        if not r:
            sys.stderr.write('\n')

        return r
321+
322+
323+
def list_files(rzf):
    """Print one line per archive entry to stderr.

    Each line has the form ``<compressed size> -> <uncompressed size>\\t<name>``
    with both size columns right-aligned to the widest value in the listing.

    rzf: object whose ``infolist()`` yields entries with ``compress_size``,
         ``file_size`` and ``filename`` attributes.
    """
    infos = list(rzf.infolist())

    # Column width is the number of decimal digits of the largest value.
    # len(str(x)) is exact for every non-negative integer, including 0 and
    # exact powers of ten, where ceil(log10(x)) comes out one digit short.
    # default=1 keeps an archive with no entries from raising in max().
    digits_compr = max((len(str(f.compress_size)) for f in infos), default=1)
    digits_plain = max((len(str(f.file_size)) for f in infos), default=1)

    fmtstr = f'%{digits_compr}d -> %{digits_plain}d\t%s'
    for f in infos:
        print(fmtstr % (f.compress_size, f.file_size, f.filename), file=sys.stderr)
332+
333+
334+
def extract_one(outfile, rzf, f, ofname):
    """Copy the archive entry *f* into *outfile*, showing progress on stderr.

    outfile: writable binary stream that receives the data.
    rzf:     archive object; ``rzf.open(f)`` supplies the readable stream.
    f:       entry to extract; ``filename`` and ``compress_size`` are read.
    ofname:  destination name, used only in the announcement message.
    """
    print(f'Extracting {f.filename} to {ofname}...', file=sys.stderr)

    # NOTE(review): the progress total is the entry's compressed size, while
    # the bytes counted are whatever rzf.open() yields -- confirm these agree.
    source = StreamProgress(rzf.open(f), name=f.filename, total=f.compress_size)

    chunk_size = 2**18
    while True:
        chunk = source.read(chunk_size)
        if not chunk:
            break
        outfile.write(chunk)
340+
341+
342+
def download_file(f, rzf, args):
    """Extract archive entry *f* if its name matches any requested glob.

    f:    entry with a ``filename`` attribute.
    rzf:  archive object, passed through to ``extract_one``.
    args: parsed options; reads ``files`` (glob patterns), ``stdout`` and
          ``full_filepaths``.

    With ``args.stdout`` the data goes to ``sys.stdout.buffer``. Otherwise it
    is written under the current directory: at the entry's full path when
    ``args.full_filepaths`` is set (parent folders are created), or at its
    base name when it is not. Entries whose destination would fall outside
    the current directory are skipped with a message on stderr.
    """
    if not any(fnmatch.fnmatch(f.filename, g) for g in args.files):
        return

    if args.stdout:
        extract_one(sys.stdout.buffer, rzf, f, "stdout")
        return

    path = pathlib.Path(f.filename)
    if not args.full_filepaths:
        path = pathlib.Path(path.name)

    # Entry names come from a remote archive and cannot be trusted: an
    # absolute name or one containing ".." would otherwise be written outside
    # the working directory. Resolve the destination and require it to sit
    # strictly below the current directory before touching the filesystem.
    base = pathlib.Path.cwd().resolve()
    target = (base / path).resolve()
    if base not in target.parents:
        print(f'Skipping {f.filename}: destination is outside the current directory', file=sys.stderr)
        return

    if args.full_filepaths:
        path.parent.mkdir(parents=True, exist_ok=True)

    with open(str(path), 'wb') as of:
        extract_one(of, rzf, f, str(path))
357+
358+
359+
def main(argv=None):
    """Command-line entry point: list or extract files from a remote zip.

    argv: optional list of argument strings. When None (the default, and what
          the console-script entry point uses) arguments are taken from
          ``sys.argv``.

    Lists the archive when ``-l`` is given or no file patterns are supplied;
    otherwise offers every entry, in archive order, to ``download_file``.
    """
    # prog matches the installed command name shown in the module docstring
    # and README, so the generated usage line names a command that exists.
    parser = argparse.ArgumentParser(
        prog='unzip_http',
        description="Extract individual files from .zip files over http without downloading the entire archive. HTTP server must send `Accept-Ranges: bytes` and `Content-Length` in headers.")

    parser.add_argument('-l', '--list', action='store_true', default=False,
                        help="List files in the remote zip file")
    parser.add_argument('-f', '--full-filepaths', action='store_true', default=False,
                        help="Recreate folder structure from zip file when extracting (instead of extracting the files to the current directory)")
    parser.add_argument('-o', '--stdout', action='store_true', default=False,
                        help="Write files to stdout (if multiple files: concatenate them to stdout, in zipfile order)")

    parser.add_argument("url", nargs=1, help="URL of the remote zip file")
    parser.add_argument("files", nargs='*', help="Files to extract. If no filenames given, displays .zip contents (filenames and sizes). Each filename can be a wildcard glob.")

    args = parser.parse_args(argv)

    # nargs=1 makes args.url a one-element list.
    rzf = RemoteZipFile(args.url[0])
    if args.list or not args.files:
        list_files(rzf)
    else:
        for f in rzf.infolist():
            download_file(f, rzf, args)


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)