-
Notifications
You must be signed in to change notification settings - Fork 34
/
dell_standalone.py
43 lines (37 loc) · 1.23 KB
/
dell_standalone.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
"""Parsers for Dell LCA PDF.
See an example here https://i.dell.com/sites/csdocuments/CorpComm_Docs/en/carbon-footprint-wyse-3030.pdf
"""
import logging
import re
import datetime
from typing import BinaryIO, Iterator
import os
from tools.parsers.lib import data
from tools.parsers.lib.image import crop, find_text_in_image, image_to_text
from tools.parsers.lib import loader
from tools.parsers.lib import pdf
from tools.parsers.lib import text
import argparse
import requests
import io
from tools.parsers import dell_laptop
# Scratch file that holds a PDF fetched from a URL while it is being parsed.
TEMP_PDF_PATH = "./tempfile.pdf"

# Seconds to wait on the remote server before abandoning a download.
DOWNLOAD_TIMEOUT = 60

# Anchored at the start of the string so that a local path which merely
# contains "http://" somewhere in the middle is not mistaken for a URL.
_URL_PREFIX = re.compile(r"https?://", re.IGNORECASE)


def is_remote_source(source: str) -> bool:
    """Return True when *source* begins with an http:// or https:// scheme."""
    return _URL_PREFIX.match(source) is not None


def download_pdf(source_url: str, destination: str) -> None:
    """Download *source_url* and store the response body at *destination*.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status, so
            that an HTML error page is never handed to the PDF parser.
    """
    response = requests.get(source_url, timeout=DOWNLOAD_TIMEOUT)
    response.raise_for_status()
    with open(destination, "wb") as out:
        out.write(response.content)


def main() -> None:
    """Parse the PDF named by ``--source`` and print one CSV row per result.

    ``--source`` may be a local file path or an http(s) URL. A URL is first
    downloaded to ``TEMP_PDF_PATH``; that file is removed again on exit, even
    when parsing fails. For local files the ``sources`` field is left empty,
    as only URLs are recorded there.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument("-s", "--source", required=True, help="URL to .pdf to be converted")
    args = vars(argparser.parse_args())

    pdf_path = args["source"]
    url = ""
    rm_tempfile = False
    try:
        if is_remote_source(pdf_path):
            url = pdf_path
            pdf_path = TEMP_PDF_PATH
            # Flag cleanup before downloading so a partially written file
            # is removed as well if the download fails half-way.
            rm_tempfile = True
            download_pdf(url, pdf_path)

        with open(pdf_path, 'rb') as fh:
            pdf_bytes = fh.read()
        # The file does not change while results are produced, so the hash
        # is computed once instead of once per result.
        sources_hash = data.md5_file(pdf_path)

        for result in dell_laptop.parse(io.BytesIO(pdf_bytes), url):
            result.data['sources_hash'] = sources_hash
            result.data['sources'] = url
            print(result.as_csv_row())
    finally:
        if rm_tempfile and os.path.exists(TEMP_PDF_PATH):
            os.remove(TEMP_PDF_PATH)


if __name__ == "__main__":
    main()