-
Notifications
You must be signed in to change notification settings - Fork 1
/
pyproject.toml
76 lines (69 loc) · 2.45 KB
/
pyproject.toml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Poetry package metadata for the document parser distribution.
[tool.poetry]
name = "navigator-document-parser"
version = "0.1.0"
description = "Parsing PDFs and websites containing laws and policies"
# NOTE(review): the address below looks like a scraper-redacted placeholder;
# confirm the real contact address against the repository.
authors = ["CPR Tech <[email protected]>"]
# Include every .py file found under src/, remapped via `to` onto the
# navigator_document_parser package directory.
packages = [{ include = "**/*.py", from = "src", to = "navigator_document_parser" }]
# Runtime dependencies. The interpreter constraint comes first; the remaining
# keys are kept in case-insensitive alphabetical order.
[tool.poetry.dependencies]
python = ">=3.10,<4.0"
awscli = "^1.34.30"
azure-ai-formrecognizer = "^3.2.1"
azure-pdf-parser = { git = "https://github.com/climatepolicyradar/azure-pdf-parser.git", tag = "v0.4.3" }
bleach = "^6.1.0"
click = "^8.1.3"
cloudpathlib = { version = "^0.19.0", extras = ["s3"] }
google-cloud-translate = "^3.15.5"
json-logging = "^1.3.0"
langdetect = "^1.0.9"
lxml = { version = "^5.2.2", extras = ["html-clean"] }
mock = "^5.1.0"
multiprocessing-logging = "^0.3.3"
news-please = "^1.5.48"
numpy = "^1.26.4"
pandas = "^2.2.3"
# Declared explicitly to fix a dependency's import of Image.LINEAR.
# NOTE(review): confirm this constraint is still needed.
pillow = "^10.4.0"
playwright = "^1.47.0"
psutil = "^6.0.0"
pydantic = "^2.9.2"
PyMuPDF = "^1.24.9"
pypdf2 = "^3.0.1"
python-json-logger = "^2.0.4"
readability-lxml = "^0.8.1"
tenacity = "^9.0.0"
tqdm = "^4.64.1"
# Development-only tooling (formatting, linting hooks, type checking, tests).
# NOTE(review): Poetry 1.2+ prefers [tool.poetry.group.dev.dependencies] over
# this legacy table; renaming it likely requires refreshing poetry.lock, so it
# is left as-is here.
[tool.poetry.dev-dependencies]
black = "^24.4.2"
pre-commit = "^3.8.0"
pyright = "^1.1.382"
pytest = "^8.3.3"
# PEP 517 build configuration: the project is built with poetry-core's backend.
[build-system]
# Minimum poetry-core version required at build time.
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
# Docstring lint configuration. The checks listed below are disabled; the
# `ignore` value is a comma-separated list spread over several lines.
[tool.pydocstyle]
# Missing-docstring checks:
# D100 - Missing docstring in public module
# D103 - Missing docstring in public function
# D104 - Missing docstring in public package
# D107 - Missing docstring in __init__
# Blank-line and summary-placement checks (D212 and D213 are both ignored, so
# neither summary position is enforced):
# D202 - No blank lines allowed after function docstring
# D203 - 1 blank line required before class docstring
# D212 - Multi-line docstring summary should start at the first line
# D213 - Multi-line docstring summary should start at the second line
# First-line wording and punctuation checks:
# D400 - First line should end with a period
# D401 - First line should be in imperative mood (Google styleguide denotes 'descriptive' style.)
# D415 - First line should end with a period, question mark, or exclamation point
# Section-formatting checks:
# D406 - Section name should end with a newline (incompatible with Google styleguide)
# D407 - Missing dashed underline after section (incompatible with Google styleguide)
# D413 - Missing blank line after last section (incompatible with Google styleguide)
ignore = """
D100,D103,D104,D107,
D202,D203,D212,D213,
D400,D401,D406,D407,D413,D415
"""
# Flake8 settings, kept in step with the Black line length configured in this file.
# NOTE(review): flake8 does not read pyproject.toml on its own; this table only
# takes effect through a plugin such as Flake8-pyproject, and none is listed in
# the dev dependencies visible here. Confirm how flake8 is invoked.
[tool.flake8]
max-line-length = 88
# E203 (whitespace before ':') is disabled; presumably to avoid clashing with
# Black's slice formatting.
extend-ignore = "E203"
# Black formatter settings.
[tool.black]
line-length = 88
# Must agree with the interpreter constraint in [tool.poetry.dependencies]
# (python = ">=3.10,<4.0"); the previous py39 target was below the supported
# minimum.
target-version = ["py310"]