Skip to content

Commit 69fea3b

Browse files
authored
extraction: add heuristics and explicit 3.13 support (#173)
1 parent 3ea88b3 commit 69fea3b

File tree

2 files changed

+4
-2
lines changed

2 files changed

+4
-2
lines changed

htmldate/extractors.py

+3 -2
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@
73 73
contains(@class, 'fa-clock-o') or
74 74
contains(@class, 'fa-calendar') or
75 75
contains(@class, 'fecha') or
76-
contains(@class, 'parution')
76+
contains(@class, 'parution') or
77+
contains(@id, 'footer-info-lastmod')
77 78
] |
78 79
.//footer | .//small
79 80
"""
@@ -173,7 +174,7 @@
173 174

174 175
# use of regex module for speed?
175 176
TEXT_PATTERNS = re.compile(
176-
r'(?:date[^0-9"]{,20}|updated|published|on)(?:[ :])*?([0-9]{1,4})[./]([0-9]{1,2})[./]([0-9]{2,4})|' # EN
177+
r'(?:date[^0-9"]{,20}|updated|last-modified|published|posted|on)(?:[ :])*?([0-9]{1,4})[./]([0-9]{1,2})[./]([0-9]{2,4})|' # EN
177 178
r"(?:Datum|Stand|Veröffentlicht am):? ?([0-9]{1,2})\.([0-9]{1,2})\.([0-9]{2,4})|" # DE
178 179
r"(?:güncellen?me|yayı(?:m|n)lan?ma) *?(?:tarihi)? *?:? *?([0-9]{1,2})[./]([0-9]{1,2})[./]([0-9]{2,4})|"
179 180
r"([0-9]{1,2})[./]([0-9]{1,2})[./]([0-9]{2,4}) *?(?:'de|'da|'te|'ta|’de|’da|’te|’ta|tarihinde) *(?:güncellendi|yayı(?:m|n)landı)", # TR

pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ classifiers = [
42 42
"Programming Language :: Python :: 3.10",
43 43
"Programming Language :: Python :: 3.11",
44 44
"Programming Language :: Python :: 3.12",
45+
"Programming Language :: Python :: 3.13",
45 46
"Topic :: Internet :: WWW/HTTP",
46 47
"Topic :: Scientific/Engineering :: Information Analysis",
47 48
"Topic :: Text Processing :: Linguistic",

0 commit comments

Comments (0)