Skip to content

Commit

Permalink
Update url regex
Browse files: browse the repository at this point in the history
The URL regex also leads to catastrophic backtracking on the
file from issue #15, which causes a massive slowdown.
  • Loading branch information
GjjvdBurg committed May 19, 2020
1 parent d2b3622 commit d632c40
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 3 deletions.
2 changes: 1 addition & 1 deletion clevercsv/detect_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
"number_1": "^(?=[+-\.\d])[+-]?(?:0|[1-9]\d*)?(((?P<dot>((?<=\d)\.|\.(?=\d)))?(?(dot)(?P<yes_dot>\d*(\d*[eE][+-]?\d+)?)|(?P<no_dot>((?<=\d)[eE][+-]?\d+)?)))|((?P<comma>,)?(?(comma)(?P<yes_comma>\d+(\d+[eE][+-]?\d+)?)|(?P<no_comma>((?<=\d)[eE][+-]?\d+)?))))$",
"number_2": "[+-]?(?:[1-9]|[1-9]\d{0,2})(?:\,\d{3})+\.\d*",
"number_3": "[+-]?(?:[1-9]|[1-9]\d{0,2})(?:\.\d{3})+\,\d*",
"url": "((https?|ftp):\/\/(?!\-))?(((([\p{L}\p{N}]*\-?[\p{L}\p{N}]+)+\.)+([a-z]{2,}|local)(\.[a-z]{2,3})?)|localhost|(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(\:\d{1,5})?))(\/[\p{L}\p{N}_\/()~?=&%\-\#\.:]*)?(\.[a-z]+)?",
"url": "((https?|ftp):\/\/(?!\-))?(((?:[\p{L}\p{N}-]+\.)+([a-z]{2,}|local)(\.[a-z]{2,3})?)|localhost|(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(\:\d{1,5})?))(\/[\p{L}\p{N}_\/()~?=&%\-\#\.:]*)?(\.[a-z]+)?",
"email": r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)",
"ipv4": "(?:\d{1,3}\.){3}\d{1,3}",
"unicode_alphanum": "(\p{N}?\p{L}+[\p{N}\p{L}\ "
Expand Down
2 changes: 0 additions & 2 deletions tests/test_unit/test_detect_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,8 +287,6 @@ def test_url(self):
"http://?",
"http://??",
"http://??/",
"http://a.b--c.de/",
"http://a.b-.co",
"http://foo.bar/foo(bar)baz quux",
"http://foo.bar?q=Spaces should be encoded",
"http://www.foo.bar./",
Expand Down

0 comments on commit d632c40

Please sign in to comment.