From 014dc496babfa20d5a67adade27c9a51a98b97ae Mon Sep 17 00:00:00 2001 From: Roman Snegirev Date: Mon, 13 Mar 2023 16:10:15 +0300 Subject: [PATCH] Update patterns, fix tests --- is_bot/__init__.py | 2 +- is_bot/_patterns.py | 55 ++++++++++++++++++++++++++++++++++++++++---- tests/test_is_bot.py | 8 +++---- 3 files changed, 55 insertions(+), 10 deletions(-) diff --git a/is_bot/__init__.py b/is_bot/__init__.py index 794e05f..905ddd1 100644 --- a/is_bot/__init__.py +++ b/is_bot/__init__.py @@ -1,5 +1,5 @@ from ._is_bot import Bots -__version__ = '0.2.1' +__version__ = '0.2.2' __all__ = ('Bots',) diff --git a/is_bot/_patterns.py b/is_bot/_patterns.py index 6b804a2..ece7ba9 100644 --- a/is_bot/_patterns.py +++ b/is_bot/_patterns.py @@ -1,5 +1,6 @@ default_patterns = { " daum[ /]", + " DCV$", " deusu/", "(?:^| )site", "@[a-z]", @@ -8,27 +9,42 @@ "\\[at\\][a-z]", "^12345", "^<", - "^[\\w \\.\\-\\(\\)]+(/v?\\d+(\\.\\d+)?(\\.\\d{1,10})?)?$", + "^[\\w \\.]+/v?\\d+(\\.\\d+)?(\\.\\d{1,10})?$", + "^[\\w\\-\\(\\)]+$", "^[^ ]{50,}$", + "^ace explorer", + "^acoon", "^active", "^ad muncher", "^anglesharp/", "^anonymous", + "^apple-pubsub/", + "^astute srm", "^avsdevicesdk/", "^axios/", "^bidtellect/", "^biglotron", + "^blackboard safeassign", + "^blocknote.net", + "^braze sender", + "^captivenetworksupport", "^castro", + "^cf-uc ", "^clamav[ /]", "^cobweb/", "^coccoc", "^custom", + "^dap ", "^ddg[_-]android", "^discourse", "^dispatch/\\d", "^downcast/", "^duckduckgo", + "^email", + "^enigma browser", + "^evernote clip resolver", "^facebook", + "^faraday", "^fdm[ /]\\d", "^getright/", "^gozilla/", @@ -36,20 +52,35 @@ "^hobbit", "^hotzonu", "^hwcdn/", + "^invision", "^jeode/", + "^jetbrains", "^jetty/", "^jigsaw", "^linkdex", "^lwp[-: ]", + "^mailchimp\\.com$", "^metauri", "^microsoft bits", + "^microsoft data", + "^microsoft office existence", + "^microsoft office protocol discovery", + "^microsoft windows network diagnostics", + "^microsoft-cryptoapi", + "^microsoft-webdav-miniredir", "^movabletype", "^mozilla/\\d\\.\\d \\(compatible;?\\)$", "^mozilla/\\d\\.\\d \\w*$", + "^my browser$", "^navermailapp", "^netsurf", + "^nginx\\W", + "^node-superagent", + "^octopus", "^offline explorer", - "^phantom", + "^pagething", + "^panscient", + "^perimeterx", "^php", "^postman", "^postrank", @@ -57,26 +88,35 @@ "^read", "^reed", "^restsharp/", + "^shareaza", + "^shockwave flash", "^snapchat", "^space bison", + "^sprinklr", "^svn", "^swcd ", + "^t-online browser", "^taringa", "^test certificate info", + "^the knowledge ai", + "^thinklab", "^thumbor/", + "^traackr.com", "^tumblr/", - "^user-agent:mozilla", - "^valid", + "^uptime", + "^vbulletin", "^venus/fedoraplanet", "^w3c", "^webbandit/", "^webcopier", "^wget", "^whatsapp", + "^www-mechanize", "^xenu link sleuth", "^yahoo", "^yandex", "^zdm/\\d", + "^zeushdthree", "^zoom marketplace/", "^{{.*}}$", "adbeat\\.com", @@ -96,7 +136,7 @@ "client", "cloud", "crawl", - "cryptoapi", + "daemon", "dareboost", "datanyze", "dataprovider", @@ -132,10 +172,13 @@ "optimize", "pageburst", "pagespeed", + "parse", "perl", + "phantom", "pingdom", "powermarks", "preview", + "probe", "proxy", "ptst[ /]\\d", "reader", @@ -156,12 +199,14 @@ "synapse", "synthetic", "taginspector/", + "toolbar", "torrent", "tracemyfile", "transcoder", "trendsmapresolver", "twingly recon", "url", + "valid", "virtuoso", "wappalyzer", "webglance", diff --git a/tests/test_is_bot.py b/tests/test_is_bot.py index 83e6eb7..cde3170 100644 --- a/tests/test_is_bot.py +++ b/tests/test_is_bot.py @@ -8,10 +8,10 @@ 'Mozilla/5.0 (compatible; Bingbot/2.0; +http://www.bing.com/bingbot.htm)', 'DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36 Chrome-Lighthouse', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.0.0 Safari/538.1' - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/104.0.5112.101 Safari/537.36' - 'Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots)' - 'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.268' + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.0.0 Safari/538.1', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/104.0.5112.101 Safari/537.36', + 'Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots)', + 'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.268', ] NOT_BOTS = [