From 93f4182f7bcb453271d8c3ba7d5d4736db8530eb Mon Sep 17 00:00:00 2001 From: thewhiteh4t Date: Fri, 20 Oct 2023 01:46:07 +0530 Subject: [PATCH] adjusted for new tldextract version --- modules/crawler.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/crawler.py b/modules/crawler.py index 9ac085e..6d63cfc 100644 --- a/modules/crawler.py +++ b/modules/crawler.py @@ -58,7 +58,10 @@ def crawler(target, output, data): base_url = f'{protocol}://{temp_tgt}' else: ext = tldextract.extract(target) - hostname = '.'.join(part for part in ext if part) + if ext.subdomain: + hostname = f'{ext.subdomain}.{ext.domain}.{ext.suffix}' + else: + hostname = ext.registered_domain base_url = f'{protocol}://{hostname}' r_url = f'{base_url}/robots.txt' sm_url = f'{base_url}/sitemap.xml'