fixed issue with filename sanitization. Added retries to crt.sh for possible long response.
Gnomee1337 committed Oct 20, 2024
1 parent 23f481a commit d14d889
Showing 3 changed files with 59 additions and 14 deletions.
21 changes: 18 additions & 3 deletions modules/ip_gathering.py
@@ -68,20 +68,26 @@ async def _remove_original_ips(self, domain: str, all_domain_ips: set):
         return all_domain_ips
 
     async def _write_html_response(self, domain: str, response_text: str):
+        # Use this sanitized domain when generating filenames
+        sanitized_domain = self.sanitize_filename(domain)
         file_path = os.path.join(self.log_dir,
-                                 f'{domain}_{datetime.datetime.now().strftime("%d-%m-%Y_%Hh%Mm%Ss")}_HTML.txt')
+                                 f'{sanitized_domain}_{datetime.datetime.now().strftime("%d-%m-%Y_%Hh%Mm%Ss")}_HTML.txt')
         async with aiofiles.open(file_path, 'w') as file:
             await file.write(response_text)
 
     async def _write_extracted_ips_to_file(self, domain: str, viewdnsinfo_ips_output: set):
+        # Use this sanitized domain when generating filenames
+        sanitized_domain = self.sanitize_filename(domain)
         file_path = os.path.join(self.log_dir,
-                                 f'{domain}_{datetime.datetime.now().strftime("%d-%m-%Y_%Hh%Mm%Ss")}_only_ips.txt')
+                                 f'{sanitized_domain}_{datetime.datetime.now().strftime("%d-%m-%Y_%Hh%Mm%Ss")}_only_ips.txt')
         async with aiofiles.open(file_path, 'w') as file:
             await file.write("\n".join(str(ip) for ip in viewdnsinfo_ips_output))
 
     async def _write_domain_related_ips_to_file(self, domain: str, domain_ips: set):
+        # Use this sanitized domain when generating filenames
+        sanitized_domain = self.sanitize_filename(domain)
         file_path = os.path.normpath(os.path.join(os.path.realpath(__file__),
-                                                  f'../../cache/{domain}_{datetime.datetime.now().strftime("%d-%m-%Y_%Hh%Mm%Ss")}_IPs.txt'))
+                                                  f'../../cache/{sanitized_domain}_{datetime.datetime.now().strftime("%d-%m-%Y_%Hh%Mm%Ss")}_IPs.txt'))
         async with aiofiles.open(file_path, 'w') as file:
             await file.write("\n".join(sorted(domain_ips)))
 
@@ -90,3 +96,12 @@ async def _write_all_possible_ips_to_file(self):
                                                   f'../../cache/ALL_DOMAINS_{datetime.datetime.now().strftime("%d-%m-%Y_%Hh%Mm%Ss")}_IPs.txt'))
         async with aiofiles.open(file_path, 'w') as file:
             await file.write("\n".join(str(ip) for ip in sorted(self.all_ips)))
+
+    def sanitize_filename(self, domain: str) -> str:
+        # Keep alphanumerics, dots, and hyphens; strip everything else
+        sanitized_domain = re.sub(r'[^A-Za-z0-9.-]+', '', domain)
+        # Optionally limit the length of the domain name in the filename
+        max_length = 50
+        if len(sanitized_domain) > max_length:
+            sanitized_domain = sanitized_domain[:max_length]
+        return sanitized_domain
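
For illustration, the helper's effect on a few inputs (a standalone sketch mirroring the committed method, not part of the diff):

import re

def sanitize_filename(domain: str, max_length: int = 50) -> str:
    # Same rule as the new method: keep [A-Za-z0-9.-], then cap the length
    sanitized = re.sub(r'[^A-Za-z0-9.-]+', '', domain)
    return sanitized[:max_length]

print(sanitize_filename('sub.example.com'))   # sub.example.com (unchanged)
print(sanitize_filename('../../etc/passwd'))  # ....etcpasswd (path separators stripped)
print(sanitize_filename('a' * 80))            # truncated to 50 characters

Path separators and other shell-hostile characters can no longer leak into log filenames, which is the sanitization issue the commit message refers to.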
50 changes: 40 additions & 10 deletions modules/subdomain_gathering.py
@@ -2,6 +2,8 @@
 import datetime
 import json
 import os
+import re
+
 import aiofiles
 import aiohttp
 from itertools import chain
@@ -61,12 +63,32 @@ async def scrape(self):
         await self._write_domains_to_file(crtsh_output_filtered)
         return list(crtsh_output_filtered)
 
-    async def _fetch_crtsh_data(self):
+    async def _fetch_crtsh_data(self, retries=3, delay=5):
         """Fetch JSON data from crt.sh"""
         url = f'https://crt.sh/?q={self.domain}&output=json'
-        async with aiohttp.ClientSession() as session:
-            async with session.get(url) as resp:
-                return await resp.json()
+        for attempt in range(retries):
+            try:
+                async with aiohttp.ClientSession() as session:
+                    async with session.get(url, timeout=30) as resp:
+                        if resp.status != 200:
+                            print(f"Error: crt.sh returned status code {resp.status} on attempt {attempt + 1}")
+                            continue
+                        # Check if the response is in JSON format
+                        content_type = resp.headers.get('Content-Type', '').lower()
+                        if 'application/json' in content_type:
+                            return await resp.json()
+                        else:
+                            # If not JSON, treat it as text (likely an HTML error page)
+                            text_response = await resp.text()
+                            print(f"crt.sh unexpected content type: {content_type}")
+                            # Print part of the response for debugging
+                            print(f"crt.sh response content: {text_response[:500]}")
+                            return None
+            except (aiohttp.ClientError, asyncio.TimeoutError) as e:
+                print(f"crt.sh request error on attempt {attempt + 1}: {e}")
+                await asyncio.sleep(delay)
+        print("All crt.sh attempts failed.")
+        return None
 
     async def _write_json_response(self, response_json):
         """Write the raw JSON response to a file"""
Expand All @@ -78,6 +100,10 @@ async def _write_json_response(self, response_json):

def _extract_and_filter_domains(self, response_json):
"""Extract and filter domains from the JSON response"""
# Check if response_json is None before attempting to process it
if response_json is None:
print("Error: No valid data returned from crt.sh")
return []
crtsh_output = [
record['name_value'].split('\n') for record in response_json
]
@@ -202,7 +228,8 @@ async def _fetch_certspotter_data(self, session):
         """Send GET request to CertSpotter API and retrieve JSON response."""
         async with session.get(
                 f'https://api.certspotter.com/v1/issuances?domain={self.domain}&expand=dns_names',
-                headers={'Accept': 'application/json'}
+                headers={'Accept': 'application/json'},
+                timeout=30
         ) as resp:
             return await resp.json(encoding='utf-8')

@@ -212,7 +239,10 @@ async def _write_json_response(self, response_json):
             self.log_dir, f'{self.domain}_{datetime.datetime.now().strftime("%d-%m-%Y_%Hh%Mm%Ss")}.json'
         )
         async with aiofiles.open(file_path, 'w') as json_request_file:
-            await json.dump(response_json, json_request_file, sort_keys=True, indent=4)
+            # Create the JSON string synchronously
+            json_string = json.dumps(response_json, sort_keys=True, indent=4)
+            # Write the JSON content to the file asynchronously
+            await json_request_file.write(json_string)
 
     async def _write_domains_to_file(self, certspotter_output):
         """Write extracted domains (no wildcards) to a file asynchronously."""
@@ -258,7 +288,7 @@ async def scrape(self):
 
     async def _fetch_hackertarget_data(self, session):
         """Send GET request to HackerTarget API and retrieve text response."""
-        async with session.get(f'https://api.hackertarget.com/hostsearch/?q={self.domain}') as resp:
+        async with session.get(f'https://api.hackertarget.com/hostsearch/?q={self.domain}', timeout=30) as resp:
             return await resp.text(encoding='utf-8')
 
     async def _write_text_response(self, response_text):
@@ -316,9 +346,9 @@ async def scrape_domain(self, domain: str):
         scrapers = [
             # Add other scrapers here (DnsDumpsterScraper, CertSpotterScraper, etc.)
             CrtShScraper(domain),
-            # DnsDumpsterScraper(domain),
-            # CertSpotterScraper(domain),
-            # HackerTargetScraper(domain),
+            DnsDumpsterScraper(domain),
+            CertSpotterScraper(domain),
+            HackerTargetScraper(domain),
         ]
         for scraper in scrapers:
             subdomains.update(await scraper.scrape())
2 changes: 1 addition & 1 deletion waf-abuser.py
@@ -19,7 +19,7 @@ class WAFAbuser:
     def __init__(self, logger_level=logging.CRITICAL):
         self.logger = self.create_logger(logger_level)
         self.input_domains = set()
-        self.similarity_rate = 0
+        self.similarity_rate = 70
         self.domains_only_flag = False
         self.ip_gatherer = IPGatherer()
         self.waf_utils = WAFUtils()
