fixed issue with filename sanitization. Added retries to crt.sh for possible long response.
Gnomee1337 committed Oct 20, 2024
1 parent 23f481a commit d14d889
Showing 3 changed files with 59 additions and 14 deletions.
21 changes: 18 additions & 3 deletions modules/ip_gathering.py
@@ -68,20 +68,26 @@ async def _remove_original_ips(self, domain: str, all_domain_ips: set):
         return all_domain_ips
 
     async def _write_html_response(self, domain: str, response_text: str):
+        # Use this sanitized domain when generating filenames
+        sanitized_domain = self.sanitize_filename(domain)
         file_path = os.path.join(self.log_dir,
-                                 f'{domain}_{datetime.datetime.now().strftime("%d-%m-%Y_%Hh%Mm%Ss")}_HTML.txt')
+                                 f'{sanitized_domain}_{datetime.datetime.now().strftime("%d-%m-%Y_%Hh%Mm%Ss")}_HTML.txt')
         async with aiofiles.open(file_path, 'w') as file:
             await file.write(response_text)
 
     async def _write_extracted_ips_to_file(self, domain: str, viewdnsinfo_ips_output: set):
+        # Use this sanitized domain when generating filenames
+        sanitized_domain = self.sanitize_filename(domain)
         file_path = os.path.join(self.log_dir,
-                                 f'{domain}_{datetime.datetime.now().strftime("%d-%m-%Y_%Hh%Mm%Ss")}_only_ips.txt')
+                                 f'{sanitized_domain}_{datetime.datetime.now().strftime("%d-%m-%Y_%Hh%Mm%Ss")}_only_ips.txt')
         async with aiofiles.open(file_path, 'w') as file:
             await file.write("\n".join(str(ip) for ip in viewdnsinfo_ips_output))
 
     async def _write_domain_related_ips_to_file(self, domain: str, domain_ips: set):
+        # Use this sanitized domain when generating filenames
+        sanitized_domain = self.sanitize_filename(domain)
         file_path = os.path.normpath(os.path.join(os.path.realpath(__file__),
-                                                  f'../../cache/{domain}_{datetime.datetime.now().strftime("%d-%m-%Y_%Hh%Mm%Ss")}_IPs.txt'))
+                                                  f'../../cache/{sanitized_domain}_{datetime.datetime.now().strftime("%d-%m-%Y_%Hh%Mm%Ss")}_IPs.txt'))
         async with aiofiles.open(file_path, 'w') as file:
             await file.write("\n".join(sorted(domain_ips)))
 
@@ -90,3 +96,12 @@ async def _write_all_possible_ips_to_file(self):
                                                   f'../../cache/ALL_DOMAINS_{datetime.datetime.now().strftime("%d-%m-%Y_%Hh%Mm%Ss")}_IPs.txt'))
         async with aiofiles.open(file_path, 'w') as file:
             await file.write("\n".join(str(ip) for ip in sorted(self.all_ips)))
+
+    def sanitize_filename(self, domain: str) -> str:
+        # Keep alphanumerics, dots, and hyphens; strip everything else
+        sanitized_domain = re.sub(r'[^A-Za-z0-9.-]+', '', domain)
+        # Optionally limit the length of the domain name in the filename
+        max_length = 50
+        if len(sanitized_domain) > max_length:
+            sanitized_domain = sanitized_domain[:max_length]
+        return sanitized_domain
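
For illustration, the helper's effect on a few inputs (a standalone sketch mirroring the committed method, not part of the diff):

import re

def sanitize_filename(domain: str, max_length: int = 50) -> str:
    # Same rule as the new method: keep [A-Za-z0-9.-], then cap the length
    sanitized = re.sub(r'[^A-Za-z0-9.-]+', '', domain)
    return sanitized[:max_length]

print(sanitize_filename('sub.example.com'))   # sub.example.com (unchanged)
print(sanitize_filename('../../etc/passwd'))  # ....etcpasswd (path separators stripped)
print(sanitize_filename('a' * 80))            # truncated to 50 characters

Path separators and other shell-hostile characters can no longer leak into log filenames, which is the sanitization issue the commit message refers to.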
50 changes: 40 additions & 10 deletions modules/subdomain_gathering.py
@@ -2,6 +2,8 @@
 import datetime
 import json
 import os
+import re
+
 import aiofiles
 import aiohttp
 from itertools import chain
@@ -61,12 +63,32 @@ async def scrape(self):
         await self._write_domains_to_file(crtsh_output_filtered)
         return list(crtsh_output_filtered)
 
-    async def _fetch_crtsh_data(self):
+    async def _fetch_crtsh_data(self, retries=3, delay=5):
         """Fetch JSON data from crt.sh"""
         url = f'https://crt.sh/?q={self.domain}&output=json'
-        async with aiohttp.ClientSession() as session:
-            async with session.get(url) as resp:
-                return await resp.json()
+        for attempt in range(retries):
+            try:
+                async with aiohttp.ClientSession() as session:
+                    async with session.get(url, timeout=30) as resp:
+                        if resp.status != 200:
+                            print(f"Error: crt.sh returned status code {resp.status} on attempt {attempt + 1}")
+                            continue
+                        # Check if the response is in JSON format
+                        content_type = resp.headers.get('Content-Type', '').lower()
+                        if 'application/json' in content_type:
+                            return await resp.json()
+                        else:
+                            # If not JSON, treat it as text (likely an HTML error page)
+                            text_response = await resp.text()
+                            print(f"crt.sh unexpected content type: {content_type}")
+                            # Print part of the response for debugging
+                            print(f"crt.sh response content: {text_response[:500]}")
+                            return None
+            except (aiohttp.ClientError, asyncio.TimeoutError) as e:
+                print(f"crt.sh request error on attempt {attempt + 1}: {e}")
+                await asyncio.sleep(delay)
+        print("All crt.sh attempts failed.")
+        return None
 
     async def _write_json_response(self, response_json):
         """Write the raw JSON response to a file"""
Expand All @@ -78,6 +100,10 @@ async def _write_json_response(self, response_json):

def _extract_and_filter_domains(self, response_json):
"""Extract and filter domains from the JSON response"""
# Check if response_json is None before attempting to process it
if response_json is None:
print("Error: No valid data returned from crt.sh")
return []
crtsh_output = [
record['name_value'].split('\n') for record in response_json
]
@@ -202,7 +228,8 @@ async def _fetch_certspotter_data(self, session):
         """Send GET request to CertSpotter API and retrieve JSON response."""
         async with session.get(
                 f'https://api.certspotter.com/v1/issuances?domain={self.domain}&expand=dns_names',
-                headers={'Accept': 'application/json'}
+                headers={'Accept': 'application/json'},
+                timeout=30
         ) as resp:
             return await resp.json(encoding='utf-8')

@@ -212,7 +239,10 @@ async def _write_json_response(self, response_json):
             self.log_dir, f'{self.domain}_{datetime.datetime.now().strftime("%d-%m-%Y_%Hh%Mm%Ss")}.json'
         )
         async with aiofiles.open(file_path, 'w') as json_request_file:
-            await json.dump(response_json, json_request_file, sort_keys=True, indent=4)
+            # Create the JSON string synchronously
+            json_string = json.dumps(response_json, sort_keys=True, indent=4)
+            # Write the JSON content to the file asynchronously
+            await json_request_file.write(json_string)
 
     async def _write_domains_to_file(self, certspotter_output):
         """Write extracted domains (no wildcards) to a file asynchronously."""
@@ -258,7 +288,7 @@ async def scrape(self):
 
     async def _fetch_hackertarget_data(self, session):
         """Send GET request to HackerTarget API and retrieve text response."""
-        async with session.get(f'https://api.hackertarget.com/hostsearch/?q={self.domain}') as resp:
+        async with session.get(f'https://api.hackertarget.com/hostsearch/?q={self.domain}', timeout=30) as resp:
             return await resp.text(encoding='utf-8')
 
     async def _write_text_response(self, response_text):
@@ -316,9 +346,9 @@ async def scrape_domain(self, domain: str):
         scrapers = [
             # Add other scrapers here (DnsDumpsterScraper, CertSpotterScraper, etc.)
             CrtShScraper(domain),
-            # DnsDumpsterScraper(domain),
-            # CertSpotterScraper(domain),
-            # HackerTargetScraper(domain),
+            DnsDumpsterScraper(domain),
+            CertSpotterScraper(domain),
+            HackerTargetScraper(domain),
         ]
         for scraper in scrapers:
             subdomains.update(await scraper.scrape())
2 changes: 1 addition & 1 deletion waf-abuser.py
@@ -19,7 +19,7 @@ class WAFAbuser:
     def __init__(self, logger_level=logging.CRITICAL):
         self.logger = self.create_logger(logger_level)
         self.input_domains = set()
-        self.similarity_rate = 0
+        self.similarity_rate = 70
         self.domains_only_flag = False
         self.ip_gatherer = IPGatherer()
         self.waf_utils = WAFUtils()
