Commit: updates
Miguel Sanchez authored and Miguel Sanchez committed Aug 12, 2024
1 parent ee9b389 commit a543f8e
Showing 1 changed file with 241 additions and 13 deletions.
254 changes: 241 additions & 13 deletions spyhunt.py
@@ -39,7 +39,8 @@
import random
import string
import html

import asyncio
import aiohttp


warnings.filterwarnings(action='ignore',module='bs4')
@@ -255,6 +256,16 @@ def scan(command: str) -> str:
type=str, help='scan for sensitive info in javascript files',
metavar='domain.com')

crawlers_group.add_argument('-dp', '--depth',
type=str, help='depth of the crawl',
metavar='10')

crawlers_group.add_argument('-je', '--javascript_endpoints',
type=str, help='extract javascript endpoints',
metavar='file.txt')

parser.add_argument("-c", "--concurrency", type=int, default=10, help="Maximum number of concurrent requests")

nuclei_group.add_argument('-nl', '--nuclei_lfi', action='store_true', help="Find Local File Inclusion with nuclei")

passiverecon_group.add_argument('-gs', '--google', action='store_true', help='Google Search')
@@ -551,17 +562,140 @@ def check_host_header_injection(domainlist):
commands(f"waybackurls {args.waybackurls}")

if args.j:
if args.save:
print(Fore.CYAN + "Saving output to {}".format(args.save))
commands(f"echo {args.j} | waybackurls | grep '\\.js$' | uniq | sort >> {args.save}")
commands(f"echo {args.j} | gau | grep -Eo 'https?://\\S+?\\.js' | anew >> {args.save}")
if path.exists(f"{args.save}"):
print(Fore.GREEN + "DONE!")
if not path.exists(f"{args.save}"):
print(Fore.RED + "ERROR!")
else:
commands(f"echo {args.j} | waybackurls | grep '\\.js$' | anew")
commands(f"echo {args.j} | gau | grep -Eo 'https?://\\S+?\\.js' | anew")
init(autoreset=True)

async def fetch(session, url):
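# Fetch one URL and return its body text: 200 -> text, 404 -> silently skipped,
# other statuses -> warning; client errors and timeouts fall through to None.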
try:
async with session.get(url, timeout=10) as response:
if response.status == 200:
return await response.text()
elif response.status == 404:
# Silently ignore 404 errors
return None
else:
print(f"{Fore.YELLOW}Warning: {url} returned status code {response.status}{Style.RESET_ALL}")
return None
except aiohttp.ClientError as e:
print(f"{Fore.RED}Error fetching {url}: {e}{Style.RESET_ALL}")
except asyncio.TimeoutError:
print(f"{Fore.RED}Timeout error fetching {url}{Style.RESET_ALL}")
except Exception as e:
print(f"{Fore.RED}Unexpected error fetching {url}: {e}{Style.RESET_ALL}")
return None

def is_valid_url(url):
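# Treat a URL as crawlable only if it parses with both a scheme and a network location.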
try:
parsed = urlparse(url)
return bool(parsed.netloc) and bool(parsed.scheme)
except Exception as e:
print(f"{Fore.RED}Error parsing URL {url}: {e}{Style.RESET_ALL}")
return False

def is_same_domain(url, domain):
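# Exact netloc comparison: subdomains (and URLs with an explicit port) count as a different domain.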
try:
return urlparse(url).netloc == domain
except Exception as e:
print(f"{Fore.RED}Error comparing domains for {url}: {e}{Style.RESET_ALL}")
return False

async def get_js_links(session, url, domain):
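# Parse one page and collect (a) JS URLs from <script src=...> tags and *.js strings
# inside inline scripts, limited to the target domain, and (b) <a href> links to crawl next.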
js_links = set()
new_links = set()
html = await fetch(session, url)
if html:
try:
soup = BeautifulSoup(html, 'html.parser')

for script in soup.find_all('script', src=True):
script_url = urljoin(url, script['src'])
if is_valid_url(script_url) and is_same_domain(script_url, domain):
js_links.add(script_url)

for script in soup.find_all('script'):
if script.string:
js_urls = re.findall(r'[\'"]([^\'"]*\.js)[\'"]', script.string)
for js_url in js_urls:
full_js_url = urljoin(url, js_url)
if is_valid_url(full_js_url) and is_same_domain(full_js_url, domain):
js_links.add(full_js_url)

new_links = set(urljoin(url, link['href']) for link in soup.find_all('a', href=True))
except Exception as e:
print(f"{Fore.RED}Error parsing HTML from {url}: {e}{Style.RESET_ALL}")

return js_links, new_links

async def crawl_website(url, max_depth, concurrency):
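# Breadth-first crawl, one depth level per pass; a semaphore caps concurrent page
# fetches at `concurrency` (the -c/--concurrency value).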
try:
domain = urlparse(url).netloc
visited = set()
to_visit = {url}
js_files = set()
semaphore = asyncio.Semaphore(concurrency)

async def bounded_get_js_links(session, url, domain):
async with semaphore:
return await get_js_links(session, url, domain)

async with aiohttp.ClientSession() as session:
for depth in range(int(max_depth) + 1):
if not to_visit:
break

tasks = [bounded_get_js_links(session, url, domain) for url in to_visit]
results = await asyncio.gather(*tasks, return_exceptions=True)

visited.update(to_visit)
to_visit = set()

for result in results:
if isinstance(result, Exception):
print(f"{Fore.RED}Error during crawl: {result}{Style.RESET_ALL}")
continue
js_links, new_links = result
js_files.update(js_links)
to_visit.update(link for link in new_links
if is_valid_url(link) and is_same_domain(link, domain) and link not in visited)

print(f"{Fore.CYAN}Depth {depth}: Found {len(js_files)} JS files, {len(to_visit)} new URLs to visit{Style.RESET_ALL}")

return js_files
except Exception as e:
print(f"{Fore.RED}Unexpected error during crawl: {e}{Style.RESET_ALL}")
return set()

async def main():
try:
print(f"{Fore.CYAN}Crawling {Fore.GREEN}{args.j}{Fore.CYAN} for JavaScript files...{Style.RESET_ALL}\n")
js_files = await crawl_website(args.j, args.depth, args.concurrency)

if js_files:
print(f"\n{Fore.YELLOW}Found {len(js_files)} JavaScript files:{Style.RESET_ALL}")
for js_file in sorted(js_files):
print(js_file)

if args.save:
try:
with open(args.save, 'w') as f:
for js_file in sorted(js_files):
f.write(f"{js_file}\n")
print(f"\n{Fore.GREEN}Results saved to {args.save}{Style.RESET_ALL}")
except IOError as e:
print(f"{Fore.RED}Error saving results to file: {e}{Style.RESET_ALL}")
else:
print(f"{Fore.RED}No JavaScript files found.{Style.RESET_ALL}")
except Exception as e:
print(f"{Fore.RED}Unexpected error in main function: {e}{Style.RESET_ALL}")

if __name__ == "__main__":
try:
asyncio.run(main())
except KeyboardInterrupt:
print(f"{Fore.YELLOW}Crawl interrupted by user.{Style.RESET_ALL}")
sys.exit(1)
except Exception as e:
print(f"{Fore.RED}Fatal error: {e}{Style.RESET_ALL}")
sys.exit(1)

if args.dns:
if args.save:
@@ -1554,4 +1688,98 @@ def main():
print()

if __name__ == "__main__":
main()

if args.javascript_endpoints:

init(autoreset=True)

async def fetch(session, url):
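# Local variant of fetch() for the -je workflow: return the JS body on 200 and
# silently ignore every other status; errors and timeouts yield None.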
try:
async with session.get(url, timeout=10) as response:
if response.status == 200:
return await response.text()
else:
pass
return None
except aiohttp.ClientError as e:
print(f"{Fore.RED}Error fetching {url}: {e}{Style.RESET_ALL}")
except asyncio.TimeoutError:
print(f"{Fore.RED}Timeout error fetching {url}{Style.RESET_ALL}")
except Exception as e:
print(f"{Fore.RED}Unexpected error fetching {url}: {e}{Style.RESET_ALL}")
return None

def find_endpoints(js_content):
# This regex pattern looks for common endpoint patterns in JavaScript
endpoint_pattern = r'(?:"|\'|\`)(/(?:api/)?[\w-]+(?:/[\w-]+)*(?:\.\w+)?)'
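# Illustrative matches: "/api/users" -> /api/users, '/login' -> /login, "/assets/app.js" -> /assets/app.js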
endpoints = set(re.findall(endpoint_pattern, js_content))
return endpoints

async def analyze_js_file(session, js_url):
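# Download a single JS file and return (url, set of endpoints found in it).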
js_content = await fetch(session, js_url)
if js_content:
endpoints = find_endpoints(js_content)
return js_url, endpoints
return js_url, set()

async def process_js_files(file_path, concurrency):
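# Read JS URLs (one per line) from the -je input file, analyze them concurrently under
# the semaphore, and return a {js_url: endpoints} mapping.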
js_files = {}

try:
with open(file_path, 'r') as file:
js_urls = [line.strip() for line in file if line.strip()]

async with aiohttp.ClientSession() as session:
semaphore = asyncio.Semaphore(concurrency)

async def bounded_analyze_js_file(js_url):
async with semaphore:
return await analyze_js_file(session, js_url)

tasks = [bounded_analyze_js_file(js_url) for js_url in js_urls]
results = await asyncio.gather(*tasks)

for js_url, endpoints in results:
js_files[js_url] = endpoints

except Exception as e:
print(f"{Fore.RED}Error processing JS file list: {e}{Style.RESET_ALL}")

return js_files

async def main():
print(f"{Fore.CYAN}Analyzing JavaScript files from {Fore.GREEN}{args.javascript_endpoints}{Style.RESET_ALL}\n")
js_files = await process_js_files(args.javascript_endpoints, args.concurrency)

if js_files:
print(f"\n{Fore.YELLOW}Analyzed {len(js_files)} JavaScript files:{Style.RESET_ALL}")
for js_url, endpoints in js_files.items():
print(f"\n{Fore.CYAN}{js_url}{Style.RESET_ALL}")
if endpoints:
print(f"{Fore.GREEN}Endpoints found:{Style.RESET_ALL}")
for endpoint in sorted(endpoints):
print(f" {endpoint}")
else:
print(f"{Fore.YELLOW}No endpoints found{Style.RESET_ALL}")

if args.save:
try:
with open(args.save, 'w') as f:
for js_url, endpoints in js_files.items():
f.write(f"{js_url}\n")
if endpoints:
f.write("Endpoints:\n")
for endpoint in sorted(endpoints):
f.write(f" {endpoint}\n")
else:
f.write("No endpoints found\n")
f.write("\n")
print(f"\n{Fore.GREEN}Results saved to {args.save}{Style.RESET_ALL}")
except IOError as e:
print(f"{Fore.RED}Error saving results to file: {e}{Style.RESET_ALL}")
else:
print(f"{Fore.RED}No JavaScript files were successfully analyzed.{Style.RESET_ALL}")

if __name__ == "__main__":
asyncio.run(main())
