Parameter to specify encoding error response #178

Open · wants to merge 2 commits into master
4 changes: 2 additions & 2 deletions core/mirror.py
@@ -1,7 +1,7 @@
import os


-def mirror(url, response):
+def mirror(url, response,encoding_error_response):
if response != 'dummy':
clean_url = url.replace('http://', '').replace('https://', '').rstrip('/')
parts = clean_url.split('?')[0].split('/')
@@ -36,4 +36,4 @@ def mirror(url, response):
if len(url.split('?')) > 1:
trail += '?' + url.split('?')[1]
with open(path + name + trail, 'w+') as out_file:
-out_file.write(response.encode('utf-8'))
+out_file.write(response.encode('utf-8',errors=encoding_error_response))

@ege-del, can we use errors='ignore' here?
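For context on that question, here is a minimal sketch, not part of the patch and assuming Python 3, of what a few of the handlers exposed by the new flag do when a string contains something UTF-8 cannot encode (in Python 3 that is essentially only an unpaired surrogate):

# Sketch only, not part of this PR: compare some of the handlers
# the new --encoding-error flag accepts.
text = 'key=\udcff=value'  # '\udcff' is an unpaired surrogate

for handler in ('strict', 'ignore', 'replace', 'backslashreplace'):
    try:
        print(handler, text.encode('utf-8', errors=handler))
    except UnicodeEncodeError as error:
        print(handler, 'raised:', error.reason)

With errors='ignore' the offending character is silently dropped, which avoids the crash but loses data in the mirrored copy; 'replace' or 'backslashreplace' keep a visible marker instead.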

10 changes: 5 additions & 5 deletions core/utils.py
@@ -75,14 +75,14 @@ def remove_regex(urls, regex):
return non_matching_urls


-def writer(datasets, dataset_names, output_dir):
+def writer(datasets, dataset_names, output_dir,encoding_error_response):
"""Write the results."""
for dataset, dataset_name in zip(datasets, dataset_names):
if dataset:
filepath = output_dir + '/' + dataset_name + '.txt'
with open(filepath, 'w+') as out_file:
joined = '\n'.join(dataset)
-out_file.write(str(joined.encode('utf-8').decode('utf-8')))
+out_file.write(str(joined.encode('utf-8',errors=encoding_error_response).decode('utf-8')))
out_file.write('\n')
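The encode-then-decode round trip above means each output line is passed through the chosen handler before being written, so an unencodable character is dropped, substituted, or escaped instead of aborting the whole write (with the default 'strict' it still raises as before). A rough illustration, outside the patch and assuming Python 3:

# Sketch only: what writer()'s encode/decode round trip does to a problem line.
line = 'token_\udcff_value'  # contains an unpaired surrogate

for handler in ('ignore', 'replace', 'backslashreplace'):
    cleaned = line.encode('utf-8', errors=handler).decode('utf-8')
    print(handler, repr(cleaned))
# 'ignore' drops the character, 'replace' substitutes '?',
# 'backslashreplace' keeps a readable \udcff escape.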


@@ -98,12 +98,12 @@ def timer(diff, processed):
return minutes, seconds, time_per_request


-def entropy(string):
+def entropy(string,encoding_error_response):
"""Calculate the entropy of a string."""
entropy = 0
for number in range(256):
-result = float(string.encode('utf-8').count(
-chr(number))) / len(string.encode('utf-8'))
+result = float(string.encode('utf-8',errors=encoding_error_response).count(
+chr(number))) / len(string.encode('utf-8',errors=encoding_error_response))
if result != 0:
entropy = entropy - result * math.log(result, 2)
return entropy
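This function computes plain Shannon entropy, in bits per byte, over the UTF-8 encoding of the string; later in the diff a match is only reported as a potential key when the result is at least 4. A self-contained sketch of the same calculation (the shannon_entropy name and the sample strings are illustrative, not the project's code), with the new handler parameter threaded through:

import math
from collections import Counter


def shannon_entropy(string, errors='strict'):
    """Shannon entropy, in bits per byte, of the UTF-8 encoding of `string`."""
    data = string.encode('utf-8', errors=errors)
    if not data:
        return 0.0
    total = len(data)
    # Iterating over a bytes object yields integers 0-255, so Counter keys are byte values.
    return -sum((count / total) * math.log2(count / total)
                for count in Counter(data).values())


print(shannon_entropy('normal english words'))                   # about 3.7, below the 4-bit cut-off
print(shannon_entropy('c1zR0/9PqX+2sVb7LmKtYw3hNfUgD85oEaJ6'))   # well above 4, random-looking and key-like

The choice of errors handler only changes the result when the match contains characters that strict UTF-8 encoding would reject.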
11 changes: 6 additions & 5 deletions photon.py
@@ -80,7 +80,7 @@
type=float)
parser.add_argument('-p', '--proxy', help='Proxy server IP:PORT or DOMAIN:PORT', dest='proxies',
type=proxy_type)

+parser.add_argument('--encoding-error', help='encoding error response parameter', dest='encoding_error',default='strict',choices=['backslashreplace','ignore','namereplace','strict','replace','xmlcharrefreplace',])
# Switches
parser.add_argument('--clone', help='clone the website locally', dest='clone',
action='store_true')
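The new option is an ordinary argparse choice with 'strict' as the default, so existing behaviour is unchanged when the flag is omitted. A standalone re-creation of the flag, only to show how it parses (not taken from the patch):

import argparse

# Standalone sketch mirroring the flag added above.
parser = argparse.ArgumentParser()
parser.add_argument('--encoding-error', dest='encoding_error', default='strict',
                    choices=['backslashreplace', 'ignore', 'namereplace', 'strict',
                             'replace', 'xmlcharrefreplace'])

print(parser.parse_args([]).encoding_error)                                # 'strict'
print(parser.parse_args(['--encoding-error', 'ignore']).encoding_error)    # 'ignore'

On the command line this amounts to appending something like --encoding-error ignore to an existing Photon run that uses --clone.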
@@ -190,6 +190,7 @@
host = urlparse(main_url).netloc

output_dir = args.output or host
+encoding_error_response = args.encoding_error

try:
domain = top_level(main_url)
@@ -240,7 +241,7 @@ def extractor(url):
"""Extract details from the response body."""
response = requester(url, main_url, delay, cook, headers, timeout, host, proxies, user_agents, failed, processed)
if clone:
-mirror(url, response)
+mirror(url, response,encoding_error_response)
matches = rhref.findall(response)
for link in matches:
# Remove everything after a "#" to deal with in-page anchors
@@ -282,7 +283,7 @@ def extractor(url):
if api:
matches = rentropy.findall(response)
for match in matches:
-if entropy(match) >= 4:
+if entropy(match,encoding_error_response) >= 4:
verb('Key', match)
keys.add(url + ': ' + match)

@@ -382,7 +383,7 @@ def jscanner(url):
dataset_names = ['files', 'intel', 'robots', 'custom', 'failed', 'internal',
'scripts', 'external', 'fuzzable', 'endpoints', 'keys']

-writer(datasets, dataset_names, output_dir)
+writer(datasets, dataset_names, output_dir,encoding_error_response)
# Printing out results
print(('%s-%s' % (red, end)) * 50)
for dataset, dataset_name in zip(datasets, dataset_names):
@@ -407,7 +408,7 @@ def jscanner(url):
from plugins.find_subdomains import find_subdomains
subdomains = find_subdomains(domain)
print('%s %i subdomains found' % (info, len(subdomains)))
-writer([subdomains], ['subdomains'], output_dir)
+writer([subdomains], ['subdomains'], output_dir,encoding_error_response)
datasets['subdomains'] = subdomains
from plugins.dnsdumpster import dnsdumpster
print('%s Generating DNS map' % run)