dorker.py
import argparse

import requests
from bs4 import BeautifulSoup
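# Dorker builds a Google "dork" query from the CLI flags below, fetches the
# search result pages, and scrapes the organic result links out of the
# returned HTML with BeautifulSoup.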
print(" ___ ___ ___ ___ ___ ___ \n / /\ / /\ / /\ / /\ / /\ / /\ \n / /::\ / /::\ / /::\ / /:/ / /::\ / /::\ \n / /:/\:\ / /:/\:\ / /:/\:\ / /:/ / /:/\:\ / /:/\:\ \n / /:/ \:\ / /:/ \:\ / /::\ \:\ / /::\____ / /::\ \:\ / /::\ \:\ \n /__/:/ \__\:| /__/:/ \__\:\ /__/:/\:\_\:\ /__/:/\:::::\ /__/:/\:\ \:\ /__/:/\:\_\:\ \n \ \:\ / /:/ \ \:\ / /:/ \__\/~|::\/:/ \__\/~|:|~~~~ \ \:\ \:\_\/ \__\/~|::\/:/\n \ \:\ /:/ \ \:\ /:/ | |:|::/ | |:| \ \:\ \:\ | |:|::/\n \ \:\/:/ \ \:\/:/ | |:|\/ | |:| \ \:\_\/ | |:|\/ \n \__\::/ \ \::/ |__|:|~ |__|:| \ \:\ |__|:|~ \n ~~ \__\/ \__\| \__\| \__\/ \__\| ")
print("\nDorker v0.1a")
print("Author: Bastian Muhlhauser, @xpl0ited1\n")
parser = argparse.ArgumentParser()
parser.add_argument("--site", help="Search for links related to a domain")
parser.add_argument("--ext", help="Search for links with a extension")
parser.add_argument("--inurl", help="Search for links containing a string in the URL")
parser.add_argument("--intitle", help="Search for links containing a string as title")
parser.add_argument("--search", help="Search for links containing a string")
parser.add_argument("--output", help="Specify the output file")
parser.add_argument("--pages", help="Specify the number of pages to look for in Google Search", type=int)
args = parser.parse_args()
site = "site:%s+" % args.site if args.site else ''
ext = "ext:%s+" % args.ext if args.ext else ''
inurl = "inurl:%s+" % args.inurl if args.inurl else ''
intitle = "intitle:%s+" % args.intitle if args.intitle else ''
search = "intitle:%s" % args.search if args.search else ''
pages = args.pages if args.pages else 0
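# A desktop browser User-Agent is sent so Google serves the regular HTML
# results page (with the <div class="g"> result markup parsed below) rather
# than rejecting the default python-requests client.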
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:12.0) Gecko/20100101 Firefox/21.0'
}
base_url = "https://www.google.com/search?q="
url = base_url+site+ext+inurl+intitle+search
links = []
def parse_links(soup):
    # Google wraps each organic result in <div class="g">; result hrefs look
    # like "/url?q=<target>&sa=...", so strip the 7-char "/url?q=" prefix and
    # the trailing tracking parameters, and skip Google-cache links.
    divs = soup.find_all('div', attrs={'class': 'g'}, recursive=True)
    print("[+] Parsing HTML tags")
    for div in divs:
        for a in div.find_all(href=True):
            if a['href'][0:4] == '/url':
                link = a['href'][7:].split('&sa')[0]
                if link[0:15] != 'http://webcache':
                    links.append(link)

def initial_search():
    print("[+] Obtaining links...")
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, 'html.parser')
    parse_links(soup)
initial_search()
if pages > 1:
    # Google paginates with &start=<offset>: page N begins at offset (N - 1) * 10.
    # Build each page URL from the base query rather than appending to the
    # previous one, so the start parameter is not duplicated.
    for i in range(10, pages * 10, 10):
        page_url = url + ("&start=%d" % i)
        print("[+] Page: %d" % (i // 10 + 1))
        r = requests.get(page_url, headers=headers)
        soup = BeautifulSoup(r.text, 'html.parser')
        parse_links(soup)
print("[+] Results:\n")
for link in links:
print("%s\n" % link)
if args.output:
print("[+] Writing results...\n")
f=open(args.output,"w+")
for link in links:
f.write("%s\n" % link)
f.close()
print("\n[#] Finished...")