-
Notifications
You must be signed in to change notification settings - Fork 0
/
linkshovel.rb
107 lines (103 loc) · 2.28 KB
/
linkshovel.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# LinkShovel: crawls outward from the links listed in config.html and saves
# every href it has collected to links.txt. The number of crawler threads is
# read from threads.txt.
puts "Initializing..."
require 'nokogiri' # HTML parsing (Nokogiri::HTML(...).css)
require 'open-uri' # URI.open, used to fetch pages
require 'set' # Set that holds the collected links
require 'bigdecimal'
# Fetches the page at +url+ and returns the href of every anchor on it.
#
# @param url [String] address handed to URI.open
# @return [Array<String>] href attribute values exactly as written in the
#   page (nothing is resolved against +url+); anchors without an href are
#   skipped
# @raise [StandardError] errors raised by URI.open or the parser propagate
#   to the caller
def get_links(url)
  # Block form: the IO returned by URI.open is closed as soon as the body has
  # been read, instead of staying open until garbage collection.
  html = URI.open(url, &:read)
  Nokogiri::HTML(html).css("a").map { |link| link.attr("href") }.compact
end
# Crawl state: `links` collects every href found so far (the Set removes
# duplicates); `init_links` receives the seed links read from config.html.
links = Set[]
init_links = []

# Probe a single page before doing any real work so that an offline machine
# fails fast with a readable message.
puts "Testing connection..."
begin
  get_links("http://google.com")
rescue StandardError
  puts "Connection failed, check your connection and try again"
  # Non-zero status so a calling shell or wrapper can see that start-up failed
  exit 1
end
puts " Success!"
# Seed list: take the href of every anchor in config.html, echo it, and
# queue it in init_links. Anchors without an href are ignored.
puts "Reading config..."
config = File.read('config.html')
Nokogiri::HTML(config).css("a").each do |anchor|
  target = anchor.attr("href")
  next unless target

  puts " #{target}"
  init_links << target
end
# Initial scrape: fetch every seed link once and pool the hrefs it yields
# into `links`. Progress is reported as a percentage of the actual number of
# seeds, so the figure reaches 100% whatever the size of config.html.
seed_total = init_links.length
init_links.each.with_index(1) do |url, position|
  begin
    puts "\e[H\e[2J" # ANSI: cursor home + clear screen
    puts "Performing initial scrape (this may take a while)... #{position * 100 / seed_total}%"
    puts "Current: #{url}"
    links.merge(get_links(url))
  rescue StandardError
    # Best effort: a seed that cannot be fetched or parsed is skipped
    next
  end
end
# Thread bookkeeping. `kill` is not referenced anywhere else in the visible
# script; `threadreadycount` numbers the crawler threads as they start.
kill = 0
threadreadycount = 1
threadtotalcount = 0
# Every line of threads.txt is parsed in turn, so the integer value of the
# last line is the one that sticks (String#to_i yields 0 for non-numeric text).
File.foreach("./threads.txt") { |entry| threadtotalcount = entry.to_i }
# Crawler threads: each one loops forever, picking a random known link,
# fetching it and merging the hrefs found back into `links`.
#
# `cycleno`, `lastSaveIncrement` and `links` are shared by all threads and are
# updated without any locking.
# NOTE(review): a lost counter update or a duplicate save looks harmless here,
# but confirm before relying on exact cycle numbers.
puts "Starting Threads..."
cycleno = 0
lastSaveIncrement = 0
threadtotalcount.times do |index|
  # The thread number is passed in as a thread-local argument, so the start-up
  # message cannot be mislabelled by another thread starting at the same time.
  Thread.new(index + 1) do |worker_id|
    puts " Thread #{worker_id} started!"
    loop do
      begin
        cycleno += 1
        # Dump the collected links each time the cycle count crosses a
        # multiple of 1000.
        checkpoint = cycleno.div(1000)
        if checkpoint > lastSaveIncrement
          lastSaveIncrement = checkpoint
          File.write("links.txt", links.to_s)
        end
        links.merge(get_links(links.to_a.sample))
      rescue StandardError
        # Best effort: unreachable pages, relative hrefs, etc. are skipped
        next
      end
    end
  end
end
# Status display: a background thread redraws a small dashboard roughly ten
# times per second until the process ends.
puts "Entering main loop..."
puts "\e[H\e[2J"
Thread.new do
  # The page count shown in a frame is the value sampled at the end of the
  # previous frame.
  links_int = links.size.to_s
  loop do
    201.times do
      puts "\033[0;0H" # move the cursor to the top-left corner
      puts "LinkShovel"
      puts "‾‾‾‾‾‾‾‾‾‾"
      puts "Cycle no. #{cycleno}"
      puts "Next save: Cycle #{(lastSaveIncrement + 1) * 1000}"
      puts "Found #{links_int} pages"
      puts
      puts "Press Enter to stop"
      links_int = links.size.to_s
      sleep 0.1
      # NOTE(review): this full-screen clear runs after every frame, which
      # makes the 201-frame grouping irrelevant; if a periodic clear was
      # intended it belongs after the inner loop. Confirm intent.
      puts "\e[H\e[2J"
    end
  end
end
# Shutdown: block until the user presses Enter, write the final link dump,
# then end the process (which also ends the background threads).
gets
puts "\e[H\e[2J"
puts "Exiting..."
File.write("links.txt", links.to_s)
puts "\e[H\e[2J"
puts "You can now close this window."
# A user-requested stop is a normal termination, so report success (status 0)
# rather than the failure status that Kernel#abort produces.
exit