-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawl.php
38 lines (33 loc) · 964 Bytes
/
crawl.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
<?php
ini_set('memory_limit', '256M');
require_once __DIR__ . '/vendor/autoload.php';
require('src/Crawler.php');
require('src/Page.php');
require('src/PageFactory.php');
require('src/Link.php');
require('src/LinkFactory.php');
use SevenEcks\StringUtils\StringUtils;
use SevenEcks\Ansi\Colorize;
use SevenEcks\Web\Crawler;
use SevenEcks\Web\PageFactory;
// Load environment variables through Dotenv, rooted at this script's directory.
$dotenv = new Dotenv\Dotenv(__DIR__);
$dotenv->load();
// Helper used for formatted console output.
$su = new StringUtils;
// Build the crawler.
$crawler = new Crawler;
// Clear the crawler's log before doing anything else.
$crawler->logger->clearLog();
// Read the target URL from the first CLI argument. The isset() guard avoids the
// "undefined offset/array key 1" notice that a bare $argv[1] access raises when
// the script is invoked without arguments.
$url = isset($argv[1]) ? $argv[1] : '';
if (!$url) {
    // No usable URL: show the usage line and stop with a non-zero exit status
    // so shell callers can tell the invocation failed.
    $su->alert('Usage: php ' . $argv[0] . ' http://example.com');
    exit(1);
}
// Let the user know we are starting.
$su->tell($su->tostr(Colorize::cyan('Beginning Crawl of URL: '), Colorize::yellow($url)));
// Don't crawl external links.
$crawler->setCrawlExternal(false);
// Start crawling.
$crawler->start($url);