-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* misc!: Remove non-developer dependencies. (#3) * misc!: Remove `sprint` dependency. * misc!: Remove `web_scraper` dependency. * misc!: Restructure the API. * test: Add tests.
- Loading branch information
Showing
12 changed files
with
972 additions
and
179 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,24 +1,23 @@ | ||
## A lightweight `robots.txt` ruleset parser to ensure your application follows the standard protocol. | ||
## A complete, dependency-less and fully documented `robots.txt` ruleset parser. | ||
|
||
### Usage | ||
|
||
The following code gets the `robots.txt` robot exclusion ruleset of a website. | ||
|
||
`quietMode` determines whether or not the library should print warning messages in the case of the `robots.txt` not being valid or other errors. | ||
|
||
```dart | ||
// Create an instance of the `robots.txt` parser | ||
final robots = Robots(host: 'https://github.com/'); | ||
// Read the ruleset of the website | ||
await robots.read(); | ||
// Get the contents of the `robots.txt` file. | ||
final contents = /* Your method of obtaining the contents of a `robots.txt` file. */; | ||
// Parse the contents. | ||
final robots = Robots.parse(contents); | ||
``` | ||
|
||
Now that the `robots.txt` file has been read, we can verify whether we can visit a certain path or not: | ||
Now that the `robots.txt` file has been parsed, we can verify whether a given | ||
path may be visited: | ||
|
||
```dart | ||
final userAgent = '*'; | ||
print("Can '$userAgent' visit '/gist/'?"); | ||
print(robots.canVisitPath('/gist/', userAgent: '*')); // It cannot | ||
print("Can '$userAgent' visit '/wordcollector/robots_txt'?"); | ||
print(robots.canVisitPath('/wordcollector/robots_txt', userAgent: '*')); // It can | ||
``` | ||
final userAgent = /* Your user agent. */; | ||
// False: it cannot. | ||
print(robots.verifyCanAccess('/gist/', userAgent: userAgent)); | ||
// True: it can. | ||
print(robots.verifyCanAccess('/wordcollector/robots_txt', userAgent: userAgent)); | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,5 @@ | ||
include: package:words/core.yaml | ||
include: package:words/core.yaml | ||
|
||
linter: | ||
rules: | ||
directives_ordering: false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,53 @@ | ||
import 'dart:convert'; | ||
import 'dart:io'; | ||
|
||
import 'package:robots_txt/robots_txt.dart'; | ||
|
||
Future main() async { | ||
// Create an instance of the `robots.txt` parser. | ||
final robots = Robots(host: 'https://github.com/'); | ||
// Read the ruleset of the website. | ||
await robots.read(); | ||
// Print the ruleset. | ||
Future<void> main() async { | ||
// Get the contents of the `robots.txt` file. | ||
final contents = await fetchFileContents(host: 'github.com'); | ||
// Parse the contents. | ||
final robots = Robots.parse(contents); | ||
|
||
// Print the rulesets. | ||
for (final ruleset in robots.rulesets) { | ||
// Print the user-agent the ruleset applies to. | ||
print(ruleset.appliesTo); | ||
// Print the user-agent this ruleset applies to. | ||
print(ruleset.userAgent); | ||
|
||
if (ruleset.allows.isNotEmpty) { | ||
print('Allows:'); | ||
print('Allowed:'); | ||
} | ||
// Print the path expressions allowed by this ruleset. | ||
// Print the regular expressions that match paths allowed by this | ||
// ruleset. | ||
for (final rule in ruleset.allows) { | ||
print(' - ${rule.expression}'); | ||
print(' - ${rule.pattern}'); | ||
} | ||
|
||
if (ruleset.disallows.isNotEmpty) { | ||
print('Disallows:'); | ||
print('Disallowed:'); | ||
} | ||
// Print the path expressions disallowed by this ruleset. | ||
// Print the regular expressions that match paths disallowed by this | ||
// ruleset. | ||
for (final rule in ruleset.disallows) { | ||
print(' - ${rule.expression}'); | ||
print(' - ${rule.pattern}'); | ||
} | ||
} | ||
|
||
// False: it cannot. | ||
print(robots.canVisitPath('/gist/', userAgent: '*')); | ||
print(robots.verifyCanAccess('/gist/', userAgent: '*')); | ||
// True: it can. | ||
print(robots.canVisitPath('/wordcollector/robots_txt', userAgent: '*')); | ||
return; | ||
print(robots.verifyCanAccess('/wordcollector/robots_txt', userAgent: '*')); | ||
} | ||
|
||
/// Fetches the contents of the `robots.txt` file served by [host].
///
/// Issues a GET request for `/robots.txt` on port 80 of [host] and returns the
/// response body decoded as UTF-8.
///
/// Any error raised while connecting, sending the request or decoding the
/// response propagates to the caller; the underlying [HttpClient] is closed
/// in every case.
Future<String> fetchFileContents({required String host}) async {
  final client = HttpClient();

  try {
    final request = await client.get(host, 80, '/robots.txt');
    final response = await request.close();

    return await response.transform(utf8.decoder).join();
  } finally {
    // Close the client even when the request or decoding fails, so that its
    // resources are not leaked on the error path.
    client.close();
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
/// Lightweight, fully documented `robots.txt` file parser. | ||
library robots_txt; | ||
|
||
export 'src/parser.dart'; | ||
export 'src/rule.dart'; | ||
export 'src/parser.dart' show Robots, PrecedentRuleType, FieldType; | ||
export 'src/rule.dart' show Rule, FindRule, Precedence, PrecedenceStrategy; | ||
export 'src/ruleset.dart' show Ruleset, FindRuleInRuleset; |
Oops, something went wrong.