
Commit

misc: Restructure the API. (#5)
* misc!: Remove non-developer dependencies. (#3)

* misc!: Remove `sprint` dependency.

* misc!: Remove `web_scraper` dependency.

* misc!: Restructure the API.

* test: Add tests.
vxern authored Jan 8, 2023
1 parent de0a3a4 commit 5f94cb0
Showing 12 changed files with 972 additions and 179 deletions.
22 changes: 22 additions & 0 deletions CHANGELOG.md
```diff
@@ -1,3 +1,25 @@
+## 2.0.0
+
+- Additions:
+  - Added developer dependencies:
+    - `meta` for static analysis.
+    - `test` for testing.
+  - Added support for the 'Sitemap' field.
+  - Added support for specifying:
+    - The precedent rule type for determining whether a certain user-agent can
+      or cannot access a certain path. (`PrecedentRuleType`)
+    - The comparison strategy to use for comparing rule precedence.
+      (`PrecedenceStrategy`)
+  - Added tests.
+- Changes:
+  - Bumped the minimum SDK version to `2.17.0` for enhanced enum support.
+- Improvements:
+  - Made all structs `const` and marked them as `@sealed` and `@immutable`.
+- Deletions:
+  - Removed dependencies:
+    - `sprint`
+    - `web_scraper`
+
 ## 1.1.1
 
 - Updated project description.
```
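The changelog describes a parser that now works on caller-supplied contents rather than fetching the file itself. A minimal migration sketch, based only on the calls visible in the diffs below (`Robots.parse`, `verifyCanAccess`); the sample `robots.txt` contents are illustrative, not taken from any real site:

```dart
import 'package:robots_txt/robots_txt.dart';

void main() {
  // 1.x fetched the file itself:
  //   final robots = Robots(host: 'https://github.com/');
  //   await robots.read();
  //   robots.canVisitPath('/gist/', userAgent: '*');
  //
  // 2.0.0 leaves fetching to the caller and only parses.
  const contents = '''
User-agent: *
Disallow: /gist/
''';

  final robots = Robots.parse(contents);
  print(robots.verifyCanAccess('/gist/', userAgent: '*')); // false
  print(robots.verifyCanAccess('/about', userAgent: '*')); // true
}
```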
27 changes: 13 additions & 14 deletions README.md
````diff
@@ -1,24 +1,23 @@
-## A lightweight `robots.txt` ruleset parser to ensure your application follows the standard protocol.
+## A complete, dependency-less and fully documented `robots.txt` ruleset parser.
 
 ### Usage
 
 The following code gets the `robots.txt` robot exclusion ruleset of a website.
 
-`quietMode` determines whether or not the library should print warning messages in the case of the `robots.txt` not being valid or other errors.
-
 ```dart
-// Create an instance of the `robots.txt` parser
-final robots = Robots(host: 'https://github.com/');
-// Read the ruleset of the website
-await robots.read();
+// Get the contents of the `robots.txt` file.
+final contents = /* Your method of obtaining the contents of a `robots.txt` file. */;
+// Parse the contents.
+final robots = Robots.parse(contents);
 ```
 
-Now that the `robots.txt` file has been read, we can verify whether we can visit a certain path or not:
+Now that the `robots.txt` file has been read, we can verify whether we can visit
+a certain path or not:
 
 ```dart
-final userAgent = '*';
-print("Can '$userAgent' visit '/gist/'?");
-print(robots.canVisitPath('/gist/', userAgent: '*')); // It cannot
-print("Can '$userAgent' visit '/wordcollector/robots_txt'?");
-print(robots.canVisitPath('/wordcollector/robots_txt', userAgent: '*')); // It can
-```
+final userAgent = /* Your user agent. */;
+// False: it cannot.
+print(robots.verifyCanAccess('/gist/', userAgent: userAgent));
+// True: it can.
+print(robots.verifyCanAccess('/wordcollector/robots_txt', userAgent: userAgent));
+```
````
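The new README leaves obtaining the file contents to the caller. As a sketch of one possible way to fill that placeholder (the local file path is illustrative; fetching over HTTP, as in `example/example.dart` below, works equally well):

```dart
import 'dart:io';

import 'package:robots_txt/robots_txt.dart';

Future<void> main() async {
  // Read a locally saved copy of the file; any other source of the raw
  // `robots.txt` text would do just as well.
  final contents = await File('robots.txt').readAsString();
  final robots = Robots.parse(contents);
  print(robots.verifyCanAccess('/gist/', userAgent: '*'));
}
```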
6 changes: 5 additions & 1 deletion analysis_options.yaml
```diff
@@ -1 +1,5 @@
-include: package:words/core.yaml
+include: package:words/core.yaml
+
+linter:
+  rules:
+    directives_ordering: false
```
55 changes: 38 additions & 17 deletions example/example.dart
```diff
@@ -1,32 +1,53 @@
+import 'dart:convert';
+import 'dart:io';
+
 import 'package:robots_txt/robots_txt.dart';
 
-Future main() async {
-  // Create an instance of the `robots.txt` parser.
-  final robots = Robots(host: 'https://github.com/');
-  // Read the ruleset of the website.
-  await robots.read();
-  // Print the ruleset.
+Future<void> main() async {
+  // Get the contents of the `robots.txt` file.
+  final contents = await fetchFileContents(host: 'github.com');
+  // Parse the contents.
+  final robots = Robots.parse(contents);
+
+  // Print the rulesets.
   for (final ruleset in robots.rulesets) {
-    // Print the user-agent the ruleset applies to.
-    print(ruleset.appliesTo);
+    // Print the user-agent this ruleset applies to.
+    print(ruleset.userAgent);
+
     if (ruleset.allows.isNotEmpty) {
-      print('Allows:');
+      print('Allowed:');
     }
-    // Print the path expressions allowed by this ruleset.
+    // Print the regular expressions that match to paths allowed by this
+    // ruleset.
     for (final rule in ruleset.allows) {
-      print(' - ${rule.expression}');
+      print(' - ${rule.pattern}');
     }
+
     if (ruleset.disallows.isNotEmpty) {
-      print('Disallows:');
+      print('Disallowed:');
     }
-    // Print the path expressions disallowed by this ruleset.
+    // Print the regular expressions that match to paths disallowed by this
+    // ruleset.
     for (final rule in ruleset.disallows) {
-      print(' - ${rule.expression}');
+      print(' - ${rule.pattern}');
     }
   }
+
   // False: it cannot.
-  print(robots.canVisitPath('/gist/', userAgent: '*'));
+  print(robots.verifyCanAccess('/gist/', userAgent: '*'));
   // True: it can.
-  print(robots.canVisitPath('/wordcollector/robots_txt', userAgent: '*'));
-  return;
+  print(robots.verifyCanAccess('/wordcollector/robots_txt', userAgent: '*'));
 }
+
+Future<String> fetchFileContents({required String host}) async {
+  final client = HttpClient();
+
+  final contents = await client
+      .get(host, 80, '/robots.txt')
+      .then((request) => request.close())
+      .then((response) => response.transform(utf8.decoder).join());
+
+  client.close();
+
+  return contents;
+}
```
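The `fetchFileContents` helper in the example requests the file over plain HTTP on port 80. For hosts that only serve HTTPS, a variant using `HttpClient.getUrl` with `Uri.https` would work; this is a sketch and not part of the commit, and the function name is illustrative:

```dart
import 'dart:convert';
import 'dart:io';

// Sketch of an HTTPS counterpart to `fetchFileContents` from the example.
Future<String> fetchFileContentsSecure({required String host}) async {
  final client = HttpClient();

  try {
    // Request `https://<host>/robots.txt` and decode the response body.
    final request = await client.getUrl(Uri.https(host, '/robots.txt'));
    final response = await request.close();
    return await response.transform(utf8.decoder).join();
  } finally {
    client.close();
  }
}
```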
5 changes: 3 additions & 2 deletions lib/robots_txt.dart
```diff
@@ -1,5 +1,6 @@
 /// Lightweight, fully documented `robots.txt` file parser.
 library robots_txt;
 
-export 'src/parser.dart';
-export 'src/rule.dart';
+export 'src/parser.dart' show Robots, PrecedentRuleType, FieldType;
+export 'src/rule.dart' show Rule, FindRule, Precedence, PrecedenceStrategy;
+export 'src/ruleset.dart' show Ruleset, FindRuleInRuleset;
```