From 5f94cb079c7b03720438b1d5096fc8859b3ff131 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20Oszcz=C4=99da?= Date: Sun, 8 Jan 2023 15:07:51 +0000 Subject: [PATCH] misc: Restructure the API. (#5) * misc!: Remove non-developer dependencies. (#3) * misc!: Remove `sprint` dependency. * misc!: Remove `web_scraper` dependency. * misc!: Restructure the API. * test: Add tests. --- CHANGELOG.md | 22 ++ README.md | 27 ++- analysis_options.yaml | 6 +- example/example.dart | 55 +++-- lib/robots_txt.dart | 5 +- lib/src/parser.dart | 319 +++++++++++++++++++------- lib/src/rule.dart | 87 +++++-- lib/src/ruleset.dart | 85 ++++--- lib/src/utils.dart | 3 - pubspec.yaml | 14 +- test/contents_definitions.dart | 126 +++++++++++ test/parser_test.dart | 402 +++++++++++++++++++++++++++++++++ 12 files changed, 972 insertions(+), 179 deletions(-) delete mode 100644 lib/src/utils.dart create mode 100644 test/contents_definitions.dart create mode 100644 test/parser_test.dart diff --git a/CHANGELOG.md b/CHANGELOG.md index 0142b83..fb1ea73 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,25 @@ +## 2.0.0 + +- Additions: + - Added developer dependencies: + - `meta` for static analysis. + - `test` for testing. + - Added support for the 'Sitemap' field. + - Added support for specifying: + - The precedent rule type for determining whether a certain user-agent can + or cannot access a certain path. (`PrecedentRuleType`) + - The comparison strategy to use for comparing rule precedence. + (`PrecedenceStrategy`) + - Added tests. +- Changes: + - Bumped the minimum SDK version to `2.17.0` for enhanced enum support. +- Improvements: + - Made all structs `const` and marked them as `@sealed` and `@immutable`. +- Deletions: + - Removed dependencies: + - `sprint` + - `web_scraper` + ## 1.1.1 - Updated project description. diff --git a/README.md b/README.md index 0cead0c..fec6a52 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,23 @@ -## A lightweight `robots.txt` ruleset parser to ensure your application follows the standard protocol. +## A complete, dependency-less and fully documented `robots.txt` ruleset parser. ### Usage The following code gets the `robots.txt` robot exclusion ruleset of a website. -`quietMode` determines whether or not the library should print warning messages in the case of the `robots.txt` not being valid or other errors. - ```dart -// Create an instance of the `robots.txt` parser -final robots = Robots(host: 'https://github.com/'); -// Read the ruleset of the website -await robots.read(); +// Get the contents of the `robots.txt` file. +final contents = /* Your method of obtaining the contents of a `robots.txt` file. */; +// Parse the contents. +final robots = Robots.parse(contents); ``` -Now that the `robots.txt` file has been read, we can verify whether we can visit a certain path or not: +Now that the `robots.txt` file has been read, we can verify whether we can visit +a certain path or not: ```dart -final userAgent = '*'; -print("Can '$userAgent' visit '/gist/'?"); -print(robots.canVisitPath('/gist/', userAgent: '*')); // It cannot -print("Can '$userAgent' visit '/wordcollector/robots_txt'?"); -print(robots.canVisitPath('/wordcollector/robots_txt', userAgent: '*')); // It can -``` \ No newline at end of file +final userAgent = /* Your user agent. */; +// False: it cannot. +print(robots.verifyCanAccess('/gist/', userAgent: userAgent)); +// True: it can. 
+print(robots.verifyCanAccess('/wordcollector/robots_txt', userAgent: userAgent)); +``` diff --git a/analysis_options.yaml b/analysis_options.yaml index e925772..4996fe7 100644 --- a/analysis_options.yaml +++ b/analysis_options.yaml @@ -1 +1,5 @@ -include: package:words/core.yaml \ No newline at end of file +include: package:words/core.yaml + +linter: + rules: + directives_ordering: false \ No newline at end of file diff --git a/example/example.dart b/example/example.dart index b39b2c0..05052d7 100644 --- a/example/example.dart +++ b/example/example.dart @@ -1,32 +1,53 @@ +import 'dart:convert'; +import 'dart:io'; + import 'package:robots_txt/robots_txt.dart'; -Future main() async { - // Create an instance of the `robots.txt` parser. - final robots = Robots(host: 'https://github.com/'); - // Read the ruleset of the website. - await robots.read(); - // Print the ruleset. +Future main() async { + // Get the contents of the `robots.txt` file. + final contents = await fetchFileContents(host: 'github.com'); + // Parse the contents. + final robots = Robots.parse(contents); + + // Print the rulesets. for (final ruleset in robots.rulesets) { - // Print the user-agent the ruleset applies to. - print(ruleset.appliesTo); + // Print the user-agent this ruleset applies to. + print(ruleset.userAgent); + if (ruleset.allows.isNotEmpty) { - print('Allows:'); + print('Allowed:'); } - // Print the path expressions allowed by this ruleset. + // Print the regular expressions that match to paths allowed by this + // ruleset. for (final rule in ruleset.allows) { - print(' - ${rule.expression}'); + print(' - ${rule.pattern}'); } + if (ruleset.disallows.isNotEmpty) { - print('Disallows:'); + print('Disallowed:'); } - // Print the path expressions disallowed by this ruleset. + // Print the regular expressions that match to paths disallowed by this + // ruleset. for (final rule in ruleset.disallows) { - print(' - ${rule.expression}'); + print(' - ${rule.pattern}'); } } + // False: it cannot. - print(robots.canVisitPath('/gist/', userAgent: '*')); + print(robots.verifyCanAccess('/gist/', userAgent: '*')); // True: it can. - print(robots.canVisitPath('/wordcollector/robots_txt', userAgent: '*')); - return; + print(robots.verifyCanAccess('/wordcollector/robots_txt', userAgent: '*')); +} + +Future fetchFileContents({required String host}) async { + final client = HttpClient(); + + final contents = await client + .get(host, 80, '/robots.txt') + .then((request) => request.close()) + .then((response) => response.transform(utf8.decoder).join()); + + client.close(); + + return contents; } diff --git a/lib/robots_txt.dart b/lib/robots_txt.dart index 7a934c4..59b7c52 100644 --- a/lib/robots_txt.dart +++ b/lib/robots_txt.dart @@ -1,5 +1,6 @@ /// Lightweight, fully documented `robots.txt` file parser. 
library robots_txt; -export 'src/parser.dart'; -export 'src/rule.dart'; +export 'src/parser.dart' show Robots, PrecedentRuleType, FieldType; +export 'src/rule.dart' show Rule, FindRule, Precedence, PrecedenceStrategy; +export 'src/ruleset.dart' show Ruleset, FindRuleInRuleset; diff --git a/lib/src/parser.dart b/lib/src/parser.dart index cb918a1..32af43e 100644 --- a/lib/src/parser.dart +++ b/lib/src/parser.dart @@ -1,101 +1,163 @@ +import 'package:meta/meta.dart'; + import 'package:robots_txt/src/rule.dart'; import 'package:robots_txt/src/ruleset.dart'; -import 'package:sprint/sprint.dart'; -import 'package:web_scraper/web_scraper.dart'; -/// Abstracts away the rather convoluted declaration for an element with two -/// fields; 'title' and 'attributes'. 'attributes' is a map containing the -/// attributes of the element. -typedef Element = Map>; +/// Defines a Regex pattern that matches to comments. +final commentPattern = RegExp('#.*'); -/// Allows for parsing of a host's `robots.txt` to get information about which -/// of its resources may or may not be accessed, as well as which of its pages -/// cannot be traversed. +/// Stores information about a `robots.txt` file, exposing a simple and concise +/// API for working with the file and validating if a certain path can be +/// accessed by a given user-agent. +@immutable +@sealed class Robots { - /// Instance of `Sprint` message logger for the `robots.txt` parser. - final Sprint log; - - /// The host of this `robots.txt` file. - final String host; - - /// Stores an instance of the scraper for a given URL. - final WebScraper scraper; - - /// Stores expressions for both paths which may or may not be traversed. - final List rulesets = []; - - /// Creates an instance of a `robots.txt` parser for the provided [host]. - Robots({ - required this.host, - bool quietMode = false, - bool productionMode = true, - }) : scraper = WebScraper(host), - log = Sprint( - 'Robots', - quietMode: quietMode, - productionMode: productionMode, - ); + /// Stores information about the rules specified for given user-agents. + final List rulesets; + + /// Stores links to the website's sitemaps. + final List sitemaps; + + /// Defines an instance of `Robots` with no rulesets. + static const _empty = Robots._construct(rulesets: [], sitemaps: []); - /// Reads and parses the `robots.txt` file of the [host]. - Future read({String? onlyRelevantTo}) async { - await scraper.loadWebPage('/robots.txt'); - final body = scraper.getElement('body', [])[0]; + /// Creates an instance of `Robots`. + const Robots._construct({required this.rulesets, required this.sitemaps}); - final invalidRobotsFileError = "'$host' has an invalid `robots.txt`:"; + /// Parses the contents of a `robots.txt` file, creating an instance of + /// `Robots`. If [onlyApplicableTo] is specified, the parser will ignore any + /// rulesets that do not apply to it. + /// + /// This function will never throw an exception. + factory Robots.parse(String contents, {String? 
onlyApplicableTo}) { + contents = contents.replaceAll(commentPattern, ''); - if (body.isEmpty) { - log.warn('$invalidRobotsFileError No text elements found'); - return; + if (contents.trim().isEmpty) { + return Robots._empty; } - final content = body['title'] as String; - final lines = content.split('\n').where((line) => line.isNotEmpty); - parseRulesets(lines, onlyRelevantTo: onlyRelevantTo); + final lines = contents.split('\n').where((line) => line.isNotEmpty); + + return Robots._fromLines(lines, onlyFor: onlyApplicableTo); } - /// Iterates over [lines] and parses each ruleset, additionally ignoring - /// those rulesets which are not relevant to [onlyRelevantTo]. - void parseRulesets(Iterable lines, {String? onlyRelevantTo}) { - Ruleset? ruleset; + /// Iterates over [lines] and sequentially parses each ruleset, optionally + /// ignoring those rulesets which are not relevant to [onlyFor]. + factory Robots._fromLines( + Iterable lines, { + String? onlyFor, + }) { + final rulesets = []; + final sitemaps = []; + + // Temporary data used for parsing rulesets. + final userAgents = []; + final allows = []; + final disallows = []; + + bool isReadingRuleset() => userAgents.isNotEmpty; + + void saveRulesets() { + for (final userAgent in userAgents) { + rulesets.add( + Ruleset( + userAgent: userAgent, + allows: List.from(allows), + disallows: List.from(disallows), + ), + ); + } + } + + void reset() { + userAgents.clear(); + allows.clear(); + disallows.clear(); + } + + late FieldType previousType; for (var index = 0; index < lines.length; index++) { - final field = getRobotsFieldFromLine(lines.elementAt(index)); + final field = _getFieldFromLine(lines.elementAt(index)); + if (field == null) { + continue; + } + + final type = FieldType.byKey(field.key); + if (type == null) { + continue; + } - switch (field.key) { - case 'user-agent': - if (ruleset != null) { - rulesets.add(ruleset); + switch (type) { + case FieldType.userAgent: + if (userAgents.isNotEmpty && previousType != FieldType.userAgent) { + saveRulesets(); + reset(); } - if (onlyRelevantTo != null && field.key != onlyRelevantTo) { - ruleset = null; + + if (onlyFor != null && field.key != onlyFor) { break; } - ruleset = Ruleset(field.value); + + userAgents.add(field.value); break; + case FieldType.disallow: + if (!isReadingRuleset()) { + break; + } - case 'allow': - if (ruleset != null) { - final expression = convertFieldPathToExpression(field.value); - ruleset.allows.add(Rule(expression, index)); + final RegExp pattern; + try { + pattern = _convertPathToRegExp(field.value); + } on FormatException { + break; } + disallows.add( + Rule( + pattern: pattern, + precedence: lines.length - (index + 1), + ), + ); + break; - case 'disallow': - if (ruleset != null) { - final expression = convertFieldPathToExpression(field.value); - ruleset.disallows.add(Rule(expression, index)); + case FieldType.allow: + if (!isReadingRuleset()) { + break; } + + final RegExp pattern; + try { + pattern = _convertPathToRegExp(field.value); + } on FormatException { + break; + } + allows.add( + Rule( + pattern: pattern, + precedence: lines.length - (index + 1), + ), + ); + + break; + case FieldType.sitemap: + sitemaps.add(field.value); break; } + + previousType = type; } - if (ruleset != null) { - rulesets.add(ruleset); + if (isReadingRuleset()) { + saveRulesets(); + reset(); } + + return Robots._construct(rulesets: rulesets, sitemaps: sitemaps); } /// Reads a path declaration from within `robots.txt` and converts it to a /// regular expression for later matching. 
- RegExp convertFieldPathToExpression(String pathDeclaration) { - // Collapse duplicate slashes and wildcards into singles. + static RegExp _convertPathToRegExp(String pathDeclaration) { + // Collapse duplicate slashes and wildcards into single ones. final collapsed = pathDeclaration.replaceAll('/+', '/').replaceAll('*+', '*'); final normalised = collapsed.endsWith('*') @@ -104,35 +166,126 @@ class Robots { final withWildcardsReplaced = normalised.replaceAll('.', r'\.').replaceAll('*', '.*'); final withTrailingText = withWildcardsReplaced.contains(r'$') - ? withWildcardsReplaced.split(r'$')[0] + ? withWildcardsReplaced.split(r'$').first : '$withWildcardsReplaced.*'; return RegExp(withTrailingText, caseSensitive: false, dotAll: true); } /// Extracts the key and value from [target] and puts it into a `MapEntry`. - MapEntry getRobotsFieldFromLine(String target) { + static MapEntry? _getFieldFromLine(String target) { final keyValuePair = target.split(':'); - final key = keyValuePair[0].toLowerCase(); + if (keyValuePair.length < 2) { + return null; + } + + final key = keyValuePair.first.trim(); final value = keyValuePair.sublist(1).join(':').trim(); return MapEntry(key, value); } - /// Determines whether or not [path] may be traversed. - bool canVisitPath(String path, {required String userAgent}) { - final explicitAllowance = rulesets.getRule( - appliesTo: userAgent, - concernsPath: path, - andAllowsIt: true, + /// Checks if the `robots.txt` file allows [userAgent] to access [path]. + bool verifyCanAccess( + String path, { + required String userAgent, + PrecedentRuleType typePrecedence = PrecedentRuleType.defaultPrecedentType, + PrecedenceStrategy comparisonMethod = PrecedenceStrategy.defaultStrategy, + }) { + final allowedBy = rulesets.findApplicableRule( + userAgent: userAgent, + path: path, + type: RuleType.allow, + comparisonMethod: comparisonMethod, ); - final explicitDisallowance = rulesets.getRule( - appliesTo: userAgent, - concernsPath: path, - andAllowsIt: false, + final disallowedBy = rulesets.findApplicableRule( + userAgent: userAgent, + path: path, + type: RuleType.disallow, + comparisonMethod: comparisonMethod, ); - final allowancePriority = explicitAllowance?.priority ?? -1; - final disallowancePriority = explicitDisallowance?.priority ?? -1; + switch (typePrecedence) { + case PrecedentRuleType.defaultPrecedentType: + // TODO(vxern): Below is a fix for an issue in Dart 2.18 with the enhanced + // enums. This issue is fixed in 2.19, which is still on the beta + // channel. Refer to: https://github.com/dart-lang/sdk/issues/49188 + // ignore: no_duplicate_case_values + case PrecedentRuleType.allow: + return allowedBy != null || disallowedBy == null; + case PrecedentRuleType.disallow: + return disallowedBy != null || allowedBy == null; + } + } +} + +/// Describes the type of a rule. +@internal +enum RuleType { + /// A rule explicitly allows a given path. + allow, + + /// A rule explicitly disallows a given path. + disallow, +} + +/// Defines the method used to decide whether rules that explicitly allow a +/// user-agent to access a path take precedence over ones that disallow it to do +/// so, or the other way around. +enum PrecedentRuleType { + /// The rule that explicitly allows a user-agent to access a path takes + /// precedence over rules that explicitly disallow it. + allow, + + /// The rule that explicitly disallows a user-agent to access a path takes + /// precedence over rules that explicitly allow it. + disallow; + + /// Defines the default precedent rule type. 
+ static const defaultPrecedentType = PrecedentRuleType.allow; +} + +/// Defines a key-value field of a `robots.txt` file specifying a rule. +@visibleForTesting +enum FieldType { + /// A field specifying the user-agent the following fields apply to. + userAgent(key: 'User-agent', example: '*'), + + /// A field explicitly disallowing a user-agent to visit a path. + disallow(key: 'Disallow', example: '/'), + + /// A field explicitly allowing a user-agent to visit a path. + allow(key: 'Allow', example: '/file.txt'), + + /// A field specifying the location of a sitemap of a website. + sitemap(key: 'Sitemap', example: 'https://example.com/sitemap.xml'); + + /// The name of the field key. + final String key; + + /// An example of a field definition. Used for testing. + final String example; - return allowancePriority >= disallowancePriority; + /// Contains the field types that introduce rules. + static const rules = [FieldType.allow, FieldType.disallow]; + + /// Constructs a `FieldType`. + const FieldType({required this.key, required this.example}); + + /// Converts a `FieldType` to a `robots.txt` field. + String toField([String? value]) => '$key: ${value ?? example}'; + + /// Attempts to resolve [key] to a `FieldKey` corresponding to that [key]. + /// Returns `null` if not found. + static FieldType? byKey(String key) { + for (final value in FieldType.values) { + if (key == value.key) { + return value; + } + } + + return null; } + + @override + @Deprecated('Use `toField()` instead') + String toString(); } diff --git a/lib/src/rule.dart b/lib/src/rule.dart index 3320ce6..df9c2a1 100644 --- a/lib/src/rule.dart +++ b/lib/src/rule.dart @@ -1,30 +1,79 @@ +import 'package:meta/meta.dart'; + /// A single rule (either `Allow` or `Disallow`) inside the `robots.txt` file. +@immutable +@sealed class Rule { - /// An expression which a path may be matched against to determine whether - /// this rule applies to the path. - final RegExp expression; + /// A regular expression matching to a particular path. + final RegExp pattern; - /// The priority of this rule based on its position inside the `robots.txt` - /// file. If the path is determined to be relevant to two rules, the rule - /// with the higher priority *overrides* the ruling of the other. - final int priority; + /// The precedence of this rule based on its position inside the `robots.txt` + /// file. The rule with the higher precedence is used to decide whether or not + /// a path may be visited. + final int _precedence; - /// Instantiates a rule with an [expression] and the [priority] it has over + /// Instantiates a rule with an [pattern] and the [precedence] it has over /// other rules. - const Rule(this.expression, this.priority); + const Rule({required this.pattern, required int precedence}) + : _precedence = precedence; } -/// Extends `List` with a method for getting the `Rule` with the highest -/// [Rule.priority]. -extension RulingOnPath on List { - /// Taking [path], checks which `Rule`s' expressions match [path], and - /// returns the `Rule` with the highest priority. - Rule? getRulingOnPath(String path) { - final relevantRules = where((rule) => rule.expression.hasMatch(path)); - if (relevantRules.isEmpty) { +/// Extends `List` with methods used to find rule that pertain to a +/// certain path. +extension FindRule on List { + /// Taking a [path], returns the `Rule`s that pertain to it. 
+ List findApplicable({required String path}) => + where((rule) => rule.pattern.hasMatch(path)).toList(); + + /// Taking a [path], gets the `Rule`s that pertain to it, and returns the + /// `Rule` that has precedence over the other rules. + Rule? findMostApplicable({ + required String path, + PrecedenceStrategy comparisonMethod = PrecedenceStrategy.defaultStrategy, + }) { + final comparisonFunction = _ruleComparisonFunctions[comparisonMethod]!; + + final applicableRules = findApplicable(path: path); + if (applicableRules.isEmpty) { return null; } - // Get the relevant rule with the highest priority - return relevantRules.reduce((a, b) => a.priority > b.priority ? a : b); + + return applicableRules.reduce(comparisonFunction); } } + +/// Extends `Rule?` with a getter `precedence` to avoid having to explicitly +/// default to `-1` whenever attempting to access the hidden property +/// `_precedence` on a nullish value. +extension Precedence on Rule? { + /// Gets the precedence of this rule. Defaults to `-1` if `null`. + int get precedence => this?._precedence ?? -1; +} + +/// The signature of a method that compares two variables of type `T` and +/// returns the one supposed 'greater'. +@internal +typedef ComparisonFunction = T Function(T a, T b); + +/// `ComparisonFunction`s matched to `PrecedenceStrategy`s. +final _ruleComparisonFunctions = + Map>.unmodifiable( + >{ + PrecedenceStrategy.higherTakesPrecedence: (a, b) => + a.precedence > b.precedence ? a : b, + PrecedenceStrategy.lowerTakesPrecedence: (a, b) => + a.precedence < b.precedence ? a : b, + }, +); + +/// Defines the strategy to use to compare rules as per their `precedence`. +enum PrecedenceStrategy { + /// The rule defined higher up in the `robots.txt` file takes precedence. + higherTakesPrecedence, + + /// The rule defines lower down in the `robots.txt` file takes precedence. + lowerTakesPrecedence; + + /// Defines the default strategy to use to compare rules. + static const defaultStrategy = PrecedenceStrategy.higherTakesPrecedence; +} diff --git a/lib/src/ruleset.dart b/lib/src/ruleset.dart index de1230a..99d3c72 100644 --- a/lib/src/ruleset.dart +++ b/lib/src/ruleset.dart @@ -1,46 +1,67 @@ +import 'package:meta/meta.dart'; + +import 'package:robots_txt/src/parser.dart'; import 'package:robots_txt/src/rule.dart'; /// A collection of `Rule`s, and the `user-agent` they are relevant to inside /// the `robots.txt` file. +@immutable +@sealed class Ruleset { - /// The `user-agent` which this ruleset applies to. - final String appliesTo; + /// The user-agent which this ruleset applies to. + final String userAgent; + + /// List of `Rule`s which state that a path may not be traversed. + final List disallows; - /// List of `Rule`s which explicitly state that a path may be traversed. - final List allows = []; + /// List of `Rule`s which state that a path may be traversed. + final List allows; - /// List of `Rule`s which explicitly state that a path may not be traversed. - final List disallows = []; + /// Whether this ruleset applies to all user-agents. + final bool appliesToAll; /// Instantiates a ruleset with the `user-agent`. - Ruleset(this.appliesTo); + const Ruleset({ + required this.userAgent, + required this.allows, + required this.disallows, + }) : appliesToAll = userAgent == '*'; /// Checks whether this ruleset applies to [userAgent]. 
- bool doesConcern(String userAgent) => - appliesTo == '*' || appliesTo == userAgent; + bool appliesTo(String userAgent) => + appliesToAll || this.userAgent == userAgent; } -/// Extends `List` with a method for getting a single `Rule` from the -/// list of `Rulesets` -extension RulingOfRulesets on List { - /// Gets the rule which [appliesTo], [concernsPath] [andAllowsIt]. - Rule? getRule({ - required String appliesTo, - required String concernsPath, - required bool andAllowsIt, - }) => - fold(null, (current, next) { - if (!next.doesConcern(appliesTo)) { - return current; - } - - final currentPriority = current?.priority ?? -1; - final relevantRules = andAllowsIt ? next.allows : next.disallows; - final nextRule = relevantRules.getRulingOnPath(concernsPath); - - if (nextRule == null || nextRule.priority < currentPriority) { - return current; - } - return nextRule; - }); +/// Extends `List` with a method used to find a rule that matches +/// the supplied filters. +extension FindRuleInRuleset on List { + /// Gets the rule that applies to [userAgent], pertains to [path] and is of + /// type [type]. + Rule? findApplicableRule({ + required String userAgent, + required String path, + required RuleType type, + PrecedenceStrategy comparisonMethod = PrecedenceStrategy.defaultStrategy, + }) { + for (final ruleset in this) { + final rules = type == RuleType.allow ? ruleset.allows : ruleset.disallows; + if (rules.isEmpty) { + continue; + } + + if (!ruleset.appliesTo(userAgent)) { + continue; + } + + final rule = rules.findMostApplicable( + path: path, + comparisonMethod: comparisonMethod, + ); + if (rule != null) { + return rule; + } + } + + return null; + } } diff --git a/lib/src/utils.dart b/lib/src/utils.dart deleted file mode 100644 index 1813084..0000000 --- a/lib/src/utils.dart +++ /dev/null @@ -1,3 +0,0 @@ -/// Taking the singular form of [word], morphs it according to [count]. -String pluralise(String word, int count) => '${count == 0 ? 'no' : count} ' - '${count == 0 || count > 1 ? '${word}s' : word}'; diff --git a/pubspec.yaml b/pubspec.yaml index ed61e4a..b3e5038 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -1,20 +1,18 @@ name: robots_txt -version: 1.1.1 +version: 2.0.0 -description: >- - A lightweight `robots.txt` ruleset parser to ensure your application adheres - the de facto standard. +description: A complete, dependency-less and fully documented `robots.txt` ruleset parser. homepage: https://github.com/wordcollector/robots_txt repository: https://github.com/wordcollector/robots_txt issue_tracker: https://github.com/wordcollector/robots_txt/issues environment: - sdk: '>=2.13.0 <3.0.0' + sdk: '>=2.17.0 <3.0.0' dependencies: - sprint: ^1.0.4 - web_scraper: ^0.1.4 + meta: ^1.8.0 # Used for static analysis. dev_dependencies: - words: ^0.1.1 + test: ^1.22.1 # Testing. + words: ^0.2.0 # Stricter lints. diff --git a/test/contents_definitions.dart b/test/contents_definitions.dart new file mode 100644 index 0000000..e57b35a --- /dev/null +++ b/test/contents_definitions.dart @@ -0,0 +1,126 @@ +import 'package:robots_txt/robots_txt.dart'; + +/// Empty file contents. +const emptyContents = ''; + +/// Invalid `robots.txt` contents. +const invalidContents = 'This is an invalid robots.txt file.'; + +/// Valid `robots.txt` file with an invalid disallow field. +final validContentsInvalidPattern = ''' +${FieldType.userAgent.toField('A')} +${FieldType.disallow.toField(r'/\$')} +'''; + +/// Valid `robots.txt` file with all supported fields with example values. 
+final validContentsValidPattern = + FieldType.values.map((value) => value.toField()).join('\n'); + +/// Example rule fields without a user-agent. +final rulesWithoutUserAgent = + FieldType.rules.map((value) => value.toField()).join('\n'); + +/// Example rule fields defined before a user-agent. +final rulesDefinedBeforeUserAgent = [...FieldType.rules, FieldType.userAgent] + .map((value) => value.toField()) + .join(); + +/// Example sitemap field. +final sitemap = FieldType.sitemap.toField(); + +/// File disallowed for user-agent 'A'. +final fileDisallowedForA = ''' +${FieldType.userAgent.toField('A')} +${FieldType.disallow.toField('/file.txt')} +'''; + +/// File disallowed for user-agents 'A' and 'B'. +final fileDisallowedForAAndB = ''' +${FieldType.userAgent.toField('A')} +${FieldType.userAgent.toField('B')} +${FieldType.disallow.toField('/file.txt')} +'''; + +/// File disallowed for all user-agents. +final fileDisallowedForAll = ''' +${FieldType.userAgent.toField('*')} +${FieldType.disallow.toField('/file.txt')} +'''; + +/// File disallowed for all user-agents except 'A'. +final fileDisallowedForAllExceptA = ''' +${FieldType.userAgent.toField('*')} +${FieldType.disallow.toField('/file.txt')} +${FieldType.userAgent.toField('A')} +${FieldType.allow.toField('/file.txt')} +'''; + +/// Directory disallowed. +final directoryDisallowed = ''' +${FieldType.userAgent.toField('*')} +${FieldType.disallow.toField('/directory/')} +'''; + +/// Directory disallowed, but not a certain file. +final directoryDisallowedButNotFile = ''' +${FieldType.userAgent.toField('*')} +${FieldType.disallow.toField('/directory/')} +${FieldType.allow.toField('/directory/file.txt')} +'''; + +/// Directory disallowed, but not its subdirectory. +final directoryDisallowedButNotSubdirectory = ''' +${FieldType.userAgent.toField('*')} +${FieldType.disallow.toField('/directory/')} +${FieldType.allow.toField('/directory/subdirectory/')} +'''; + +/// Nested directory disallowed. +final nestedDirectoryDisallowed = ''' +${FieldType.userAgent.toField('*')} +${FieldType.disallow.toField('/*/directory/')} +'''; + +/// Nested directory disallowed, but not its subdirectory. +final nestedDirectoryDisallowedButNotSubdirectory = ''' +${FieldType.userAgent.toField('*')} +${FieldType.disallow.toField('/*/directory/')} +${FieldType.allow.toField('/*/directory/subdirectory/')} +'''; + +/// Nested file disallowed. +final nestedFileDisallowed = ''' +${FieldType.userAgent.toField('*')} +${FieldType.disallow.toField('/*/file.txt')} +'''; + +/// All files disallowed. +final allFilesDisallowed = ''' +${FieldType.userAgent.toField('*')} +${FieldType.disallow.toField('/*.*')} +'''; + +/// All directories disallowed. +final directoriesDisallowed = ''' +${FieldType.userAgent.toField('*')} +${FieldType.disallow.toField('/*/')} +'''; + +/// All text files disallowed, but not other files. +final textFilesDisallowed = ''' +${FieldType.userAgent.toField('*')} +${FieldType.disallow.toField('/*.txt')} +'''; + +/// Files containing a certain string disallowed. +final filesContainingStringDisallowed = ''' +${FieldType.userAgent.toField('*')} +${FieldType.disallow.toField('*/*string*.*')} +${FieldType.allow.toField('/*string*/')} +'''; + +/// Directories containing a certain string disallowed. 
+final directoriesContainingStringDisallowed = ''' +${FieldType.userAgent.toField('*')} +${FieldType.disallow.toField('/*string*/')} +'''; diff --git a/test/parser_test.dart b/test/parser_test.dart new file mode 100644 index 0000000..2b90577 --- /dev/null +++ b/test/parser_test.dart @@ -0,0 +1,402 @@ +import 'package:test/test.dart'; + +import 'package:robots_txt/robots_txt.dart'; + +import 'contents_definitions.dart'; + +void main() { + late Robots robots; + group('The parser correctly parses', () { + group('file contents', () { + test('that are empty.', () { + expect(() => robots = Robots.parse(emptyContents), returnsNormally); + expect(robots.verifyCanAccess('/', userAgent: 'A'), equals(true)); + }); + + test('that are not valid.', () { + expect(() => robots = Robots.parse(invalidContents), returnsNormally); + expect(robots.verifyCanAccess('/', userAgent: 'A'), equals(true)); + }); + + test('that are valid, but have an invalid pattern.', () { + expect( + () => robots = Robots.parse(validContentsInvalidPattern), + returnsNormally, + ); + }); + + test('that are valid.', () { + expect( + () => robots = Robots.parse(validContentsValidPattern), + returnsNormally, + ); + expect(robots.rulesets.length, equals(1)); + final ruleset = robots.rulesets.first; + expect(ruleset.disallows.length, equals(1)); + expect(ruleset.allows.length, equals(1)); + expect(robots.sitemaps.length, equals(1)); + expect(robots.verifyCanAccess('/', userAgent: 'A'), equals(false)); + expect( + robots.verifyCanAccess('/file.txt', userAgent: 'A'), + equals(true), + ); + }); + + test('that define a sitemap.', () { + expect(() => robots = Robots.parse(sitemap), returnsNormally); + expect(robots.sitemaps, equals([FieldType.sitemap.example])); + }); + }); + + group('logical rules', () { + test('defined without a user agent.', () { + expect( + () => robots = Robots.parse(rulesWithoutUserAgent), + returnsNormally, + ); + expect(robots.rulesets, equals([])); + expect(robots.verifyCanAccess('/', userAgent: 'A'), equals(true)); + }); + + test('defined before a user agent.', () { + expect( + () => robots = Robots.parse(rulesDefinedBeforeUserAgent), + returnsNormally, + ); + expect(robots.rulesets, equals([])); + expect(robots.verifyCanAccess('/', userAgent: 'A'), equals(true)); + }); + + test('that disallow a file for A.', () { + expect( + () => robots = Robots.parse(fileDisallowedForA), + returnsNormally, + ); + expect(robots.verifyCanAccess('/', userAgent: 'A'), equals(true)); + expect( + robots.verifyCanAccess('/file.txt', userAgent: 'A'), + equals(false), + ); + }); + + test('that disallow a file for both A and B.', () { + expect( + () => robots = Robots.parse(fileDisallowedForAAndB), + returnsNormally, + ); + expect( + robots.verifyCanAccess('/file.txt', userAgent: 'A'), + equals(false), + ); + expect( + robots.verifyCanAccess('/file.txt', userAgent: 'B'), + equals(false), + ); + expect( + robots.verifyCanAccess('/file.txt', userAgent: 'C'), + equals(true), + ); + }); + + test('that disallow a file for all user-agents.', () { + expect( + () => robots = Robots.parse(fileDisallowedForAll), + returnsNormally, + ); + expect( + robots.verifyCanAccess('/', userAgent: 'A'), + equals(true), + ); + expect( + robots.verifyCanAccess('/file.txt', userAgent: 'A'), + equals(false), + ); + expect( + robots.verifyCanAccess('/file.txt', userAgent: 'B'), + equals(false), + ); + }); + + test('that disallow a file for all user-agents except A.', () { + expect( + () => robots = Robots.parse(fileDisallowedForAllExceptA), + returnsNormally, + ); + 
expect( + robots.verifyCanAccess('/file.txt', userAgent: 'A'), + equals(true), + ); + expect( + robots.verifyCanAccess('/file.txt', userAgent: 'B'), + equals(false), + ); + }); + }); + + group('rules', () { + test('that disallow a directory.', () { + expect( + () => robots = Robots.parse(directoryDisallowed), + returnsNormally, + ); + expect( + robots.verifyCanAccess('/directory/', userAgent: 'A'), + equals(false), + ); + }); + + test('that disallow a directory, but allow a file from within it.', () { + expect( + () => robots = Robots.parse(directoryDisallowedButNotFile), + returnsNormally, + ); + expect( + robots.verifyCanAccess('/directory/', userAgent: 'A'), + equals(false), + ); + expect( + robots.verifyCanAccess('/directory/file.txt', userAgent: 'A'), + equals(true), + ); + }); + + test('that disallow a directory, but allow its subdirectory.', () { + expect( + () => robots = Robots.parse(directoryDisallowedButNotSubdirectory), + returnsNormally, + ); + expect( + robots.verifyCanAccess('/directory/', userAgent: 'A'), + equals(false), + ); + expect( + robots.verifyCanAccess('/directory/file.txt', userAgent: 'A'), + equals(false), + ); + expect( + robots.verifyCanAccess('/directory/subdirectory/', userAgent: 'A'), + equals(true), + ); + }); + + test('that disallow a nested directory.', () { + expect( + () => robots = Robots.parse(nestedDirectoryDisallowed), + returnsNormally, + ); + expect( + robots.verifyCanAccess('/directory/', userAgent: 'A'), + equals(true), + ); + expect( + robots.verifyCanAccess('/one/directory/', userAgent: 'A'), + equals(false), + ); + expect( + robots.verifyCanAccess('/one/two/directory/', userAgent: 'A'), + equals(false), + ); + expect( + robots.verifyCanAccess('/one/two/three/', userAgent: 'A'), + equals(true), + ); + }); + + test('that disallow a nested directory, but allow its subdirectory.', () { + expect( + () => robots = Robots.parse( + nestedDirectoryDisallowedButNotSubdirectory, + ), + returnsNormally, + ); + expect( + robots.verifyCanAccess('/nest/directory/', userAgent: 'A'), + equals(false), + ); + expect( + robots.verifyCanAccess( + '/nest/directory/subdirectory/', + userAgent: 'A', + ), + equals(true), + ); + }); + + test('that disallow a nested file.', () { + expect( + () => robots = Robots.parse(nestedFileDisallowed), + returnsNormally, + ); + expect( + robots.verifyCanAccess('/file.txt', userAgent: 'A'), + equals(true), + ); + expect( + robots.verifyCanAccess('/directory/file.txt', userAgent: 'A'), + equals(false), + ); + expect( + robots.verifyCanAccess( + '/directory/subdirectory/file.txt', + userAgent: 'A', + ), + equals(false), + ); + expect( + robots.verifyCanAccess( + '/directory/subdirectory/file_2.txt', + userAgent: 'A', + ), + equals(true), + ); + }); + + test('that disallow files.', () { + expect( + () => robots = Robots.parse(allFilesDisallowed), + returnsNormally, + ); + expect( + robots.verifyCanAccess('/file.txt', userAgent: 'A'), + equals(false), + ); + expect( + robots.verifyCanAccess('/path', userAgent: 'A'), + equals(true), + ); + expect( + robots.verifyCanAccess('/directory/', userAgent: 'A'), + equals(true), + ); + expect( + robots.verifyCanAccess('/directory/file.txt', userAgent: 'A'), + equals(false), + ); + expect( + robots.verifyCanAccess( + '/directory/subdirectory/file.txt', + userAgent: 'A', + ), + equals(false), + ); + expect( + robots.verifyCanAccess( + '/directory/subdirectory/', + userAgent: 'A', + ), + equals(true), + ); + }); + + test('that disallow directories.', () { + expect( + () => robots = 
Robots.parse(directoriesDisallowed), + returnsNormally, + ); + expect( + robots.verifyCanAccess('/file.txt', userAgent: 'A'), + equals(true), + ); + expect( + robots.verifyCanAccess('/file', userAgent: 'A'), + equals(true), + ); + expect( + robots.verifyCanAccess('/directory/', userAgent: 'A'), + equals(false), + ); + expect( + robots.verifyCanAccess('/directory/file.txt', userAgent: 'A'), + equals(false), + ); + expect( + robots.verifyCanAccess( + '/directory/subdirectory/file.txt', + userAgent: 'A', + ), + equals(false), + ); + expect( + robots.verifyCanAccess( + '/directory/subdirectory/', + userAgent: 'A', + ), + equals(false), + ); + }); + + test('that disallow only text files.', () { + expect( + () => robots = Robots.parse(textFilesDisallowed), + returnsNormally, + ); + expect( + robots.verifyCanAccess('/file.txt', userAgent: 'A'), + equals(false), + ); + expect( + robots.verifyCanAccess('/file.pdf', userAgent: 'A'), + equals(true), + ); + }); + + test('that disallow files that contain a certain string.', () { + expect( + () => robots = Robots.parse(filesContainingStringDisallowed), + returnsNormally, + ); + expect( + robots.verifyCanAccess('/file.txt', userAgent: 'A'), + equals(true), + ); + expect( + robots.verifyCanAccess('/string.txt', userAgent: 'A'), + equals(false), + ); + expect( + robots.verifyCanAccess('/abc|string.txt', userAgent: 'A'), + equals(false), + ); + expect( + robots.verifyCanAccess('/string|abc.txt', userAgent: 'A'), + equals(false), + ); + expect( + robots.verifyCanAccess('/string/file.txt', userAgent: 'A'), + equals(true), + ); + }); + + test('that disallow directories that contain a certain string.', () { + expect( + () => robots = Robots.parse(directoriesContainingStringDisallowed), + returnsNormally, + ); + expect( + robots.verifyCanAccess('/string.txt', userAgent: 'A'), + equals(true), + ); + expect( + robots.verifyCanAccess('/directory/string.txt', userAgent: 'A'), + equals(true), + ); + expect( + robots.verifyCanAccess('/string/', userAgent: 'A'), + equals(false), + ); + expect( + robots.verifyCanAccess('/abc|string/', userAgent: 'A'), + equals(false), + ); + expect( + robots.verifyCanAccess('/string|abc/', userAgent: 'A'), + equals(false), + ); + expect( + robots.verifyCanAccess('/one/two/three/string/five/', userAgent: 'A'), + equals(false), + ); + }); + }); + }); +}
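
For quick reference, below is a minimal, self-contained sketch of the 2.0.0 API this patch introduces (`Robots.parse`, `verifyCanAccess`, `sitemaps`). The `robots.txt` contents and the user-agent used here are illustrative only; the expected results mirror the behaviour exercised by the new tests (`directoryDisallowedButNotFile` and the sitemap test). `verifyCanAccess` also accepts the optional `typePrecedence` and `comparisonMethod` parameters defined in `lib/src/parser.dart` and `lib/src/rule.dart`, which are not demonstrated here.

```dart
import 'package:robots_txt/robots_txt.dart';

void main() {
  // Illustrative file contents; in practice these would be fetched from the
  // host's `/robots.txt`, as shown in `example/example.dart`.
  const contents = '''
User-agent: *
Disallow: /directory/
Allow: /directory/file.txt
Sitemap: https://example.com/sitemap.xml
''';

  // Parse the contents into rulesets and sitemap links.
  final robots = Robots.parse(contents);

  // Prints: [https://example.com/sitemap.xml]
  print(robots.sitemaps);

  // False: the directory is disallowed for all user-agents.
  print(robots.verifyCanAccess('/directory/', userAgent: 'A'));

  // True: the file is explicitly allowed, and allow rules take precedence
  // under the default `PrecedentRuleType.allow`.
  print(robots.verifyCanAccess('/directory/file.txt', userAgent: 'A'));
}
```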