Skip to content

Commit 93f7d7a

Browse files
committed
Minor doc fixes in nucleo_matcher
1 parent e644134 commit 93f7d7a

File tree

5 files changed

+30
-18
lines changed

5 files changed

+30
-18
lines changed

Diff for: matcher/src/chars.rs

+17-9
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
//! Utilities for working with (unicode) characters/codepoints
1+
//! Utilities for working with (Unicode) characters and codepoints.
22
33
use std::fmt::{self, Debug, Display};
44

55
#[cfg(feature = "unicode-casefold")]
66
use crate::chars::case_fold::CASE_FOLDING_SIMPLE;
77
use crate::Config;
88

9-
//autogenerated by generate-ucd
9+
// autogenerated by generate-ucd
1010
#[allow(warnings)]
1111
#[rustfmt::skip]
1212
#[cfg(feature = "unicode-casefold")]
@@ -82,6 +82,7 @@ impl Char for AsciiChar {
8282
self
8383
}
8484
}
85+
8586
fn char_class_non_ascii(c: char) -> CharClass {
8687
if c.is_lowercase() {
8788
CharClass::Lower
@@ -97,6 +98,7 @@ fn char_class_non_ascii(c: char) -> CharClass {
9798
CharClass::NonWord
9899
}
99100
}
101+
100102
impl Char for char {
101103
const ASCII: bool = false;
102104
#[inline(always)]
@@ -149,7 +151,7 @@ pub use normalize::normalize;
149151
#[cfg(feature = "unicode-segmentation")]
150152
use unicode_segmentation::UnicodeSegmentation;
151153

152-
/// Converts a character to lower case using simple unicode case folding
154+
/// Converts a character to lower case using simple Unicode case folding.
153155
#[cfg(feature = "unicode-casefold")]
154156
#[inline(always)]
155157
pub fn to_lower_case(c: char) -> char {
@@ -158,8 +160,9 @@ pub fn to_lower_case(c: char) -> char {
158160
.map_or(c, |idx| CASE_FOLDING_SIMPLE[idx].1)
159161
}
160162

161-
/// Checks if a character is upper case according to simple unicode case folding.
162-
/// if the `unicode-casefold` feature is disable the equivalent std function is used
163+
/// Checks if a character is upper case according to simple Unicode case folding.
164+
///
165+
/// If the `unicode-casefold` feature is disabled, the equivalent std function is used instead.
163166
#[inline(always)]
164167
pub fn is_upper_case(c: char) -> bool {
165168
#[cfg(feature = "unicode-casefold")]
@@ -182,10 +185,15 @@ pub(crate) enum CharClass {
182185
Number,
183186
}
184187

185-
/// Nucleo cannot match graphemes as single units. To work around
186-
/// that we only use the first codepoint of each grapheme. This
187-
/// iterator returns the first character of each unicode grapheme
188-
/// in a string and is used for constructing `Utf32Str(ing)`.
188+
/// Returns an iterator over single-codepoint representations of each grapheme in the provided
189+
/// text.
190+
///
191+
/// For the most part, this is simply the first `char` of a grapheme. The main exception is the
192+
/// Windows-style newline `\r\n`, which is normalized to the char `'\n'`.
193+
///
194+
/// This workaround mainly exists since Nucleo cannot match graphemes as single units, so we
195+
/// must internally map each grapheme to a simpler in-memory representation. This method is used
196+
/// when constructing `Utf32Str(ing)`.
189197
pub fn graphemes(text: &str) -> impl Iterator<Item = char> + '_ {
190198
#[cfg(feature = "unicode-segmentation")]
191199
let res = text.graphemes(true).map(|grapheme| {

Diff for: matcher/src/chars/normalize.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
/// Normalize a Unicode character by converting Latin characters which are variants
2-
/// of ASCII characters to their latin equivalent.
2+
/// of ASCII characters to their Latin equivalents.
33
///
44
/// Note that this method acts on single `char`s: if you want to perform full normalization, you
55
/// should first split on graphemes, and then normalize each grapheme by normalizing the first
6-
/// `char` in the grapheme.
6+
/// `char` in each grapheme. See the [`graphemes`](super::graphemes) function for more detail.
77
///
88
/// If a character does not normalize to a single ASCII character, no normalization is performed.
99
///
@@ -15,7 +15,7 @@
1515
/// - [Latin Extended Additional](https://en.wikipedia.org/wiki/Latin_Extended_Additional)
1616
/// - [Superscripts and Subscripts](https://en.wikipedia.org/wiki/Superscripts_and_Subscripts)
1717
///
18-
/// If the character does not fall in this block, it is not normalized.
18+
/// If the character does not fall in any of these blocks, it is not normalized.
1919
///
2020
/// # Example
2121
/// ```

Diff for: matcher/src/config.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ pub struct Config {
2929
}
3030

3131
impl Config {
32-
/// The default config for nucleo, implemented as a constant since
33-
/// Default::default can not be called in a const context
32+
/// The default configuration for nucleo, implemented as a constant since
33+
/// [`Default::default`] cannot be called in a `const` context.
3434
pub const DEFAULT: Self = {
3535
Config {
3636
delimiter_chars: b"/,:;|",

Diff for: matcher/src/lib.rs

+5-3
Original file line numberDiff line numberDiff line change
@@ -183,9 +183,11 @@ impl Default for Matcher {
183183
}
184184

185185
impl Matcher {
186-
/// Creates a new matcher instance, note that this will eagerly allocate a
187-
/// fairly large chunk of heap memory (around 135KB currently but subject to
188-
/// change) so matchers should be reused if called often (like in a loop).
186+
/// Creates a new matcher instance.
187+
///
188+
/// This will eagerly allocate a fairly large chunk of heap memory (around 135KB
189+
/// currently, but subject to change), so a matcher should be reused if it is needed often,
190+
/// such as in a loop.
189191
pub fn new(config: Config) -> Self {
190192
Self {
191193
config,

Diff for: matcher/src/utf32_str.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ fn has_ascii_graphemes(string: &str) -> bool {
9595
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
9696
pub enum Utf32Str<'a> {
9797
/// A string represented as ASCII encoded bytes.
98+
///
9899
/// Correctness invariant: must only contain valid ASCII (`<= 127`)
99100
Ascii(&'a [u8]),
100101
/// A string represented as an array of Unicode codepoints (basically UTF-32).
@@ -301,7 +302,8 @@ impl DoubleEndedIterator for Chars<'_> {
301302
/// See the API documentation for [`Utf32Str`] for more detail.
302303
pub enum Utf32String {
303304
/// A string represented as ASCII encoded bytes.
304-
/// Correctness invariant: must only contain valid ASCII (<=127)
305+
///
306+
/// Correctness invariant: must only contain valid ASCII (`<= 127`)
305307
Ascii(Box<str>),
306308
/// A string represented as an array of Unicode codepoints (basically UTF-32).
307309
Unicode(Box<[char]>),

0 commit comments

Comments
 (0)