Merge pull request #12 from jmelahman/jamison/invalid-escape-sequence

ZeroCool940711 · web-flow · commit 3d984287da51 · 2024-01-03T08:28:56.000-07:00
[chore] fix "SyntaxError: invalid escape sequence"
diff --git a/src/whoosh/analysis/filters.py b/src/whoosh/analysis/filters.py
@@ -53,7 +53,7 @@
     \\S+?                  # URL body
     (?=\\s|[.]\\s|$|[.]$)  # Stop at space/end, or a dot followed by space/end
 ) | (                      # or...
-    \w+([:.]?\w+)*         # word characters, with opt. internal colons/dots
+    \\w+([:.]?\\w+)*         # word characters, with opt. internal colons/dots
 )
 """, verbose=True)
 
@@ -145,7 +145,7 @@ def __call__(self, tokens):
 
 
 class TeeFilter(Filter):
-    """Interleaves the results of two or more filters (or filter chains).
+    r"""Interleaves the results of two or more filters (or filter chains).
 
     NOTE: because it needs to create copies of each token for each sub-filter,
     this filter is quite slow.
diff --git a/src/whoosh/analysis/intraword.py b/src/whoosh/analysis/intraword.py
@@ -34,7 +34,7 @@
 
 
 class CompoundWordFilter(Filter):
-    """Given a set of words (or any object with a ``__contains__`` method),
+    r"""Given a set of words (or any object with a ``__contains__`` method),
     break any tokens in the stream that are composites of words in the word set
     into their individual parts.
 
@@ -272,7 +272,7 @@ class IntraWordFilter(Filter):
     >>> iwf_i = IntraWordFilter(mergewords=True, mergenums=True)
     >>> iwf_q = IntraWordFilter(mergewords=False, mergenums=False)
     >>> iwf = MultiFilter(index=iwf_i, query=iwf_q)
-    >>> analyzer = RegexTokenizer(r"\S+") | iwf | LowercaseFilter()
+    >>> analyzer = RegexTokenizer(r"\\S+") | iwf | LowercaseFilter()
 
     (See :class:`MultiFilter`.)
     """
@@ -282,7 +282,7 @@ class IntraWordFilter(Filter):
     __inittypes__ = dict(delims=text_type, splitwords=bool, splitnums=bool,
                          mergewords=bool, mergenums=bool)
 
-    def __init__(self, delims=u("-_'\"()!@#$%^&*[]{}<>\|;:,./?`~=+"),
+    def __init__(self, delims=u("-_'\"()!@#$%^&*[]{}<>\\|;:,./?`~=+"),
                  splitwords=True, splitnums=True,
                  mergewords=False, mergenums=False):
         """
diff --git a/src/whoosh/lang/paicehusk.py b/src/whoosh/lang/paicehusk.py
@@ -30,7 +30,7 @@ class PaiceHuskStemmer(object):
     (?P<cont>[.>])
     """, re.UNICODE | re.VERBOSE)
 
-    stem_expr = re.compile("^\w+", re.UNICODE)
+    stem_expr = re.compile(r"^\w+", re.UNICODE)
 
     def __init__(self, ruletable):
         """
diff --git a/src/whoosh/lang/porter2.py b/src/whoosh/lang/porter2.py
@@ -64,7 +64,7 @@ def remove_initial_apostrophe(word):
 def capitalize_consonant_ys(word):
     if word.startswith('y'):
         word = 'Y' + word[1:]
-    return ccy_exp.sub('\g<1>Y', word)
+    return ccy_exp.sub(r'\g<1>Y', word)
 
 
 def step_0(word):
diff --git a/tests/test_analysis.py b/tests/test_analysis.py
@@ -520,7 +520,7 @@ def test_stop_lang():
 
 
 def test_issue358():
-    t = analysis.RegexTokenizer("\w+")
+    t = analysis.RegexTokenizer(r"\w+")
     with pytest.raises(analysis.CompositionError):
         _ = t | analysis.StandardAnalyzer()