@@ -223,26 +223,25 @@ STRINGLIB(_lex_search)(const STRINGLIB_CHAR *p,
223
223
Also find the period of the right half.
224
224
Direction:
225
225
dir : {-1, 1}
226
- if dir == -1, then the problem is reverse
226
+ if dir == -1, then the problem is reversed
227
227
In short:
228
- _lex_search(x, -1) == _lex_search(x[::-1], 1)
229
-
230
- Returned cut is "the size of the cut towards chosen direction".
231
- E.g.:
232
- >>> x = '1234'
233
- >>> cut, period = factorize(x, dir=1) # cut = 0
234
- >>> cut
235
- 0
236
- >>> cut_idx = cut
237
- >>> x[:cut_idx], x[cut_idx:]
238
- '', '1234'
239
- >>> x = '4321'
240
- >>> cut, period = factorize(x, dir=-1)
241
- >>> cut
242
- 0
243
- >>> cut_idx = len(x) - cut
244
- >>> x[:cut_idx], x[cut_idx:]
245
- '4321', ''
228
+ _lex_search(x, -1) == _lex_search(x[::-1], 1)
229
+
230
+ The returned cut is the size of the cut towards the chosen direction. E.g.:
231
+ >>> x = '1234'
232
+ >>> cut, period = factorize(x, dir=1) # cut = 0
233
+ >>> cut
234
+ 0
235
+ >>> cut_idx = cut
236
+ >>> x[:cut_idx], x[cut_idx:]
237
+ '', '1234'
238
+ >>> x = '4321'
239
+ >>> cut, period = factorize(x, dir=-1)
240
+ >>> cut
241
+ 0
242
+ >>> cut_idx = len(x) - cut
243
+ >>> x[:cut_idx], x[cut_idx:]
244
+ '4321', ''
246
245
*/
247
246
Py_ssize_t max_suffix = 0 ;
248
247
Py_ssize_t candidate = 1 ;
@@ -640,7 +639,7 @@ STRINGLIB(horspool_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
640
639
>>> ss_fwd, ss_rev
641
640
(1, 4)
642
641
643
- There is one more important variable here : j_off
642
+ There is one more important variable: j_off
644
643
It brings ss into alignment with the needle.
645
644
So that it stands at the first absolute index of the window
646
645
@@ -653,17 +652,18 @@ STRINGLIB(horspool_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
653
652
654
653
such that [0, 1, 2, 3, 4, 5]
655
654
[0, 1]
656
- * - both indices are at 0 here
655
+ * - both indices are at 0
657
656
658
657
>>> j_off_rev = dir_rev * i - p_end_rev
659
658
>>> ss_rev + j_off_rev
660
659
4
661
660
662
661
such that [0, 1, 2, 3, 4, 5]
663
662
[0, 1]
664
- * - both indices are at 0 here
663
+ * - both indices are at 0
665
664
Finally, which side it iterates from is determined by:
666
665
jp = p_stt + (reversed ? -j : j);
666
+ , where j is an increasing needle-size counter in both cases
667
667
668
668
With this transformation the problem becomes direction agnostic
669
669
@@ -770,7 +770,7 @@ STRINGLIB(horspool_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
770
770
const double hrs_lcost = 4.0 ; // average loop cost
771
771
const double hrs_hcost = 0.4 ; // false positive hit cost
772
772
// Two-Way Calibration
773
- const double twy_icost = 3.0 * (double )m ; // total initialization cost
773
+ const double twy_icost = 3.5 * (double )m ; // total initialization cost
774
774
const double twy_lcost = 3.0 ; // loop cost
775
775
// Temporary
776
776
double exp_hrs , exp_twy , ll ; // expected run times & loops left
@@ -1118,7 +1118,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
1118
1118
return res == 0 ? 0 : -1 ;
1119
1119
}
1120
1120
}
1121
- int dynamic = 1 ;
1121
+ int dynamic = 1 ; // dynamic fallback to two-way algorithm flag
1122
1122
int dir = mode != FAST_RSEARCH ? 1 : -1 ;
1123
1123
return STRINGLIB (horspool_find )(s , n , p , m , maxcount , mode , dir , dynamic );
1124
1124
}
0 commit comments