Skip to content

Commit f3671e1

Browse files
committed
comments and calibration
1 parent 4667880 commit f3671e1

File tree

1 file changed

+24
-24
lines changed

1 file changed

+24
-24
lines changed

Objects/stringlib/fastsearch.h

+24-24
Original file line numberDiff line numberDiff line change
@@ -223,26 +223,25 @@ STRINGLIB(_lex_search)(const STRINGLIB_CHAR *p,
223223
Also find the period of the right half.
224224
Direction:
225225
dir : {-1, 1}
226-
if dir == -1, then the problem is reverse
226+
if dir == -1, then the problem is reversed
227227
In short:
228-
_lex_search(x, -1) == _lex_search(x[::-1], 1)
229-
230-
Returned cut is "the size of the cut towards chosen direction".
231-
E.g.:
232-
>>> x = '1234'
233-
>>> cut, period = factorize(x, dir=1) # cut = 0
234-
>>> cut
235-
0
236-
>>> cut_idx = cut
237-
>>> x[:cut_idx], x[cut_idx:]
238-
'', '1234'
239-
>>> x = '4321'
240-
>>> cut, period = factorize(x, dir=-1)
241-
>>> cut
242-
0
243-
>>> cut_idx = len(x) - cut
244-
>>> x[:cut_idx], x[cut_idx:]
245-
'4321', ''
228+
_lex_search(x, -1) == _lex_search(x[::-1], 1)
229+
230+
The returned cut is the size of the cut towards the chosen direction. E.g.:
231+
>>> x = '1234'
232+
>>> cut, period = factorize(x, dir=1) # cut = 0
233+
>>> cut
234+
0
235+
>>> cut_idx = cut
236+
>>> x[:cut_idx], x[cut_idx:]
237+
'', '1234'
238+
>>> x = '4321'
239+
>>> cut, period = factorize(x, dir=-1)
240+
>>> cut
241+
0
242+
>>> cut_idx = len(x) - cut
243+
>>> x[:cut_idx], x[cut_idx:]
244+
'4321', ''
246245
*/
247246
Py_ssize_t max_suffix = 0;
248247
Py_ssize_t candidate = 1;
@@ -640,7 +639,7 @@ STRINGLIB(horspool_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
640639
>>> ss_fwd, ss_rev
641640
(1, 4)
642641
643-
There is one more important variable here: j_off
642+
There is one more important variable: j_off
644643
It brings ss into alignment with the needle,
644643
so that it stands at the first absolute index of the window
646645
@@ -653,17 +652,18 @@ STRINGLIB(horspool_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
653652
654653
such that [0, 1, 2, 3, 4, 5]
655654
[0, 1]
656-
* - both indices are at 0 here
655+
* - both indices are at 0
657656
658657
>>> j_off_rev = dir_rev * i - p_end_rev
659658
>>> ss_rev + j_off_rev
660659
4
661660
662661
such that [0, 1, 2, 3, 4, 5]
663662
[0, 1]
664-
* - both indices are at 0 here
663+
* - both indices are at 0
665664
Finally, which side it iterates from is determined by:
666665
jp = p_stt + (reversed ? -j : j);
666+
where j is an increasing needle-size counter in both cases
667667
668668
With this transformation the problem becomes direction agnostic
669669
@@ -770,7 +770,7 @@ STRINGLIB(horspool_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
770770
const double hrs_lcost = 4.0; // average loop cost
771771
const double hrs_hcost = 0.4; // false positive hit cost
772772
// Two-Way Calibration
773-
const double twy_icost = 3.0 * (double)m; // total initialization cost
773+
const double twy_icost = 3.5 * (double)m; // total initialization cost
774774
const double twy_lcost = 3.0; // loop cost
775775
// Temporary
776776
double exp_hrs, exp_twy, ll; // expected run times & loops left
@@ -1118,7 +1118,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
11181118
return res == 0 ? 0 : -1;
11191119
}
11201120
}
1121-
int dynamic = 1;
1121+
int dynamic = 1; // dynamic fallback to two-way algorithm flag
11221122
int dir = mode != FAST_RSEARCH ? 1 : -1;
11231123
return STRINGLIB(horspool_find)(s, n, p, m, maxcount, mode, dir, dynamic);
11241124
}

0 commit comments

Comments
 (0)