Skip to content

Commit c8e1cc5

Browse files
committed
ready for review
1 parent 4e9d278 commit c8e1cc5

File tree

1 file changed

+35
-30
lines changed

1 file changed

+35
-30
lines changed

Objects/stringlib/fastsearch.h

+35-30
Original file line numberDiff line numberDiff line change
@@ -185,11 +185,17 @@ STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
185185
# define LOG_LEVEL 0
186186
#if LOG_LEVEL == 1 && STRINGLIB_SIZEOF_CHAR == 1
187187
# define LOG(...) printf(__VA_ARGS__)
188-
# define LOG_STRING(s, n)
188+
# define LOG2(...)
189+
/* Debug helper: print the first n characters of s inside double quotes,
   but only when n < 100 so that long inputs do not flood the log.
   Wrapped in do { } while (0) so the macro behaves as a single statement
   (safe in an unbraced if/else), and the arguments are parenthesized so
   that expression arguments expand correctly. */
# define LOG_STRING(s, n) do { \
    if ((n) < 100) { \
        printf("\"%.*s\"", (int)(n), (s)); \
    } \
} while (0)
189192
# define LOG_LINEUP()
190193
#elif LOG_LEVEL == 2 && STRINGLIB_SIZEOF_CHAR == 1
191194
# define LOG(...) printf(__VA_ARGS__)
192-
# define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s)
195+
# define LOG2(...) printf(__VA_ARGS__)
196+
/* Debug helper: print the first n characters of s inside double quotes,
   but only when n < 100 so that long inputs do not flood the log.
   Wrapped in do { } while (0) so the macro behaves as a single statement
   (safe in an unbraced if/else), and the arguments are parenthesized so
   that expression arguments expand correctly. */
# define LOG_STRING(s, n) do { \
    if ((n) < 100) { \
        printf("\"%.*s\"", (int)(n), (s)); \
    } \
} while (0)
193199
# define LOG_LINEUP() do { \
194200
if (n < 100) { \
195201
LOG("> "); LOG_STRING(s, n); \
@@ -199,6 +205,7 @@ STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
199205
} while(0)
200206
#else
201207
# define LOG(...)
208+
# define LOG2(...)
202209
# define LOG_STRING(s, n)
203210
# define LOG_LINEUP()
204211
#endif
@@ -434,12 +441,13 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *s, Py_ssize_t n,
434441
// Crochemore and Perrin's (1991) Two-Way algorithm.
435442
// See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
436443
if (mode == FAST_COUNT) {
437-
LOG("Two-way Counting \"%s\" in \"%s\".\n", pw->p, s);
444+
LOG("Two-way Count.\n");
438445
}
439446
else {
440-
LOG("Two-way Finding \"%s\" in \"%s\".\n", pw->p, s);
447+
LOG("Two-way Find.\n");
441448
}
442-
449+
LOG("haystack: "); LOG_STRING(s, n); LOG("\n");
450+
LOG("needle : "); LOG_STRING(pw->p, pw->m); LOG("\n");
443451
int dir = direction < 0 ? -1 : 1;
444452
int reversed = dir < 0;
445453

@@ -480,10 +488,10 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *s, Py_ssize_t n,
480488
for (i = 0; i <= w;) {
481489
iloop++;
482490
ip = reversed ? -i : i;
483-
LOG("Last window ch: %c\n", ss[ip]);
491+
LOG2("Last window ch: %c\n", ss[ip]);
484492
LOG_LINEUP();
485493
shift = table[ss[ip] & TABLE_MASK];
486-
if (shift != 0){
494+
if (shift){
487495
if (do_mem_jump) {
488496
// A mismatch has been identified to the right
489497
// of where i will next start, so we can jump
@@ -505,15 +513,15 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *s, Py_ssize_t n,
505513
for (; j < m; j++) {
506514
ihits++;
507515
jp = p_stt + (reversed ? -j : j);
508-
LOG("Checking: %c vs %c\n", ss[j_off + jp], p[jp]);
509-
if (ss[j_off + j] != p[j]) {
516+
LOG2("Checking j=%ld: %c vs %c\n", j, ss[j_off + jp], p[jp]);
517+
if (ss[j_off + jp] != p[jp]) {
510518
if (j < gap_jump_end) {
511-
LOG("Early right half mismatch: jump by gap.\n");
519+
LOG("Early later half mismatch: jump by gap.\n");
512520
assert(gap >= j - cut + 1);
513521
i += gap;
514522
}
515523
else {
516-
LOG("Late right half mismatch: jump by n (>gap)\n");
524+
LOG("Late later half mismatch: jump by n (>gap)\n");
517525
assert(j - cut + 1 > gap);
518526
i += j - cut + 1;
519527
}
@@ -524,12 +532,13 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *s, Py_ssize_t n,
524532
if (j != m) {
525533
continue;
526534
}
527-
for (j = memory; j < cut; j++) {
535+
j = Py_MIN(memory, cut);
536+
for (; j < cut; j++) {
528537
ihits++;
529538
jp = p_stt + (reversed ? -j : j);
530-
LOG("Checking: %c vs %c\n", ss[j_off + jp], p[jp]);
531-
if (ss[j_off + j] != p[j]) {
532-
LOG("Left half does not match.\n");
539+
LOG2("Checking j=%ld: %c vs %c\n", j, ss[j_off + jp], p[jp]);
540+
if (ss[j_off + jp] != p[jp]) {
541+
LOG("First half does not match.\n");
533542
if (is_periodic) {
534543
memory = m - period;
535544
do_mem_jump = 1;
@@ -546,6 +555,7 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *s, Py_ssize_t n,
546555
if (++count == maxcount) {
547556
return maxcount;
548557
}
558+
memory = 0;
549559
i += m;
550560
}
551561

@@ -583,11 +593,13 @@ STRINGLIB(horspool_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
583593
/* Boyer–Moore–Horspool algorithm
584594
with optional dynamic fallback to Two-Way algorithm */
585595
if (mode == FAST_COUNT) {
586-
LOG("Horspool Counting \"%s\" in \"%s\".\n", p, s);
596+
LOG("Horspool Count.\n");
587597
}
588598
else {
589-
LOG("Horspool Finding \"%s\" in \"%s\".\n", p, s);
599+
LOG("Horspool Find\n");
590600
}
601+
LOG("haystack: "); LOG_STRING(s, n); LOG("\n");
602+
LOG("needle : "); LOG_STRING(p, m); LOG("\n");
591603
int dir = direction < 0 ? -1 : 1;
592604
int reversed = dir < 0;
593605

@@ -657,7 +669,7 @@ STRINGLIB(horspool_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
657669
iloop++;
658670
ip = reversed ? -i : i;
659671
s_last = ss[ip];
660-
LOG("Last window ch: %c\n", s_last);
672+
LOG2("Last window ch: %c\n", s_last);
661673
if (true_gap) {
662674
shift = 0;
663675
if (s_last != p_last) {
@@ -672,7 +684,7 @@ STRINGLIB(horspool_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
672684
else {
673685
shift = table[s_last & TABLE_MASK];
674686
}
675-
if (shift != 0) {
687+
if (shift) {
676688
LOG("Shift: %ld\n", shift);
677689
i += shift;
678690
continue;
@@ -683,7 +695,7 @@ STRINGLIB(horspool_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
683695
for (j = 0; j < j_stop; j++) {
684696
ihits++;
685697
jp = p_stt + (reversed ? -j : j);
686-
LOG("Checking: %c vs %c\n", ss[j_off + jp], p[jp]);
698+
LOG2("Checking j=%ld: %c vs %c\n", j, ss[j_off + jp], p[jp]);
687699
if (ss[j_off + jp] != p[jp]) {
688700
break;
689701
}
@@ -990,14 +1002,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
9901002
return res == 0 ? 0 : -1;
9911003
}
9921004
}
993-
if (mode != FAST_RSEARCH) {
994-
// return STRINGLIB(horspool_find_old)(s, n, p, m, maxcount, mode, 0);
995-
// return STRINGLIB(horspool_find)(s, n, p, m, maxcount, mode, 1, 1);
996-
return STRINGLIB(two_way_find)(s, n, p, m, maxcount, mode, 1);
997-
}
998-
else {
999-
/* FAST_RSEARCH */
1000-
// return STRINGLIB(horspool_find)(s, n, p, m, maxcount, mode, -1, 1);
1001-
return STRINGLIB(two_way_find)(s, n, p, m, maxcount, mode, -1);
1002-
}
1005+
int dyn = 1;
1006+
int dir = mode != FAST_RSEARCH ? 1 : -1;
1007+
return STRINGLIB(horspool_find)(s, n, p, m, maxcount, mode, dir, dyn);
10031008
}

0 commit comments

Comments
 (0)