@@ -256,7 +256,7 @@ STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
256
256
257
257
The local period of the cut is the minimal length of a string w
258
258
such that (left endswith w or w endswith left)
259
- and (right startswith w or w startswith left ).
259
+ and (right startswith w or w startswith right ).
260
260
261
261
The Critical Factorization Theorem says that this maximal local
262
262
period is the global period of the string.
@@ -337,21 +337,20 @@ STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
337
337
if (p -> is_periodic ) {
338
338
assert (p -> cut <= len_needle /2 );
339
339
assert (p -> cut < p -> period );
340
- p -> gap = 0 ; // unused
341
340
}
342
341
else {
343
342
// A lower bound on the period
344
343
p -> period = Py_MAX (p -> cut , len_needle - p -> cut ) + 1 ;
345
- // The gap between the last character and the previous
346
- // occurrence of an equivalent character (modulo TABLE_SIZE)
347
- p -> gap = len_needle ;
348
- STRINGLIB_CHAR last = needle [ len_needle - 1 ] & TABLE_MASK ;
349
- for ( Py_ssize_t i = len_needle - 2 ; i >= 0 ; i -- ) {
350
- STRINGLIB_CHAR x = needle [ i ] & TABLE_MASK ;
351
- if ( x == last ) {
352
- p -> gap = len_needle - 1 - i ;
353
- break ;
354
- }
344
+ }
345
+ // The gap between the last character and the previous
346
+ // occurrence of an equivalent character (modulo TABLE_SIZE)
347
+ p -> gap = len_needle ;
348
+ STRINGLIB_CHAR last = needle [ len_needle - 1 ] & TABLE_MASK ;
349
+ for ( Py_ssize_t i = len_needle - 2 ; i >= 0 ; i -- ) {
350
+ STRINGLIB_CHAR x = needle [ i ] & TABLE_MASK ;
351
+ if ( x == last ) {
352
+ p -> gap = len_needle - 1 - i ;
353
+ break ;
355
354
}
356
355
}
357
356
// Fill up a compressed Boyer-Moore "Bad Character" table
@@ -383,6 +382,8 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
383
382
const STRINGLIB_CHAR * window ;
384
383
LOG ("===== Two-way: \"%s\" in \"%s\". =====\n" , needle , haystack );
385
384
385
+ Py_ssize_t gap = p -> gap ;
386
+ Py_ssize_t gap_jump_end = Py_MIN (len_needle , cut + gap );
386
387
if (p -> is_periodic ) {
387
388
LOG ("Needle is periodic.\n" );
388
389
Py_ssize_t memory = 0 ;
@@ -408,8 +409,16 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
408
409
Py_ssize_t i = Py_MAX (cut , memory );
409
410
for (; i < len_needle ; i ++ ) {
410
411
if (needle [i ] != window [i ]) {
411
- LOG ("Right half does not match.\n" );
412
- window_last += i - cut + 1 ;
412
+ if (i < gap_jump_end ) {
413
+ LOG ("Early right half mismatch: jump by gap.\n" );
414
+ assert (gap >= i - cut + 1 );
415
+ window_last += gap ;
416
+ }
417
+ else {
418
+ LOG ("Late right half mismatch: jump by n (>gap)\n" );
419
+ assert (i - cut + 1 > gap );
420
+ window_last += i - cut + 1 ;
421
+ }
413
422
memory = 0 ;
414
423
goto periodicwindowloop ;
415
424
}
@@ -442,10 +451,8 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
442
451
}
443
452
}
444
453
else {
445
- Py_ssize_t gap = p -> gap ;
446
454
period = Py_MAX (gap , period );
447
455
LOG ("Needle is not periodic.\n" );
448
- Py_ssize_t gap_jump_end = Py_MIN (len_needle , cut + gap );
449
456
windowloop :
450
457
while (window_last < haystack_end ) {
451
458
for (;;) {
@@ -463,19 +470,19 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
463
470
window = window_last - len_needle + 1 ;
464
471
assert ((window [len_needle - 1 ] & TABLE_MASK ) ==
465
472
(needle [len_needle - 1 ] & TABLE_MASK ));
466
- for (Py_ssize_t i = cut ; i < gap_jump_end ; i ++ ) {
467
- if (needle [i ] != window [i ]) {
468
- LOG ("Early right half mismatch: jump by gap.\n" );
469
- assert (gap >= i - cut + 1 );
470
- window_last += gap ;
471
- goto windowloop ;
472
- }
473
- }
474
- for (Py_ssize_t i = gap_jump_end ; i < len_needle ; i ++ ) {
473
+ Py_ssize_t i = cut ;
474
+ for (; i < len_needle ; i ++ ) {
475
475
if (needle [i ] != window [i ]) {
476
- LOG ("Late right half mismatch.\n" );
477
- assert (i - cut + 1 > gap );
478
- window_last += i - cut + 1 ;
476
+ if (i < gap_jump_end ) {
477
+ LOG ("Early right half mismatch: jump by gap.\n" );
478
+ assert (gap >= i - cut + 1 );
479
+ window_last += gap ;
480
+ }
481
+ else {
482
+ LOG ("Late right half mismatch: jump by n (>gap)\n" );
483
+ assert (i - cut + 1 > gap );
484
+ window_last += i - cut + 1 ;
485
+ }
479
486
goto windowloop ;
480
487
}
481
488
}
0 commit comments