Skip to content

Commit 81426fc

Browse files
Purge more quadratic-time regexes (#581)
1 parent 4c8f444 commit 81426fc

File tree

2 files changed

+42
-14
lines changed

2 files changed

+42
-14
lines changed

src/diff/word.js

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import Diff from './base';
2-
import { longestCommonPrefix, longestCommonSuffix, replacePrefix, replaceSuffix, removePrefix, removeSuffix, maximumOverlap } from '../util/string';
2+
import { longestCommonPrefix, longestCommonSuffix, replacePrefix, replaceSuffix, removePrefix, removeSuffix, maximumOverlap, leadingWs, trailingWs } from '../util/string';
33

44
// Based on https://en.wikipedia.org/wiki/Latin_script_in_Unicode
55
//
@@ -193,10 +193,10 @@ function dedupeWhitespaceInChangeObjects(startKeep, deletion, insertion, endKeep
193193
// * Just a "delete"
194194
// We handle the three cases separately.
195195
if (deletion && insertion) {
196-
const oldWsPrefix = deletion.value.match(/^\s*/)[0];
197-
const oldWsSuffix = deletion.value.match(/\s*$/)[0];
198-
const newWsPrefix = insertion.value.match(/^\s*/)[0];
199-
const newWsSuffix = insertion.value.match(/\s*$/)[0];
196+
const oldWsPrefix = leadingWs(deletion.value);
197+
const oldWsSuffix = trailingWs(deletion.value);
198+
const newWsPrefix = leadingWs(insertion.value);
199+
const newWsSuffix = trailingWs(insertion.value);
200200

201201
if (startKeep) {
202202
const commonWsPrefix = longestCommonPrefix(oldWsPrefix, newWsPrefix);
@@ -218,16 +218,18 @@ function dedupeWhitespaceInChangeObjects(startKeep, deletion, insertion, endKeep
218218
// whitespace and deleting duplicate leading whitespace where
219219
// present.
220220
if (startKeep) {
221-
insertion.value = insertion.value.replace(/^\s*/, '');
221+
const ws = leadingWs(insertion.value);
222+
insertion.value = insertion.value.substring(ws.length);
222223
}
223224
if (endKeep) {
224-
endKeep.value = endKeep.value.replace(/^\s*/, '');
225+
const ws = leadingWs(endKeep.value);
226+
endKeep.value = endKeep.value.substring(ws.length);
225227
}
226228
// otherwise we've got a deletion and no insertion
227229
} else if (startKeep && endKeep) {
228-
const newWsFull = endKeep.value.match(/^\s*/)[0],
229-
delWsStart = deletion.value.match(/^\s*/)[0],
230-
delWsEnd = deletion.value.match(/\s*$/)[0];
230+
const newWsFull = leadingWs(endKeep.value),
231+
delWsStart = leadingWs(deletion.value),
232+
delWsEnd = trailingWs(deletion.value);
231233

232234
// Any whitespace that comes straight after startKeep in both the old and
233235
// new texts, assign to startKeep and remove from the deletion.
@@ -255,16 +257,16 @@ function dedupeWhitespaceInChangeObjects(startKeep, deletion, insertion, endKeep
255257
// We are at the start of the text. Preserve all the whitespace on
256258
// endKeep, and just remove whitespace from the end of deletion to the
257259
// extent that it overlaps with the start of endKeep.
258-
const endKeepWsPrefix = endKeep.value.match(/^\s*/)[0];
259-
const deletionWsSuffix = deletion.value.match(/\s*$/)[0];
260+
const endKeepWsPrefix = leadingWs(endKeep.value);
261+
const deletionWsSuffix = trailingWs(deletion.value);
260262
const overlap = maximumOverlap(deletionWsSuffix, endKeepWsPrefix);
261263
deletion.value = removeSuffix(deletion.value, overlap);
262264
} else if (startKeep) {
263265
// We are at the END of the text. Preserve all the whitespace on
264266
// startKeep, and just remove whitespace from the start of deletion to
265267
// the extent that it overlaps with the end of startKeep.
266-
const startKeepWsSuffix = startKeep.value.match(/\s*$/)[0];
267-
const deletionWsPrefix = deletion.value.match(/^\s*/)[0];
268+
const startKeepWsSuffix = trailingWs(startKeep.value);
269+
const deletionWsPrefix = leadingWs(deletion.value);
268270
const overlap = maximumOverlap(startKeepWsSuffix, deletionWsPrefix);
269271
deletion.value = removePrefix(deletion.value, overlap);
270272
}

src/util/string.js

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,29 @@ export function hasOnlyWinLineEndings(string) {
101101
export function hasOnlyUnixLineEndings(string) {
102102
return !string.includes('\r\n') && string.includes('\n');
103103
}
104+
105+
export function trailingWs(string) {
106+
// Yes, this looks overcomplicated and dumb - why not replace the whole function with
107+
// return string match(/\s*$/)[0]
108+
// you ask? Because:
109+
// 1. the trap described at https://markamery.com/blog/quadratic-time-regexes/ would mean doing
110+
// this would cause this function to take O(n²) time in the worst case (specifically when
111+
// there is a massive run of NON-TRAILING whitespace in `string`), and
112+
// 2. the fix proposed in the same blog post, of using a negative lookbehind, is incompatible
113+
// with old Safari versions that we'd like to not break if possible (see
114+
// https://github.com/kpdecker/jsdiff/pull/550)
115+
// It feels absurd to do this with an explicit loop instead of a regex, but I really can't see a
116+
// better way that doesn't result in broken behaviour.
117+
let i;
118+
for (i = string.length - 1; i >= 0; i--) {
119+
if (!string[i].match(/\s/)) {
120+
break;
121+
}
122+
}
123+
return string.substring(i + 1);
124+
}
125+
126+
export function leadingWs(string) {
127+
// Thankfully the annoying considerations described in trailingWs don't apply here:
128+
return string.match(/^\s*/)[0];
129+
}

0 commit comments

Comments
 (0)