From 8da7274de409ba37c017980ccec0587726541302 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Fri, 14 Feb 2025 20:32:14 +0000 Subject: [PATCH] Purge more quadratic-time regexes --- src/diff/word.js | 30 ++++++++++++++++-------------- src/util/string.js | 26 ++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/src/diff/word.js b/src/diff/word.js index fd9284f8..3bd534ed 100644 --- a/src/diff/word.js +++ b/src/diff/word.js @@ -1,5 +1,5 @@ import Diff from './base'; -import { longestCommonPrefix, longestCommonSuffix, replacePrefix, replaceSuffix, removePrefix, removeSuffix, maximumOverlap } from '../util/string'; +import { longestCommonPrefix, longestCommonSuffix, replacePrefix, replaceSuffix, removePrefix, removeSuffix, maximumOverlap, leadingWs, trailingWs } from '../util/string'; // Based on https://en.wikipedia.org/wiki/Latin_script_in_Unicode // @@ -193,10 +193,10 @@ function dedupeWhitespaceInChangeObjects(startKeep, deletion, insertion, endKeep // * Just a "delete" // We handle the three cases separately. if (deletion && insertion) { - const oldWsPrefix = deletion.value.match(/^\s*/)[0]; - const oldWsSuffix = deletion.value.match(/\s*$/)[0]; - const newWsPrefix = insertion.value.match(/^\s*/)[0]; - const newWsSuffix = insertion.value.match(/\s*$/)[0]; + const oldWsPrefix = leadingWs(deletion.value); + const oldWsSuffix = trailingWs(deletion.value); + const newWsPrefix = leadingWs(insertion.value); + const newWsSuffix = trailingWs(insertion.value); if (startKeep) { const commonWsPrefix = longestCommonPrefix(oldWsPrefix, newWsPrefix); @@ -218,16 +218,18 @@ function dedupeWhitespaceInChangeObjects(startKeep, deletion, insertion, endKeep // whitespace and deleting duplicate leading whitespace where // present. if (startKeep) { - insertion.value = insertion.value.replace(/^\s*/, ''); + const ws = leadingWs(insertion.value); + insertion.value = insertion.value.substring(ws.length); } if (endKeep) { - endKeep.value = endKeep.value.replace(/^\s*/, ''); + const ws = leadingWs(endKeep.value); + endKeep.value = endKeep.value.substring(ws.length); } // otherwise we've got a deletion and no insertion } else if (startKeep && endKeep) { - const newWsFull = endKeep.value.match(/^\s*/)[0], - delWsStart = deletion.value.match(/^\s*/)[0], - delWsEnd = deletion.value.match(/\s*$/)[0]; + const newWsFull = leadingWs(endKeep.value), + delWsStart = leadingWs(deletion.value), + delWsEnd = trailingWs(deletion.value); // Any whitespace that comes straight after startKeep in both the old and // new texts, assign to startKeep and remove from the deletion. @@ -255,16 +257,16 @@ function dedupeWhitespaceInChangeObjects(startKeep, deletion, insertion, endKeep // We are at the start of the text. Preserve all the whitespace on // endKeep, and just remove whitespace from the end of deletion to the // extent that it overlaps with the start of endKeep. - const endKeepWsPrefix = endKeep.value.match(/^\s*/)[0]; - const deletionWsSuffix = deletion.value.match(/\s*$/)[0]; + const endKeepWsPrefix = leadingWs(endKeep.value); + const deletionWsSuffix = trailingWs(deletion.value); const overlap = maximumOverlap(deletionWsSuffix, endKeepWsPrefix); deletion.value = removeSuffix(deletion.value, overlap); } else if (startKeep) { // We are at the END of the text. Preserve all the whitespace on // startKeep, and just remove whitespace from the start of deletion to // the extent that it overlaps with the end of startKeep. - const startKeepWsSuffix = startKeep.value.match(/\s*$/)[0]; - const deletionWsPrefix = deletion.value.match(/^\s*/)[0]; + const startKeepWsSuffix = trailingWs(startKeep.value); + const deletionWsPrefix = leadingWs(deletion.value); const overlap = maximumOverlap(startKeepWsSuffix, deletionWsPrefix); deletion.value = removePrefix(deletion.value, overlap); } diff --git a/src/util/string.js b/src/util/string.js index 53cb7089..80cf5478 100644 --- a/src/util/string.js +++ b/src/util/string.js @@ -101,3 +101,29 @@ export function hasOnlyWinLineEndings(string) { export function hasOnlyUnixLineEndings(string) { return !string.includes('\r\n') && string.includes('\n'); } + +export function trailingWs(string) { + // Yes, this looks overcomplicated and dumb - why not replace the whole function with + // return string match(/\s*$/)[0] + // you ask? Because: + // 1. the trap described at https://markamery.com/blog/quadratic-time-regexes/ would mean doing + // this would cause this function to take O(n²) time in the worst case (specifically when + // there is a massive run of NON-TRAILING whitespace in `string`), and + // 2. the fix proposed in the same blog post, of using a negative lookbehind, is incompatible + // with old Safari versions that we'd like to not break if possible (see + // https://github.com/kpdecker/jsdiff/pull/550) + // It feels absurd to do this with an explicit loop instead of a regex, but I really can't see a + // better way that doesn't result in broken behaviour. + let i; + for (i = string.length - 1; i >= 0; i--) { + if (!string[i].match(/\s/)) { + break; + } + } + return string.substring(i + 1); +} + +export function leadingWs(string) { + // Thankfully the annoying considerations described in trailingWs don't apply here: + return string.match(/^\s*/)[0]; +}