From 8da7274de409ba37c017980ccec0587726541302 Mon Sep 17 00:00:00 2001
From: Mark Amery <markrobertamery@gmail.com>
Date: Fri, 14 Feb 2025 20:32:14 +0000
Subject: [PATCH] Purge more quadratic-time regexes

---
 src/diff/word.js   | 30 ++++++++++++++++--------------
 src/util/string.js | 26 ++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/src/diff/word.js b/src/diff/word.js
index fd9284f8..3bd534ed 100644
--- a/src/diff/word.js
+++ b/src/diff/word.js
@@ -1,5 +1,5 @@
 import Diff from './base';
-import { longestCommonPrefix, longestCommonSuffix, replacePrefix, replaceSuffix, removePrefix, removeSuffix, maximumOverlap } from '../util/string';
+import { longestCommonPrefix, longestCommonSuffix, replacePrefix, replaceSuffix, removePrefix, removeSuffix, maximumOverlap, leadingWs, trailingWs } from '../util/string';
 
 // Based on https://en.wikipedia.org/wiki/Latin_script_in_Unicode
 //
@@ -193,10 +193,10 @@ function dedupeWhitespaceInChangeObjects(startKeep, deletion, insertion, endKeep
   // * Just a "delete"
   // We handle the three cases separately.
   if (deletion && insertion) {
-    const oldWsPrefix = deletion.value.match(/^\s*/)[0];
-    const oldWsSuffix = deletion.value.match(/\s*$/)[0];
-    const newWsPrefix = insertion.value.match(/^\s*/)[0];
-    const newWsSuffix = insertion.value.match(/\s*$/)[0];
+    const oldWsPrefix = leadingWs(deletion.value);
+    const oldWsSuffix = trailingWs(deletion.value);
+    const newWsPrefix = leadingWs(insertion.value);
+    const newWsSuffix = trailingWs(insertion.value);
 
     if (startKeep) {
       const commonWsPrefix = longestCommonPrefix(oldWsPrefix, newWsPrefix);
@@ -218,16 +218,18 @@ function dedupeWhitespaceInChangeObjects(startKeep, deletion, insertion, endKeep
     // whitespace and deleting duplicate leading whitespace where
     // present.
     if (startKeep) {
-      insertion.value = insertion.value.replace(/^\s*/, '');
+      const ws = leadingWs(insertion.value);
+      insertion.value = insertion.value.substring(ws.length);
     }
     if (endKeep) {
-      endKeep.value = endKeep.value.replace(/^\s*/, '');
+      const ws = leadingWs(endKeep.value);
+      endKeep.value = endKeep.value.substring(ws.length);
     }
   // otherwise we've got a deletion and no insertion
   } else if (startKeep && endKeep) {
-    const newWsFull = endKeep.value.match(/^\s*/)[0],
-        delWsStart = deletion.value.match(/^\s*/)[0],
-        delWsEnd = deletion.value.match(/\s*$/)[0];
+    const newWsFull = leadingWs(endKeep.value),
+        delWsStart = leadingWs(deletion.value),
+        delWsEnd = trailingWs(deletion.value);
 
     // Any whitespace that comes straight after startKeep in both the old and
     // new texts, assign to startKeep and remove from the deletion.
@@ -255,16 +257,16 @@ function dedupeWhitespaceInChangeObjects(startKeep, deletion, insertion, endKeep
     // We are at the start of the text. Preserve all the whitespace on
     // endKeep, and just remove whitespace from the end of deletion to the
     // extent that it overlaps with the start of endKeep.
-    const endKeepWsPrefix = endKeep.value.match(/^\s*/)[0];
-    const deletionWsSuffix = deletion.value.match(/\s*$/)[0];
+    const endKeepWsPrefix = leadingWs(endKeep.value);
+    const deletionWsSuffix = trailingWs(deletion.value);
     const overlap = maximumOverlap(deletionWsSuffix, endKeepWsPrefix);
     deletion.value = removeSuffix(deletion.value, overlap);
   } else if (startKeep) {
     // We are at the END of the text. Preserve all the whitespace on
     // startKeep, and just remove whitespace from the start of deletion to
     // the extent that it overlaps with the end of startKeep.
-    const startKeepWsSuffix = startKeep.value.match(/\s*$/)[0];
-    const deletionWsPrefix = deletion.value.match(/^\s*/)[0];
+    const startKeepWsSuffix = trailingWs(startKeep.value);
+    const deletionWsPrefix = leadingWs(deletion.value);
     const overlap = maximumOverlap(startKeepWsSuffix, deletionWsPrefix);
     deletion.value = removePrefix(deletion.value, overlap);
   }
diff --git a/src/util/string.js b/src/util/string.js
index 53cb7089..80cf5478 100644
--- a/src/util/string.js
+++ b/src/util/string.js
@@ -101,3 +101,29 @@ export function hasOnlyWinLineEndings(string) {
 export function hasOnlyUnixLineEndings(string) {
   return !string.includes('\r\n') && string.includes('\n');
 }
+
+export function trailingWs(string) {
+  // Yes, this looks overcomplicated and dumb - why not replace the whole function with
+  //     return string match(/\s*$/)[0]
+  // you ask? Because:
+  // 1. the trap described at https://markamery.com/blog/quadratic-time-regexes/ would mean doing
+  //    this would cause this function to take O(n²) time in the worst case (specifically when
+  //    there is a massive run of NON-TRAILING whitespace in `string`), and
+  // 2. the fix proposed in the same blog post, of using a negative lookbehind, is incompatible
+  //    with old Safari versions that we'd like to not break if possible (see
+  //    https://github.com/kpdecker/jsdiff/pull/550)
+  // It feels absurd to do this with an explicit loop instead of a regex, but I really can't see a
+  // better way that doesn't result in broken behaviour.
+  let i;
+  for (i = string.length - 1; i >= 0; i--) {
+    if (!string[i].match(/\s/)) {
+      break;
+    }
+  }
+  return string.substring(i + 1);
+}
+
+export function leadingWs(string) {
+  // Thankfully the annoying considerations described in trailingWs don't apply here:
+  return string.match(/^\s*/)[0];
+}