From 94de212bd64a41e3b09a655d4bd60b4e5c5d5dbc Mon Sep 17 00:00:00 2001
From: Mark Amery
Date: Thu, 22 May 2025 14:30:36 +0100
Subject: [PATCH] Fix broken diffWords test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Somehow I screwed up https://github.com/kpdecker/jsdiff/pull/613 and
merged it with the tests failing and with the sentence I was actually
using in the test inconsistent with the one I claimed to be using in
the comment above. Also, even if I'd got it right, I wouldn't've
actually avoided hitting the inconsistency in Intl.Segmenter's
tokenization rules that that PR was specifically trying to avoid,
because it considers 他有 (he has) to be one word; I should've used
她有 (she has) which the segmenter sees as two words. This fixes both
mistakes.
---
 test/diff/word.js | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/diff/word.js b/test/diff/word.js
index a28ec05a..f255f321 100644
--- a/test/diff/word.js
+++ b/test/diff/word.js
@@ -240,14 +240,14 @@ describe('WordDiff', function() {
     it('supports tokenizing with an Intl.Segmenter', () => {
       // Example 1: Diffing Chinese text with no spaces.
-      // a. "He (他) has (有) many (很多) tables (桌子)"
+      // a. "She (她) has (有) many (很多) tables (桌子)"
       // b. "Mei (梅) has (有) many (很多) sons (儿子)"
       // We want to see that diffWords will get the word counts right and won't try to treat the
       // trailing 子 as common to both texts (since it's part of a different word each time).
       const chineseSegmenter = new Intl.Segmenter('zh', {granularity: 'word'});
-      const diffResult = diffWords('我有很多桌子。', '梅有很多儿子。', {intlSegmenter: chineseSegmenter});
+      const diffResult = diffWords('她有很多桌子。', '梅有很多儿子。', {intlSegmenter: chineseSegmenter});
       expect(diffResult).to.deep.equal([
-        { count: 1, added: false, removed: true, value: '他' },
+        { count: 1, added: false, removed: true, value: '她' },
         { count: 1, added: true, removed: false, value: '梅' },
         { count: 2, added: false, removed: false, value: '有很多' },
         { count: 1, added: false, removed: true, value: '桌子' },