From c79a3604f385bb85e1b9f34ad5acc5c5bda3a989 Mon Sep 17 00:00:00 2001 From: Peter Zingg Date: Fri, 24 Jan 2025 12:09:15 -0800 Subject: [PATCH 1/3] tested on elixir 1.18.1 --- mix.exs | 4 ++-- mix.lock | 22 +++++++++++----------- test/diff_test.exs | 4 ++++ 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/mix.exs b/mix.exs index 4162222..0111f64 100644 --- a/mix.exs +++ b/mix.exs @@ -1,14 +1,14 @@ defmodule Dmp.MixProject do use Mix.Project - @version "0.3.0" + @version "0.3.1" @github_project_url "https://github.com/pzingg/diff_match_patch" def project do [ app: :diff_match_patch, version: @version, - elixir: "~> 1.12", + elixir: "~> 1.14", start_permanent: Mix.env() == :prod, deps: deps(), source_url: @github_project_url, diff --git a/mix.lock b/mix.lock index d1bc83c..2e381bd 100644 --- a/mix.lock +++ b/mix.lock @@ -1,14 +1,14 @@ %{ "bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"}, - "credo": {:hex, :credo, "1.7.6", "b8f14011a5443f2839b04def0b252300842ce7388f3af177157c86da18dfbeea", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "146f347fb9f8cbc5f7e39e3f22f70acbef51d441baa6d10169dd604bfbc55296"}, - "dialyxir": {:hex, :dialyxir, "1.4.3", "edd0124f358f0b9e95bfe53a9fcf806d615d8f838e2202a9f430d59566b6b53b", [:mix], [{:erlex, ">= 0.2.6", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "bf2cfb75cd5c5006bec30141b131663299c661a864ec7fbbc72dfa557487a986"}, - "earmark_parser": {:hex, :earmark_parser, "1.4.39", "424642f8335b05bb9eb611aa1564c148a8ee35c9c8a8bba6e129d51a3e3c6769", [:mix], [], "hexpm", "06553a88d1f1846da9ef066b87b57c6f605552cfbe40d20bd8d59cc6bde41944"}, - "erlex": {:hex, :erlex, "0.2.6", 
"c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"}, - "ex_doc": {:hex, :ex_doc, "0.32.2", "f60bbeb6ccbe75d005763e2a328e6f05e0624232f2393bc693611c2d3ae9fa0e", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "a4480305cdfe7fdfcbb77d1092c76161626d9a7aa4fb698aee745996e34602df"}, - "file_system": {:hex, :file_system, "1.0.0", "b689cc7dcee665f774de94b5a832e578bd7963c8e637ef940cd44327db7de2cd", [:mix], [], "hexpm", "6752092d66aec5a10e662aefeed8ddb9531d79db0bc145bb8c40325ca1d8536d"}, - "jason": {:hex, :jason, "1.4.1", "af1504e35f629ddcdd6addb3513c3853991f694921b1b9368b0bd32beb9f1b63", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "fbb01ecdfd565b56261302f7e1fcc27c4fb8f32d56eab74db621fc154604a7a1"}, - "makeup": {:hex, :makeup, "1.1.2", "9ba8837913bdf757787e71c1581c21f9d2455f4dd04cfca785c70bbfff1a76a3", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "cce1566b81fbcbd21eca8ffe808f33b221f9eee2cbc7a1706fc3da9ff18e6cac"}, - "makeup_elixir": {:hex, :makeup_elixir, "0.16.2", "627e84b8e8bf22e60a2579dad15067c755531fea049ae26ef1020cad58fe9578", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "41193978704763f6bbe6cc2758b84909e62984c7752b3784bd3c218bb341706b"}, - "makeup_erlang": {:hex, :makeup_erlang, "1.0.0", 
"6f0eff9c9c489f26b69b61440bf1b238d95badae49adac77973cbacae87e3c2e", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "ea7a9307de9d1548d2a72d299058d1fd2339e3d398560a0e46c27dab4891e4d2"}, - "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"}, + "credo": {:hex, :credo, "1.7.11", "d3e805f7ddf6c9c854fd36f089649d7cf6ba74c42bc3795d587814e3c9847102", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "56826b4306843253a66e47ae45e98e7d284ee1f95d53d1612bb483f88a8cf219"}, + "dialyxir": {:hex, :dialyxir, "1.4.5", "ca1571ac18e0f88d4ab245f0b60fa31ff1b12cbae2b11bd25d207f865e8ae78a", [:mix], [{:erlex, ">= 0.2.7", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "b0fb08bb8107c750db5c0b324fa2df5ceaa0f9307690ee3c1f6ba5b9eb5d35c3"}, + "earmark_parser": {:hex, :earmark_parser, "1.4.43", "34b2f401fe473080e39ff2b90feb8ddfeef7639f8ee0bbf71bb41911831d77c5", [:mix], [], "hexpm", "970a3cd19503f5e8e527a190662be2cee5d98eed1ff72ed9b3d1a3d466692de8"}, + "erlex": {:hex, :erlex, "0.2.7", "810e8725f96ab74d17aac676e748627a07bc87eb950d2b83acd29dc047a30595", [:mix], [], "hexpm", "3ed95f79d1a844c3f6bf0cea61e0d5612a42ce56da9c03f01df538685365efb0"}, + "ex_doc": {:hex, :ex_doc, "0.36.1", "4197d034f93e0b89ec79fac56e226107824adcce8d2dd0a26f5ed3a95efc36b1", [:mix], [{:earmark_parser, "~> 1.4.42", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, 
{:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "d7d26a7cf965dacadcd48f9fa7b5953d7d0cfa3b44fa7a65514427da44eafd89"}, + "file_system": {:hex, :file_system, "1.1.0", "08d232062284546c6c34426997dd7ef6ec9f8bbd090eb91780283c9016840e8f", [:mix], [], "hexpm", "bfcf81244f416871f2a2e15c1b515287faa5db9c6bcf290222206d120b3d43f6"}, + "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, + "makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"}, + "makeup_elixir": {:hex, :makeup_elixir, "1.0.1", "e928a4f984e795e41e3abd27bfc09f51db16ab8ba1aebdba2b3a575437efafc2", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "7284900d412a3e5cfd97fdaed4f5ed389b8f2b4cb49efc0eb3bd10e2febf9507"}, + "makeup_erlang": {:hex, :makeup_erlang, "1.0.1", "c7f58c120b2b5aa5fd80d540a89fdf866ed42f1f3994e4fe189abebeab610839", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "8a89a1eeccc2d798d6ea15496a6e4870b75e014d1af514b1b71fa33134f57814"}, + "nimble_parsec": {:hex, :nimble_parsec, "1.4.2", "8efba0122db06df95bfaa78f791344a89352ba04baedd3849593bfce4d0dc1c6", [:mix], [], "hexpm", "4b21398942dda052b403bbe1da991ccd03a053668d147d53fb8c4e0efe09c973"}, } diff --git a/test/diff_test.exs b/test/diff_test.exs index 281f40b..ccb91cd 100644 --- a/test/diff_test.exs +++ b/test/diff_test.exs @@ -644,6 +644,10 @@ defmodule DiffTest do Diff.main("a123b456c", "abc", false) end + test "short 
strings bug in Elixir 1.18.2" do + assert [{:equal, "A"}, {:delete, "B"}, {:equal, "C"}] = Dmp.Diff.main("ABC", "AC") + end + # Perform a real diff. # Switch off the timeout. From ac11c80fa41f60141726c959df4ef2c5d0839171 Mon Sep 17 00:00:00 2001 From: Peter Zingg Date: Fri, 24 Jan 2025 12:15:54 -0800 Subject: [PATCH 2/3] upgraded dep versions --- mix.exs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mix.exs b/mix.exs index 0111f64..1f158dc 100644 --- a/mix.exs +++ b/mix.exs @@ -102,9 +102,9 @@ defmodule Dmp.MixProject do [ # {:dep_from_hexpm, "~> 0.3.0"}, # {:dep_from_git, git: "https://github.com/elixir-lang/my_dep.git", tag: "0.1.0"} - {:credo, "~> 1.6", only: [:dev, :test], runtime: false}, - {:dialyxir, "~> 1.0", only: :dev, runtime: false}, - {:ex_doc, "~> 0.28", only: :dev, runtime: false} + {:credo, "~> 1.7", only: [:dev, :test], runtime: false}, + {:dialyxir, "~> 1.4", only: :dev, runtime: false}, + {:ex_doc, "~> 0.36", only: :dev, runtime: false} ] end end From 8617198f80ea7643a1ceec51eb1474120ea604d9 Mon Sep 17 00:00:00 2001 From: Peter Zingg Date: Fri, 24 Jan 2025 14:41:23 -0800 Subject: [PATCH 3/3] use charlists for speedup --- lib/dmp/diff.ex | 50 +++++++++++++++++++++------------------------- test/diff_test.exs | 18 ++++++++++++++++- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/lib/dmp/diff.ex b/lib/dmp/diff.ex index 60fd84f..66029b6 100644 --- a/lib/dmp/diff.ex +++ b/lib/dmp/diff.ex @@ -759,19 +759,21 @@ defmodule Dmp.Diff do * `rest2` - `text2` with the prefix removed. 
""" @spec common_prefix(String.t(), String.t()) :: {String.t(), String.t(), String.t()} - def common_prefix(text1, text2), do: do_common_prefix("", text1, text2) - - defp do_common_prefix(prefix, "", text2), do: {prefix, "", text2} - defp do_common_prefix(prefix, text1, ""), do: {prefix, text1, ""} + def common_prefix(text1, text2) do + l1 = String.to_charlist(text1) + l2 = String.to_charlist(text2) + {prefix, r1, r2} = do_common_prefix([], l1, l2) + {Enum.reverse(prefix) |> to_string(), to_string(r1), to_string(r2)} + end - defp do_common_prefix(prefix, text1, text2) do - {t1, rem1} = String.next_grapheme(text1) - {t2, rem2} = String.next_grapheme(text2) + defp do_common_prefix(prefix, [], l2), do: {prefix, [], l2} + defp do_common_prefix(prefix, l1, []), do: {prefix, l1, []} + defp do_common_prefix(prefix, [t1 | rem1] = l1, [t2 | rem2] = l2) do if t1 == t2 do - do_common_prefix(prefix <> t1, rem1, rem2) + do_common_prefix([t1 | prefix], rem1, rem2) else - {prefix, text1, text2} + {prefix, l1, l2} end end @@ -789,26 +791,20 @@ defmodule Dmp.Diff do """ @spec common_suffix(String.t(), String.t()) :: {String.t(), String.t(), String.t()} def common_suffix(text1, text2) do - # Cache the text lengths to prevent multiple calls. 
- text1_length = String.length(text1) - text2_length = String.length(text2) - n = min(text1_length, text2_length) - - if n == 0 do - {"", text1, text2} - else - suffix = - Enum.reduce_while(1..n, "", fn i, acc -> - ch = String.at(text1, text1_length - i) + l1 = String.to_charlist(text1) |> Enum.reverse() + l2 = String.to_charlist(text2) |> Enum.reverse() + {suffix, r1, r2} = do_common_suffix([], l1, l2) + {to_string(suffix), Enum.reverse(r1) |> to_string(), Enum.reverse(r2) |> to_string()} + end - if ch == String.at(text2, text2_length - i) do - {:cont, ch <> acc} - else - {:halt, acc} - end - end) + defp do_common_suffix(suffix, [], l2), do: {suffix, [], l2} + defp do_common_suffix(suffix, l1, []), do: {suffix, l1, []} - {suffix, String.replace_suffix(text1, suffix, ""), String.replace_suffix(text2, suffix, "")} + defp do_common_suffix(suffix, [t1 | rem1] = l1, [t2 | rem2] = l2) do + if t1 == t2 do + do_common_suffix([t1 | suffix], rem1, rem2) + else + {suffix, l1, l2} end end diff --git a/test/diff_test.exs b/test/diff_test.exs index ccb91cd..ac73305 100644 --- a/test/diff_test.exs +++ b/test/diff_test.exs @@ -39,6 +39,14 @@ defmodule DiffTest do assert {"1234", "abcdef", "xyz"} == Diff.common_prefix("1234abcdef", "1234xyz") end + test "very long sinhala case" do + prefix = String.duplicate("එය එසේ වුවත්, විනෝදය හෙළා දැකීම සහ", 5000) + t1 = prefix <> "abcdef" + t2 = prefix <> "xyz" + assert {answer, "abcdef", "xyz"} = Diff.common_prefix(t1, t2) + assert answer == prefix + end + test "whole case" do assert {"1234", "", "xyz"} == Diff.common_prefix("1234", "1234xyz") end @@ -53,6 +61,14 @@ defmodule DiffTest do assert {"1234", "abcdef", "xyz"} == Diff.common_suffix("abcdef1234", "xyz1234") end + test "very long sinhala case" do + suffix = String.duplicate("එය එසේ වුවත්, විනෝදය හෙළා දැකීම සහ", 5000) + t1 = "abcdef" <> suffix + t2 = "xyz" <> suffix + assert {answer, "abcdef", "xyz"} = Diff.common_suffix(t1, t2) + assert answer == suffix + end + test "whole case" do 
assert {"1234", "", "xyz"} == Diff.common_suffix("1234", "xyz1234") end @@ -645,7 +661,7 @@ defmodule DiffTest do end test "short strings bug in Elixir 1.18.2" do - assert [{:equal, "A"}, {:delete, "B"}, {:equal, "C"}] = Dmp.Diff.main("ABC", "AC") + assert [{:equal, "A"}, {:delete, "B"}, {:equal, "C"}] = Dmp.Diff.main("ABC", "AC") end # Perform a real diff.