From 732adb87c90283bd8f8fce6d633eacc25e10b353 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 16 May 2025 05:12:25 +0200 Subject: [PATCH 1/4] update `imara-diff` to the latest version. --- gix-diff/Cargo.toml | 2 +- gix-merge/Cargo.toml | 2 +- gix-merge/fuzz/Cargo.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gix-diff/Cargo.toml b/gix-diff/Cargo.toml index c3f0cd92078..e251aa5d33e 100644 --- a/gix-diff/Cargo.toml +++ b/gix-diff/Cargo.toml @@ -42,7 +42,7 @@ gix-trace = { version = "^0.1.12", path = "../gix-trace", optional = true } gix-traverse = { version = "^0.46.2", path = "../gix-traverse", optional = true } thiserror = "2.0.0" -imara-diff = { version = "0.1.7", optional = true } +imara-diff = { version = "0.1.8", optional = true } serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } getrandom = { version = "0.2.8", optional = true, default-features = false, features = ["js"] } bstr = { version = "1.12.0", default-features = false } diff --git a/gix-merge/Cargo.toml b/gix-merge/Cargo.toml index 23976b05169..2a0faaa6431 100644 --- a/gix-merge/Cargo.toml +++ b/gix-merge/Cargo.toml @@ -35,7 +35,7 @@ gix-diff = { version = "^0.52.1", path = "../gix-diff", default-features = false gix-index = { version = "^0.40.1", path = "../gix-index" } thiserror = "2.0.0" -imara-diff = { version = "0.1.7" } +imara-diff = { version = "0.1.8" } bstr = { version = "1.12.0", default-features = false } serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } diff --git a/gix-merge/fuzz/Cargo.toml b/gix-merge/fuzz/Cargo.toml index 881b91cb8fd..c39a2b91fa2 100644 --- a/gix-merge/fuzz/Cargo.toml +++ b/gix-merge/fuzz/Cargo.toml @@ -12,7 +12,7 @@ cargo-fuzz = true anyhow = "1.0.76" libfuzzer-sys = "0.4" arbitrary = { version = "1.3.2", features = ["derive"] } -imara-diff = { version = "0.1.7" } +imara-diff = { version = "0.1.8" } gix-merge = { path = ".." } # Prevent this from interfering with workspaces From c84296b98876539e1b21072f32c0339932215f42 Mon Sep 17 00:00:00 2001 From: blinxen Date: Fri, 16 May 2025 05:04:03 +0200 Subject: [PATCH 2/4] feat: Add `Sink` that implements git's diffing improvement heuristics --- gix-diff/src/blob/git_diff.rs | 400 ++++++++++++++++++++++++++++++++++ gix-diff/src/blob/mod.rs | 3 + 2 files changed, 403 insertions(+) create mode 100644 gix-diff/src/blob/git_diff.rs diff --git a/gix-diff/src/blob/git_diff.rs b/gix-diff/src/blob/git_diff.rs new file mode 100644 index 00000000000..b45fa2b65c7 --- /dev/null +++ b/gix-diff/src/blob/git_diff.rs @@ -0,0 +1,400 @@ +//! Facilities to produce git-formatted diffs. + +use std::cmp::Ordering; +use std::fmt::Display; +use std::ops::Range; + +use imara_diff::intern::{InternedInput, Interner, Token}; +use imara_diff::Sink; + +// Explanation for the following numbers can be found here: +// https://github.com/git/git/blob/324fbaab88126196bd42e7fa383ee94e165d61b5/xdiff/xdiffi.c#L535 +const MAX_INDENT: u8 = 200; +const MAX_BLANKS: i16 = 20; +const INDENT_WEIGHT: i16 = 60; +const INDENT_HEURISTIC_MAX_SLIDING: usize = 100; + +const START_OF_FILE_PENALTY: i16 = 1; +const END_OF_FILE_PENALTY: i16 = 21; +const TOTAL_BLANK_WEIGHT: i16 = -30; +const POST_BLANK_WEIGHT: i16 = 6; +const RELATIVE_INDENT_PENALTY: i16 = -4; +const RELATIVE_INDENT_WITH_BLANK_PENALTY: i16 = 10; +const RELATIVE_OUTDENT_PENALTY: i16 = 24; +const RELATIVE_OUTDENT_WITH_BLANK_PENALTY: i16 = 17; +const RELATIVE_DEDENT_PENALTY: i16 = 23; +const RELATIVE_DEDENT_WITH_BLANK_PENALTY: i16 = 17; + +/// An enum indicating the kind of change that occurred. +#[derive(PartialEq, Debug)] +pub enum ChangeKind { + /// Indicates that a change introduced new lines. + Added, + /// Indicates that a change removed lines before the starting line of the change. + RemovedAbove, + /// Indicates that a change removed lines after the ending line of the change. + RemovedBelow, + /// Indicates that the change modified lines. + Modified, +} + +#[derive(PartialEq)] +struct Score { + effective_indent: i16, + penalty: i16, +} + +impl PartialOrd for Score { + // A score is considered "Greater" if it is equal or less than 0 + fn partial_cmp(&self, other: &Self) -> Option { + let indent_penalty = match self.effective_indent.cmp(&other.effective_indent) { + Ordering::Greater => INDENT_WEIGHT, + Ordering::Less => -INDENT_WEIGHT, + Ordering::Equal => 0, + }; + + Some((indent_penalty + (self.penalty - other.penalty)).cmp(&0).reverse()) + } +} + +/// A [`ChangeGroup`] represents a block of changed lines. +#[derive(PartialEq, Debug)] +pub struct ChangeGroup { + /// Range indicating the lines of the previous block. + /// To actually see how the previous block looked like, you need to combine this range with + /// the [`InternedInput`]. + before: Range, + /// Range indicating the lines of the new block + /// To actually see how the current block looks like, you need to combine this range with + /// the [`InternedInput`]. + after: Range, + change_kind: ChangeKind, +} + +/// A [`Sink`] that creates a diff like git would. +pub struct GitDiff<'a, T> +where + T: Display, +{ + after: &'a [Token], + interner: &'a Interner, + changes: Vec, +} + +// Calculate the indentation of a single line +fn get_indent(s: String) -> Option { + let mut indent = 0; + + for char in s.chars() { + if !char.is_whitespace() { + return Some(indent); + } else if char == ' ' { + indent += 1; + } else if char == '\t' { + indent += 8 - indent % 8; + } + + if indent >= MAX_INDENT { + return Some(MAX_INDENT); + } + } + + None +} + +fn measure_and_score_change(lines: &[Token], split: usize, interner: &Interner, score: &mut Score) { + // Gather information about the surroundings of the change + let end_of_file = split >= lines.len(); + let mut indent: Option = if split >= lines.len() { + None + } else { + get_indent(interner[lines[split]].to_string()) + }; + let mut pre_blank = 0; + let mut pre_indent: Option = None; + let mut post_blank = 0; + let mut post_indent: Option = None; + + for line in (0..=split.saturating_sub(1)).rev() { + pre_indent = get_indent(interner[lines[line]].to_string()); + if pre_indent.is_none() { + pre_blank += 1; + if pre_blank == MAX_BLANKS { + pre_indent = Some(0); + break; + } + } + } + for line in split + 1..lines.len() { + post_indent = get_indent(interner[lines[line]].to_string()); + if post_indent.is_none() { + post_blank += 1; + if post_blank == MAX_BLANKS { + post_indent = Some(0); + break; + } + } + } + + // Calculate score of the currently applied split + post_blank = if indent.is_none() { 1 + post_blank } else { 0 }; + let total_blank = pre_blank + post_blank; + if indent.is_none() { + indent = post_indent; + } + let any_blanks = total_blank != 0; + + if pre_indent.is_none() && pre_blank == 0 { + score.penalty += START_OF_FILE_PENALTY; + } + + if end_of_file { + score.penalty += END_OF_FILE_PENALTY; + } + + score.penalty += TOTAL_BLANK_WEIGHT * total_blank; + score.penalty += POST_BLANK_WEIGHT * post_blank; + + score.effective_indent += if let Some(indent) = indent { indent as i16 } else { -1 }; + + if indent.is_none() || pre_indent.is_none() || indent == pre_indent { + } else if indent > pre_indent { + score.penalty += if any_blanks { + RELATIVE_INDENT_WITH_BLANK_PENALTY + } else { + RELATIVE_INDENT_PENALTY + }; + } else if post_indent.is_some() && post_indent > indent { + score.penalty += if any_blanks { + RELATIVE_OUTDENT_WITH_BLANK_PENALTY + } else { + RELATIVE_OUTDENT_PENALTY + }; + } else { + score.penalty += if any_blanks { + RELATIVE_DEDENT_WITH_BLANK_PENALTY + } else { + RELATIVE_DEDENT_PENALTY + }; + } +} + +impl<'a, T> GitDiff<'a, T> +where + T: Display, +{ + /// Create a new instance of [`GitDiff`] that can then be passed to [`imara_diff::diff`] + /// and generate a more human-readable diff. + pub fn new(input: &'a InternedInput) -> Self { + Self { + after: &input.after, + interner: &input.interner, + changes: Vec::new(), + } + } +} + +impl Sink for GitDiff<'_, T> +where + T: Display, +{ + type Out = Vec; + + fn process_change(&mut self, before: Range, after: Range) { + if before.is_empty() && !after.is_empty() { + self.changes.push(ChangeGroup { + before: before.start as usize..before.end as usize, + after: after.start as usize..after.end as usize, + change_kind: ChangeKind::Added, + }); + } else if after.is_empty() && !before.is_empty() { + if after.start == 0 { + self.changes.push(ChangeGroup { + before: before.start as usize..before.end as usize, + after: after.start as usize..after.end as usize, + change_kind: ChangeKind::RemovedAbove, + }); + } else { + self.changes.push(ChangeGroup { + before: before.start as usize..before.end as usize, + after: after.start as usize..after.end as usize, + change_kind: ChangeKind::RemovedBelow, + }); + } + } else { + self.changes.push(ChangeGroup { + before: before.start as usize..before.end as usize, + after: after.start as usize..after.end as usize, + change_kind: ChangeKind::Modified, + }); + } + } + + fn finish(mut self) -> Self::Out { + if self.changes.is_empty() { + return self.changes; + } + let mut shift: usize; + + for change in self.changes.iter_mut() { + // Skip one liner changes + if change.after.is_empty() { + continue; + } + + // Move this change up by one line if the line before the change and the last line in + // the change are equal + loop { + if change.after.start > 0 && self.after[change.after.start - 1] == self.after[change.after.end - 1] { + change.after.start -= 1; + change.after.end -= 1; + } else { + break; + } + } + + shift = change.after.end; + + // Move this change down by one line if the first line in the change the line after the + // change are equal + loop { + if change.after.end < self.after.len() && self.after[change.after.start] == self.after[change.after.end] + { + change.after.start += 1; + change.after.end += 1; + } else { + break; + } + } + + let mut best_shift: Option = None; + let mut best_score = Score { + effective_indent: 0, + penalty: 0, + }; + + if change.after.end.saturating_sub(change.after.len()) > shift { + shift = change.after.end - change.after.len(); + } + + if change.after.end.saturating_sub(INDENT_HEURISTIC_MAX_SLIDING) > shift { + shift = change.after.end - INDENT_HEURISTIC_MAX_SLIDING; + } + + while shift <= change.after.end { + let mut score = Score { + effective_indent: 0, + penalty: 0, + }; + + measure_and_score_change(self.after, shift, self.interner, &mut score); + measure_and_score_change(self.after, shift - change.after.len(), self.interner, &mut score); + + if best_shift.is_none() || score > best_score { + best_score = score; + best_shift = Some(shift); + } + shift += 1; + } + + if let Some(best_shift) = best_shift { + while change.after.end > best_shift { + loop { + if change.after.start > 0 + && self.after[change.after.start - 1] == self.after[change.after.end - 1] + { + change.after.start -= 1; + change.after.end -= 1; + } else { + break; + } + } + } + } + } + + self.changes + } +} + +#[test] +fn git_diff_test() { + let before = r#"struct SomeStruct { + field1: f64, + field2: f64, +} + +fn main() { + // Some comment + let c = SomeStruct { field1: 10.0, field2: 10.0 }; + + println!( + "Print field1 from SomeStruct {}", + get_field1(&c) + ); +} + +fn get_field1(c: &SomeStruct) -> f64 { + c.field1 +} +"#; + + let after = r#"/// This is a struct +struct SomeStruct { + field1: f64, + field2: f64, +} + +fn main() { + let c = SomeStruct { field1: 10.0, field2: 10.0 }; + + println!( + "Print field1 and field2 from SomeStruct {} {}", + get_field1(&c), get_field2(&c) + ); + println!("Print another line"); +} + +fn get_field1(c: &SomeStruct) -> f64 { + c.field1 +} + +fn get_field2(c: &SomeStruct) -> f64 { + c.field2 +} +"#; + use crate::blob::git_diff::ChangeKind; + + let input = InternedInput::new(before, after); + let diff = imara_diff::diff(imara_diff::Algorithm::Histogram, &input, GitDiff::new(&input)); + assert_eq!( + diff, + vec![ + ChangeGroup { + before: 0..0, + after: 0..1, + change_kind: ChangeKind::Added + }, + ChangeGroup { + before: 6..7, + after: 7..7, + change_kind: ChangeKind::RemovedBelow + }, + ChangeGroup { + before: 10..12, + after: 10..12, + change_kind: ChangeKind::Modified + }, + ChangeGroup { + before: 13..13, + after: 13..14, + change_kind: ChangeKind::Added + }, + ChangeGroup { + before: 17..17, + after: 19..23, + change_kind: ChangeKind::Added + } + ] + ); +} diff --git a/gix-diff/src/blob/mod.rs b/gix-diff/src/blob/mod.rs index 541e978d752..889f04c503c 100644 --- a/gix-diff/src/blob/mod.rs +++ b/gix-diff/src/blob/mod.rs @@ -14,6 +14,9 @@ pub mod platform; pub mod unified_diff; pub use unified_diff::_impl::UnifiedDiff; +pub mod git_diff; +pub use git_diff::*; + /// Information about the diff performed to detect similarity. #[derive(Debug, Default, Clone, Copy, PartialEq, PartialOrd)] pub struct DiffLineStats { From 5e699d26846740ee098e780bb2b861fcae2d31ca Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 16 May 2025 05:14:47 +0200 Subject: [PATCH 3/4] refactor adjust Git sink to fit into to conform to project convetions --- gix-diff/src/blob/git_diff.rs | 173 +++++++-------------------- gix-diff/src/blob/mod.rs | 2 +- gix-diff/tests/diff/blob/git_diff.rs | 87 ++++++++++++++ gix-diff/tests/diff/blob/mod.rs | 1 + 4 files changed, 134 insertions(+), 129 deletions(-) create mode 100644 gix-diff/tests/diff/blob/git_diff.rs diff --git a/gix-diff/src/blob/git_diff.rs b/gix-diff/src/blob/git_diff.rs index b45fa2b65c7..8ddeed5e039 100644 --- a/gix-diff/src/blob/git_diff.rs +++ b/gix-diff/src/blob/git_diff.rs @@ -1,9 +1,9 @@ //! Facilities to produce git-formatted diffs. use std::cmp::Ordering; -use std::fmt::Display; use std::ops::Range; +use crate::blob::GitDiff; use imara_diff::intern::{InternedInput, Interner, Token}; use imara_diff::Sink; @@ -25,6 +25,20 @@ const RELATIVE_OUTDENT_WITH_BLANK_PENALTY: i16 = 17; const RELATIVE_DEDENT_PENALTY: i16 = 23; const RELATIVE_DEDENT_WITH_BLANK_PENALTY: i16 = 17; +pub(super) mod types { + use crate::blob::git_diff::ChangeGroup; + + /// A [`Sink`](imara_diff::Sink) that creates a diff like git would. + pub struct GitDiff<'a, T> + where + T: std::fmt::Display, + { + pub(crate) after: &'a [imara_diff::intern::Token], + pub(crate) interner: &'a imara_diff::intern::Interner, + pub(crate) changes: Vec, + } +} + /// An enum indicating the kind of change that occurred. #[derive(PartialEq, Debug)] pub enum ChangeKind { @@ -63,22 +77,13 @@ pub struct ChangeGroup { /// Range indicating the lines of the previous block. /// To actually see how the previous block looked like, you need to combine this range with /// the [`InternedInput`]. - before: Range, + pub before: Range, /// Range indicating the lines of the new block /// To actually see how the current block looks like, you need to combine this range with /// the [`InternedInput`]. - after: Range, - change_kind: ChangeKind, -} - -/// A [`Sink`] that creates a diff like git would. -pub struct GitDiff<'a, T> -where - T: Display, -{ - after: &'a [Token], - interner: &'a Interner, - changes: Vec, + pub after: Range, + /// Further specify what kind of change is denoted by the ranges above. + pub change_kind: ChangeKind, } // Calculate the indentation of a single line @@ -102,7 +107,12 @@ fn get_indent(s: String) -> Option { None } -fn measure_and_score_change(lines: &[Token], split: usize, interner: &Interner, score: &mut Score) { +fn measure_and_score_change( + lines: &[Token], + split: usize, + interner: &Interner, + score: &mut Score, +) { // Gather information about the surroundings of the change let end_of_file = split >= lines.len(); let mut indent: Option = if split >= lines.len() { @@ -181,7 +191,7 @@ fn measure_and_score_change(lines: &[Token], split: usize, interner: impl<'a, T> GitDiff<'a, T> where - T: Display, + T: std::fmt::Display, { /// Create a new instance of [`GitDiff`] that can then be passed to [`imara_diff::diff`] /// and generate a more human-readable diff. @@ -196,48 +206,37 @@ where impl Sink for GitDiff<'_, T> where - T: Display, + T: std::fmt::Display, { type Out = Vec; fn process_change(&mut self, before: Range, after: Range) { - if before.is_empty() && !after.is_empty() { - self.changes.push(ChangeGroup { - before: before.start as usize..before.end as usize, - after: after.start as usize..after.end as usize, - change_kind: ChangeKind::Added, - }); - } else if after.is_empty() && !before.is_empty() { - if after.start == 0 { - self.changes.push(ChangeGroup { - before: before.start as usize..before.end as usize, - after: after.start as usize..after.end as usize, - change_kind: ChangeKind::RemovedAbove, - }); - } else { - self.changes.push(ChangeGroup { - before: before.start as usize..before.end as usize, - after: after.start as usize..after.end as usize, - change_kind: ChangeKind::RemovedBelow, - }); + let change_kind = match (before.is_empty(), after.is_empty()) { + (true, false) => ChangeKind::Added, + (false, true) => { + if after.start == 0 { + ChangeKind::RemovedAbove + } else { + ChangeKind::RemovedBelow + } } - } else { - self.changes.push(ChangeGroup { - before: before.start as usize..before.end as usize, - after: after.start as usize..after.end as usize, - change_kind: ChangeKind::Modified, - }); - } + _ => ChangeKind::Modified, + }; + self.changes.push(ChangeGroup { + before: before.start as usize..before.end as usize, + after: after.start as usize..after.end as usize, + change_kind, + }); } fn finish(mut self) -> Self::Out { if self.changes.is_empty() { return self.changes; } - let mut shift: usize; - for change in self.changes.iter_mut() { - // Skip one liner changes + let mut shift: usize; + for change in &mut self.changes { + // Skip one-liner changes if change.after.is_empty() { continue; } @@ -316,85 +315,3 @@ where self.changes } } - -#[test] -fn git_diff_test() { - let before = r#"struct SomeStruct { - field1: f64, - field2: f64, -} - -fn main() { - // Some comment - let c = SomeStruct { field1: 10.0, field2: 10.0 }; - - println!( - "Print field1 from SomeStruct {}", - get_field1(&c) - ); -} - -fn get_field1(c: &SomeStruct) -> f64 { - c.field1 -} -"#; - - let after = r#"/// This is a struct -struct SomeStruct { - field1: f64, - field2: f64, -} - -fn main() { - let c = SomeStruct { field1: 10.0, field2: 10.0 }; - - println!( - "Print field1 and field2 from SomeStruct {} {}", - get_field1(&c), get_field2(&c) - ); - println!("Print another line"); -} - -fn get_field1(c: &SomeStruct) -> f64 { - c.field1 -} - -fn get_field2(c: &SomeStruct) -> f64 { - c.field2 -} -"#; - use crate::blob::git_diff::ChangeKind; - - let input = InternedInput::new(before, after); - let diff = imara_diff::diff(imara_diff::Algorithm::Histogram, &input, GitDiff::new(&input)); - assert_eq!( - diff, - vec![ - ChangeGroup { - before: 0..0, - after: 0..1, - change_kind: ChangeKind::Added - }, - ChangeGroup { - before: 6..7, - after: 7..7, - change_kind: ChangeKind::RemovedBelow - }, - ChangeGroup { - before: 10..12, - after: 10..12, - change_kind: ChangeKind::Modified - }, - ChangeGroup { - before: 13..13, - after: 13..14, - change_kind: ChangeKind::Added - }, - ChangeGroup { - before: 17..17, - after: 19..23, - change_kind: ChangeKind::Added - } - ] - ); -} diff --git a/gix-diff/src/blob/mod.rs b/gix-diff/src/blob/mod.rs index 889f04c503c..8adf1aaeffa 100644 --- a/gix-diff/src/blob/mod.rs +++ b/gix-diff/src/blob/mod.rs @@ -15,7 +15,7 @@ pub mod unified_diff; pub use unified_diff::_impl::UnifiedDiff; pub mod git_diff; -pub use git_diff::*; +pub use git_diff::types::GitDiff; /// Information about the diff performed to detect similarity. #[derive(Debug, Default, Clone, Copy, PartialEq, PartialOrd)] diff --git a/gix-diff/tests/diff/blob/git_diff.rs b/gix-diff/tests/diff/blob/git_diff.rs new file mode 100644 index 00000000000..002b1395667 --- /dev/null +++ b/gix-diff/tests/diff/blob/git_diff.rs @@ -0,0 +1,87 @@ +use gix_diff::blob::intern::InternedInput; +use gix_diff::blob::{ + git_diff::{ChangeGroup, ChangeKind}, + Algorithm, GitDiff, +}; + +#[test] +fn basic() { + let before = r#"struct SomeStruct { + field1: f64, + field2: f64, +} + +fn main() { + // Some comment + let c = SomeStruct { field1: 10.0, field2: 10.0 }; + + println!( + "Print field1 from SomeStruct {}", + get_field1(&c) + ); +} + +fn get_field1(c: &SomeStruct) -> f64 { + c.field1 +} +"#; + + let after = r#"/// This is a struct +struct SomeStruct { + field1: f64, + field2: f64, +} + +fn main() { + let c = SomeStruct { field1: 10.0, field2: 10.0 }; + + println!( + "Print field1 and field2 from SomeStruct {} {}", + get_field1(&c), get_field2(&c) + ); + println!("Print another line"); +} + +fn get_field1(c: &SomeStruct) -> f64 { + c.field1 +} + +fn get_field2(c: &SomeStruct) -> f64 { + c.field2 +} +"#; + use crate::blob::git_diff::ChangeKind; + + let input = InternedInput::new(before, after); + let diff = gix_diff::blob::diff(Algorithm::Histogram, &input, GitDiff::new(&input)); + assert_eq!( + diff, + vec![ + ChangeGroup { + before: 0..0, + after: 0..1, + change_kind: ChangeKind::Added + }, + ChangeGroup { + before: 6..7, + after: 7..7, + change_kind: ChangeKind::RemovedBelow + }, + ChangeGroup { + before: 10..12, + after: 10..12, + change_kind: ChangeKind::Modified + }, + ChangeGroup { + before: 13..13, + after: 13..14, + change_kind: ChangeKind::Added + }, + ChangeGroup { + before: 17..17, + after: 19..23, + change_kind: ChangeKind::Added + } + ] + ); +} diff --git a/gix-diff/tests/diff/blob/mod.rs b/gix-diff/tests/diff/blob/mod.rs index 1959c4e6fdb..49f1345a1de 100644 --- a/gix-diff/tests/diff/blob/mod.rs +++ b/gix-diff/tests/diff/blob/mod.rs @@ -1,3 +1,4 @@ +mod git_diff; pub(crate) mod pipeline; mod platform; mod unified_diff; From 55056467e8b1d2ee327e4f29df058224fb64db5e Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 18 May 2025 21:21:48 +0200 Subject: [PATCH 4/4] Make `GitDiff` compatible to strings and bytes. --- gix-diff/src/blob/git_diff.rs | 53 +++++----- gix-diff/tests/diff/blob/git_diff.rs | 143 ++++++++++++++++++++------- 2 files changed, 141 insertions(+), 55 deletions(-) diff --git a/gix-diff/src/blob/git_diff.rs b/gix-diff/src/blob/git_diff.rs index 8ddeed5e039..7cd148b9735 100644 --- a/gix-diff/src/blob/git_diff.rs +++ b/gix-diff/src/blob/git_diff.rs @@ -1,11 +1,11 @@ //! Facilities to produce git-formatted diffs. -use std::cmp::Ordering; -use std::ops::Range; - use crate::blob::GitDiff; +use bstr::ByteSlice; use imara_diff::intern::{InternedInput, Interner, Token}; use imara_diff::Sink; +use std::cmp::Ordering; +use std::ops::Range; // Explanation for the following numbers can be found here: // https://github.com/git/git/blob/324fbaab88126196bd42e7fa383ee94e165d61b5/xdiff/xdiffi.c#L535 @@ -29,9 +29,11 @@ pub(super) mod types { use crate::blob::git_diff::ChangeGroup; /// A [`Sink`](imara_diff::Sink) that creates a diff like git would. + /// + /// See the [diff slider repository](https://github.com/mhagger/diff-slider-tools) for more information. pub struct GitDiff<'a, T> where - T: std::fmt::Display, + T: AsRef<[u8]>, { pub(crate) after: &'a [imara_diff::intern::Token], pub(crate) interner: &'a imara_diff::intern::Interner, @@ -75,7 +77,7 @@ impl PartialOrd for Score { #[derive(PartialEq, Debug)] pub struct ChangeGroup { /// Range indicating the lines of the previous block. - /// To actually see how the previous block looked like, you need to combine this range with + /// To actually see what the previous block looked like, you need to combine this range with /// the [`InternedInput`]. pub before: Range, /// Range indicating the lines of the new block @@ -86,16 +88,28 @@ pub struct ChangeGroup { pub change_kind: ChangeKind, } -// Calculate the indentation of a single line -fn get_indent(s: String) -> Option { +impl ChangeGroup { + /// Return [before](Self::before) and [after](Self::after) as `u32` ranges for use in [Sink::process_change()]. + /// + /// This is useful for creating [unified diffs](crate::blob::UnifiedDiff), for example. + pub fn as_u32_ranges(&self) -> (Range, Range) { + ( + self.before.start as u32..self.before.end as u32, + self.after.start as u32..self.after.end as u32, + ) + } +} + +// Calculate the indentation of a single line as number of tabs. +fn get_indent(s: &[u8]) -> Option { let mut indent = 0; - for char in s.chars() { - if !char.is_whitespace() { + for char in s.bytes() { + if !char.is_ascii_whitespace() { return Some(indent); - } else if char == ' ' { + } else if char == b' ' { indent += 1; - } else if char == '\t' { + } else if char == b'\t' { indent += 8 - indent % 8; } @@ -107,18 +121,13 @@ fn get_indent(s: String) -> Option { None } -fn measure_and_score_change( - lines: &[Token], - split: usize, - interner: &Interner, - score: &mut Score, -) { +fn measure_and_score_change>(lines: &[Token], split: usize, interner: &Interner, score: &mut Score) { // Gather information about the surroundings of the change let end_of_file = split >= lines.len(); let mut indent: Option = if split >= lines.len() { None } else { - get_indent(interner[lines[split]].to_string()) + get_indent(interner[lines[split]].as_ref()) }; let mut pre_blank = 0; let mut pre_indent: Option = None; @@ -126,7 +135,7 @@ fn measure_and_score_change( let mut post_indent: Option = None; for line in (0..=split.saturating_sub(1)).rev() { - pre_indent = get_indent(interner[lines[line]].to_string()); + pre_indent = get_indent(interner[lines[line]].as_ref()); if pre_indent.is_none() { pre_blank += 1; if pre_blank == MAX_BLANKS { @@ -136,7 +145,7 @@ fn measure_and_score_change( } } for line in split + 1..lines.len() { - post_indent = get_indent(interner[lines[line]].to_string()); + post_indent = get_indent(interner[lines[line]].as_ref()); if post_indent.is_none() { post_blank += 1; if post_blank == MAX_BLANKS { @@ -191,7 +200,7 @@ fn measure_and_score_change( impl<'a, T> GitDiff<'a, T> where - T: std::fmt::Display, + T: AsRef<[u8]>, { /// Create a new instance of [`GitDiff`] that can then be passed to [`imara_diff::diff`] /// and generate a more human-readable diff. @@ -206,7 +215,7 @@ where impl Sink for GitDiff<'_, T> where - T: std::fmt::Display, + T: AsRef<[u8]>, { type Out = Vec; diff --git a/gix-diff/tests/diff/blob/git_diff.rs b/gix-diff/tests/diff/blob/git_diff.rs index 002b1395667..54199344981 100644 --- a/gix-diff/tests/diff/blob/git_diff.rs +++ b/gix-diff/tests/diff/blob/git_diff.rs @@ -1,11 +1,13 @@ use gix_diff::blob::intern::InternedInput; +use gix_diff::blob::unified_diff::{ContextSize, NewlineSeparator}; use gix_diff::blob::{ git_diff::{ChangeGroup, ChangeKind}, - Algorithm, GitDiff, + Algorithm, GitDiff, Sink, UnifiedDiff, }; +use std::hash::Hash; #[test] -fn basic() { +fn basic() -> crate::Result { let before = r#"struct SomeStruct { field1: f64, field2: f64, @@ -52,36 +54,111 @@ fn get_field2(c: &SomeStruct) -> f64 { "#; use crate::blob::git_diff::ChangeKind; + let expected = &[ + ChangeGroup { + before: 0..0, + after: 0..1, + change_kind: ChangeKind::Added, + }, + ChangeGroup { + before: 6..7, + after: 7..7, + change_kind: ChangeKind::RemovedBelow, + }, + ChangeGroup { + before: 10..12, + after: 10..12, + change_kind: ChangeKind::Modified, + }, + ChangeGroup { + before: 13..13, + after: 13..14, + change_kind: ChangeKind::Added, + }, + ChangeGroup { + before: 17..17, + after: 19..23, + change_kind: ChangeKind::Added, + }, + ]; + let input = InternedInput::new(before, after); - let diff = gix_diff::blob::diff(Algorithm::Histogram, &input, GitDiff::new(&input)); - assert_eq!( - diff, - vec![ - ChangeGroup { - before: 0..0, - after: 0..1, - change_kind: ChangeKind::Added - }, - ChangeGroup { - before: 6..7, - after: 7..7, - change_kind: ChangeKind::RemovedBelow - }, - ChangeGroup { - before: 10..12, - after: 10..12, - change_kind: ChangeKind::Modified - }, - ChangeGroup { - before: 13..13, - after: 13..14, - change_kind: ChangeKind::Added - }, - ChangeGroup { - before: 17..17, - after: 19..23, - change_kind: ChangeKind::Added - } - ] - ); + let actual = gix_diff::blob::diff(Algorithm::Histogram, &input, GitDiff::new(&input)); + assert_eq!(actual, expected); + insta::assert_snapshot!(uni_diff_string(&input, actual), @r#" + @@ -1,1 +1,2 @@ + +/// This is a struct + struct SomeStruct { + @@ -6,3 +7,2 @@ + fn main() { + - // Some comment + let c = SomeStruct { field1: 10.0, field2: 10.0 }; + @@ -10,5 +10,6 @@ + println!( + - "Print field1 from SomeStruct {}", + - get_field1(&c) + + "Print field1 and field2 from SomeStruct {} {}", + + get_field1(&c), get_field2(&c) + ); + + println!("Print another line"); + } + @@ -17,2 +19,6 @@ + c.field1 + + + +fn get_field2(c: &SomeStruct) -> f64 { + + c.field2 + +} + } + "#); + + let standard_slider = gix_diff::blob::diff(Algorithm::Histogram, &input, uni_diff(&input))?; + insta::assert_snapshot!(standard_slider, @r#" + @@ -1,1 +1,2 @@ + +/// This is a struct + struct SomeStruct { + @@ -6,3 +7,2 @@ + fn main() { + - // Some comment + let c = SomeStruct { field1: 10.0, field2: 10.0 }; + @@ -10,5 +10,6 @@ + println!( + - "Print field1 from SomeStruct {}", + - get_field1(&c) + + "Print field1 and field2 from SomeStruct {} {}", + + get_field1(&c), get_field2(&c) + ); + + println!("Print another line"); + } + @@ -17,2 +18,6 @@ + c.field1 + +} + + + +fn get_field2(c: &SomeStruct) -> f64 { + + c.field2 + } + "#); + + let input = InternedInput::new(before.as_bytes(), after.as_bytes()); + let actual = gix_diff::blob::diff(Algorithm::Histogram, &input, GitDiff::new(&input)); + assert_eq!(actual, expected); + + Ok(()) +} + +fn uni_diff>(input: &InternedInput) -> UnifiedDiff<'_, T, String> { + UnifiedDiff::new( + input, + String::default(), + NewlineSeparator::AfterHeaderAndLine("\n"), + ContextSize::symmetrical(1), + ) +} + +fn uni_diff_string>(input: &InternedInput, changes: Vec) -> String { + let mut uni = uni_diff(input); + for change in changes { + let (before, after) = change.as_u32_ranges(); + uni.process_change(before, after); + } + uni.finish().expect("in-memory is infallible") }