From ff477971ee5f22e671a366f7549fb0da338ab871 Mon Sep 17 00:00:00 2001 From: Paul Weiss Date: Mon, 2 Mar 2020 13:45:43 -0500 Subject: [PATCH] Add options to Diff and DiffChunks. The only implemented option is Transform, which allows modifying the lines before diffing. This allows one to ignore irrelevant changes, e.g. timestamps in log files. --- diff/diff.go | 42 ++++++++++++++++++++++++++++++++++++++---- diff/diff_test.go | 28 ++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/diff/diff.go b/diff/diff.go index 71b459f..9bf77c5 100644 --- a/diff/diff.go +++ b/diff/diff.go @@ -20,6 +20,21 @@ import ( "strings" ) +type options struct { + transform func(string) string +} + +// An Option will change the behavior of the diff. +type Option func(*options) + +// The Transform option alters lines before comparing. +// You can use it to ignore certain differences. +func Transform(tr func(string) string) Option { + return func(in *options) { + in.transform = tr + } +} + // Chunk represents a piece of the diff. A chunk will not have both added and // deleted lines. Equal lines are always after any added or deleted lines. // A Chunk may or may not have any lines in it, especially for the first or last @@ -37,10 +52,10 @@ func (c *Chunk) empty() bool { // Diff returns a string containing a line-by-line unified diff of the linewise // changes required to make A into B. Each line is prefixed with '+', '-', or // ' ' to indicate if it should be added, removed, or is correct respectively. 
-func Diff(A, B string) string { +func Diff(A, B string, opts ...Option) string { aLines := strings.Split(A, "\n") bLines := strings.Split(B, "\n") - return Render(DiffChunks(aLines, bLines)) + return Render(DiffChunks(aLines, bLines, opts...)) } // Render renders the slice of chunks into a representation that prefixes @@ -65,12 +80,31 @@ func Render(chunks []Chunk) string { // DiffChunks uses an O(D(N+M)) shortest-edit-script algorithm // to compute the edits required from A to B and returns the // edit chunks. -func DiffChunks(a, b []string) []Chunk { +func DiffChunks(a, b []string, opts ...Option) []Chunk { // algorithm: http://www.xmailserver.org/diff2.pdf + options := &options{} + for _, o := range opts { + o(options) + } + // We'll need these quantities a lot. alen, blen := len(a), len(b) // M, N + applyTransform := func(in []string) []string { + if options.transform == nil { + return in + } + out := make([]string, len(in)) + for i, s := range in { + out[i] = options.transform(s) + } + return out + } + + aa := applyTransform(a) + bb := applyTransform(b) + // At most, it will require len(a) deletions and len(b) additions // to transform a into b. maxPath := alen + blen // MAX @@ -125,7 +159,7 @@ dLoop: // On diagonal d, we can compute bidx from aidx. bidx := aidx - diag // y // See how far we can go on this diagonal before we find a difference. - for aidx < alen && bidx < blen && a[aidx] == b[bidx] { + for aidx < alen && bidx < blen && aa[aidx] == bb[bidx] { aidx++ bidx++ } diff --git a/diff/diff_test.go b/diff/diff_test.go index ebdd450..01f221d 100644 --- a/diff/diff_test.go +++ b/diff/diff_test.go @@ -226,3 +226,31 @@ States of America. // and our Posterity, do ordain and establish this Constitution for the United // States of America. 
} + +func TestDiffTransform(t *testing.T) { + a := strings.TrimSpace(` +10:01 Line 1 +10:01 Line 2 +10:01 Line 3 +`) + b := strings.TrimSpace(` +10:15 Line 1 +10:15 Line 2a +10:15 Line 3 +`) + want := ` 10:01 Line 1 +-10:01 Line 2 ++10:15 Line 2a + 10:01 Line 3` + got := Diff(a, b, Transform( + func(in string) string { + if len(in) < 6 { + return in + } + return in[6:] + })) + if got != want { + t.Errorf("GOT\n%#v\n", got) + t.Errorf("WANT\n%#v\n", want) + } +}