Documentation
¶
Overview ¶
Package diffx implements the Myers O(ND) diff algorithm with heuristics for better output quality on large files with many small, scattered changes.
Unlike simple Myers implementations, diffx includes:
- Preprocessing: Filters out high-frequency elements that cause spurious matches
- Heuristics: Early termination for expensive comparisons
- Postprocessing: Shifts diff boundaries for more readable output
Example ¶
package main
import (
"fmt"
"github.com/dacharyc/diffx"
)
// main diffs two short word lists and prints each resulting operation in a
// unified-diff-like form: a leading space for unchanged words, "-" for words
// removed from the first list, and "+" for words added from the second.
func main() {
	before := []string{"The", "quick", "brown", "fox", "jumps"}
	after := []string{"A", "slow", "red", "fox", "leaps"}

	for _, op := range diffx.Diff(before, after) {
		switch kind := op.Type; kind {
		case diffx.Equal:
			// Unchanged run: read it from the first sequence.
			fmt.Printf(" %v\n", before[op.AStart:op.AEnd])
		case diffx.Delete:
			// Removed run: indices refer to the first sequence.
			fmt.Printf("- %v\n", before[op.AStart:op.AEnd])
		case diffx.Insert:
			// Added run: indices refer to the second sequence.
			fmt.Printf("+ %v\n", after[op.BStart:op.BEnd])
		}
	}
}
Output:
- [The quick brown]
+ [A slow red]
 [fox]
- [jumps]
+ [leaps]
Index ¶
Examples ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type DiffOp ¶
// DiffOp represents a single edit operation with index ranges.
//
// The A fields index the first sequence passed to the diff function and the
// B fields index the second. Both ranges are half-open: the start index is
// inclusive and the end index is exclusive, so they can be used directly as
// slice bounds (for example a[op.AStart:op.AEnd]).
type DiffOp struct {
// Type identifies which kind of edit operation this is.
Type OpType
AStart int // start index in sequence A (inclusive)
AEnd int // end index in sequence A (exclusive)
BStart int // start index in sequence B (inclusive)
BEnd int // end index in sequence B (exclusive)
}
DiffOp represents a single edit operation with index ranges.
func Diff ¶
Diff compares two string slices using the Myers algorithm. For histogram-style diff, use DiffHistogram instead.
Example ¶
package main
import (
"fmt"
"github.com/dacharyc/diffx"
)
// main diffs two word lists where a single word was inserted and labels each
// resulting operation as KEEP, DELETE, or INSERT.
func main() {
	before := []string{"hello", "world"}
	after := []string{"hello", "there", "world"}

	for _, op := range diffx.Diff(before, after) {
		switch kind := op.Type; kind {
		case diffx.Equal:
			kept := before[op.AStart:op.AEnd]
			fmt.Printf("KEEP: %v\n", kept)
		case diffx.Delete:
			removed := before[op.AStart:op.AEnd]
			fmt.Printf("DELETE: %v\n", removed)
		case diffx.Insert:
			added := after[op.BStart:op.BEnd]
			fmt.Printf("INSERT: %v\n", added)
		}
	}
}
Output:
KEEP: [hello]
INSERT: [there]
KEEP: [world]
Example (Prose) ¶
package main
import (
"fmt"
"strings"
"github.com/dacharyc/diffx"
)
// main diffs two sentences word by word and reports how many distinct change
// regions the result contains. A change region is a maximal run of
// consecutive non-Equal operations.
func main() {
	// diffx groups changes coherently, avoiding fragmentation around common words
	before := strings.Split("The quick brown fox jumps over the lazy dog", " ")
	after := strings.Split("A slow red fox leaps over the sleeping cat", " ")

	// Count change regions (consecutive delete/insert operations) by
	// detecting each transition from "unchanged" into "changed".
	regions := 0
	prevChanged := false
	for _, op := range diffx.Diff(before, after) {
		changed := op.Type != diffx.Equal
		if changed && !prevChanged {
			regions++
		}
		prevChanged = changed
	}

	fmt.Printf("Change regions: %d\n", regions)
}
Output: Change regions: 3
Example (WithOptions) ¶
package main
import (
"fmt"
"github.com/dacharyc/diffx"
)
// main shows how to pass an option to Diff and prints the raw index ranges
// of every operation in the result.
func main() {
	before := []string{"a", "b", "c"}
	after := []string{"a", "x", "c"}

	// Force minimal edit distance (slower but mathematically optimal)
	minimal := diffx.WithMinimal(true)

	for _, op := range diffx.Diff(before, after, minimal) {
		fmt.Printf(
			"%s: A[%d:%d] B[%d:%d]\n",
			op.Type,
			op.AStart, op.AEnd,
			op.BStart, op.BEnd,
		)
	}
}
Output:
Equal: A[0:1] B[0:1]
Delete: A[1:2] B[1:1]
Insert: A[2:2] B[1:2]
Equal: A[2:3] B[2:3]
func DiffElements ¶
DiffElements compares arbitrary Element slices using the Myers algorithm. For histogram-style diff, use DiffElementsHistogram instead.
Example ¶
package main
import (
"fmt"
"github.com/dacharyc/diffx"
)
// CustomElement shows how a user-defined type can satisfy diffx.Element.
// Identity is determined by ID alone; Name is carried along but ignored
// when comparing or hashing.
type CustomElement struct {
	ID   int
	Name string
}

// Equal reports whether other is a CustomElement with the same ID.
// Any other dynamic type compares as not equal.
func (e CustomElement) Equal(other diffx.Element) bool {
	if o, ok := other.(CustomElement); ok {
		return o.ID == e.ID
	}
	return false
}

// Hash derives the hash from ID only, so two elements that Equal considers
// the same always produce the same hash.
func (e CustomElement) Hash() uint64 {
	id := e.ID
	return uint64(id)
}
// main diffs two slices of CustomElement values and prints the IDs covered
// by each operation. Because equality is based on ID, the renamed element
// with ID 1 is reported as kept rather than changed.
func main() {
	before := []diffx.Element{
		CustomElement{1, "Alice"},
		CustomElement{2, "Bob"},
		CustomElement{3, "Charlie"},
	}
	after := []diffx.Element{
		CustomElement{1, "Alice Smith"}, // Same ID, different name - considered equal
		CustomElement{4, "David"},       // New element
		CustomElement{3, "Charlie"},
	}

	for _, op := range diffx.DiffElements(before, after) {
		switch kind := op.Type; kind {
		case diffx.Equal:
			kept := getIDs(before[op.AStart:op.AEnd])
			fmt.Printf("KEEP: IDs %v\n", kept)
		case diffx.Delete:
			removed := getIDs(before[op.AStart:op.AEnd])
			fmt.Printf("DELETE: IDs %v\n", removed)
		case diffx.Insert:
			added := getIDs(after[op.BStart:op.BEnd])
			fmt.Printf("INSERT: IDs %v\n", added)
		}
	}
}
// getIDs returns the ID of every element in elems, in order.
// Each element must hold a CustomElement; the type assertion panics
// otherwise.
func getIDs(elems []diffx.Element) []int {
	ids := make([]int, 0, len(elems))
	for _, elem := range elems {
		ids = append(ids, elem.(CustomElement).ID)
	}
	return ids
}
Output:
KEEP: IDs [1]
DELETE: IDs [2]
INSERT: IDs [4]
KEEP: IDs [3]
func DiffElementsHistogram ¶
DiffElementsHistogram performs histogram-style diff on Element slices.
func DiffHistogram ¶
DiffHistogram performs histogram-style diff on string slices.
Example ¶
package main
import (
"fmt"
"github.com/dacharyc/diffx"
)
// main runs the histogram-style diff on two word lists that share a repeated
// token ("the") and prints the result in a unified-diff-like form.
func main() {
	// Histogram diff is especially good for files with many common tokens
	before := []string{"the", "quick", "fox", "the", "end"}
	after := []string{"the", "slow", "fox", "the", "end"}

	for _, op := range diffx.DiffHistogram(before, after) {
		switch kind := op.Type; kind {
		case diffx.Equal:
			fmt.Printf(" %v\n", before[op.AStart:op.AEnd])
		case diffx.Delete:
			fmt.Printf("- %v\n", before[op.AStart:op.AEnd])
		case diffx.Insert:
			fmt.Printf("+ %v\n", after[op.BStart:op.BEnd])
		}
	}
}
Output:
 [the]
- [quick]
+ [slow]
 [fox the end]
type Element ¶
// Element represents a comparable unit (line, word, token).
// Implementations must provide equality comparison and hashing, and the two
// must agree: elements that compare equal must return the same hash.
type Element interface {
// Equal reports whether this element is equal to another.
Equal(other Element) bool
// Hash returns a hash value for this element.
// Equal elements must have equal hashes.
Hash() uint64
}
Element represents a comparable unit (line, word, token). Implementations must provide equality comparison and hashing.
type OpType ¶
// OpType identifies the type of edit operation.
// The examples in this documentation use the values Equal, Insert, and
// Delete, whose String forms print as "Equal", "Insert", and "Delete".
type OpType int
OpType identifies the type of edit operation.
func (OpType) String ¶
String returns a string representation of the OpType.
Example ¶
package main
import (
"fmt"
"github.com/dacharyc/diffx"
)
// main prints the string form of each operation type, one per line.
func main() {
	for _, kind := range []diffx.OpType{diffx.Equal, diffx.Insert, diffx.Delete} {
		name := kind.String()
		fmt.Println(name)
	}
}
Output:
Equal
Insert
Delete
type Option ¶
// Option configures diff behavior.
// It is a function that receives a pointer to the package's unexported
// options value; Option values are obtained from the With* functions and
// passed as trailing arguments to the diff functions
// (for example Diff(a, b, WithMinimal(true))).
type Option func(*options)
Option configures diff behavior.
func WithAnchorElimination ¶
WithAnchorElimination enables or disables anchor elimination post-processing. Default: true.
func WithCostLimit ¶
WithCostLimit sets a custom early-termination threshold. A value of 0 means the threshold is calculated automatically from the input size. Default: 0.
func WithHeuristic ¶
WithHeuristic enables or disables speed heuristics. Default: true.
func WithMinimal ¶
WithMinimal forces a minimal edit script even if computing it is slow. Default: false.
func WithPostprocessing ¶
WithPostprocessing enables or disables boundary shifting. Default: true.
func WithPreprocessing ¶
WithPreprocessing enables or disables confusing element filtering. Default: true.
type StringElement ¶
// StringElement is the common case for line/word comparison.
// Its underlying type is string, and it has Equal and Hash methods with the
// signatures required by the Element interface.
type StringElement string
StringElement is the common case for line/word comparison.
func (StringElement) Equal ¶
func (s StringElement) Equal(other Element) bool
Equal reports whether s equals other. Returns false if other is not a StringElement.
func (StringElement) Hash ¶
func (s StringElement) Hash() uint64
Hash returns an FNV-1a hash of the string.