diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go
index 4f7b424..08c36e7 100644
--- a/diffmatchpatch/diff.go
+++ b/diffmatchpatch/diff.go
@@ -34,8 +34,6 @@ const (
DiffInsert Operation = 1
// DiffEqual item represents an equal diff.
DiffEqual Operation = 0
- //IndexSeparator is used to seperate the array indexes in an index string
- IndexSeparator = ","
)
// Diff represents one diff operation
@@ -406,14 +404,11 @@ func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune
func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []Diff {
hydrated := make([]Diff, 0, len(diffs))
for _, aDiff := range diffs {
- chars := strings.Split(aDiff.Text, IndexSeparator)
- text := make([]string, len(chars))
+ runes := []rune(aDiff.Text)
+ text := make([]string, len(runes))
- for i, r := range chars {
- i1, err := strconv.Atoi(r)
- if err == nil {
- text[i] = lineArray[i1]
- }
+ for i, r := range runes {
+ text[i] = lineArray[runeToInt(r)]
}
aDiff.Text = strings.Join(text, "")
@@ -1151,13 +1146,28 @@ func (dmp *DiffMatchPatch) DiffPrettyText(diffs []Diff) string {
switch diff.Type {
case DiffInsert:
- _, _ = buff.WriteString("\x1b[32m")
- _, _ = buff.WriteString(text)
- _, _ = buff.WriteString("\x1b[0m")
+ lines := strings.Split(text, "\n")
+ for i, line := range lines {
+ _, _ = buff.WriteString("\x1b[32m")
+ _, _ = buff.WriteString(line)
+ if i < len(lines)-1 {
+ _, _ = buff.WriteString("\x1b[0m\n")
+ } else {
+ _, _ = buff.WriteString("\x1b[0m")
+ }
+ }
+
case DiffDelete:
- _, _ = buff.WriteString("\x1b[31m")
- _, _ = buff.WriteString(text)
- _, _ = buff.WriteString("\x1b[0m")
+ lines := strings.Split(text, "\n")
+ for i, line := range lines {
+ _, _ = buff.WriteString("\x1b[31m")
+ _, _ = buff.WriteString(line)
+ if i < len(lines)-1 {
+ _, _ = buff.WriteString("\x1b[0m\n")
+ } else {
+ _, _ = buff.WriteString("\x1b[0m")
+ }
+ }
case DiffEqual:
_, _ = buff.WriteString(text)
}
@@ -1310,7 +1320,6 @@ func (dmp *DiffMatchPatch) DiffFromDelta(text1 string, delta string) (diffs []Di
// diffLinesToStrings splits two texts into a list of strings. Each string represents one line.
func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, string, []string) {
- // '\x00' is a valid character, but various debuggers don't like it. So we'll insert a junk entry to avoid generating a null character.
lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'
lineHash := make(map[string]int)
@@ -1321,12 +1330,11 @@ func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, stri
return intArrayToString(strIndexArray1), intArrayToString(strIndexArray2), lineArray
}
-// diffLinesToStringsMunge splits a text into an array of strings, and reduces the texts to a []string.
-func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string, lineHash map[string]int) []uint32 {
- // Walk the text, pulling out a substring for each line. text.split('\n') would would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect.
+// diffLinesToStringsMunge splits a text into an array of strings, and reduces the texts to a []index.
+func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string, lineHash map[string]int) []index {
lineStart := 0
lineEnd := -1
- strs := []uint32{}
+ strs := []index{}
for lineEnd < len(text)-1 {
lineEnd = indexOf(text, "\n", lineStart)
@@ -1340,11 +1348,11 @@ func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]str
lineValue, ok := lineHash[line]
if ok {
- strs = append(strs, uint32(lineValue))
+ strs = append(strs, index(lineValue))
} else {
*lineArray = append(*lineArray, line)
lineHash[line] = len(*lineArray) - 1
- strs = append(strs, uint32(len(*lineArray)-1))
+ strs = append(strs, index(len(*lineArray)-1))
}
}
diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go
index d6fed50..2c43864 100644
--- a/diffmatchpatch/diff_test.go
+++ b/diffmatchpatch/diff_test.go
@@ -314,12 +314,12 @@ func TestDiffLinesToChars(t *testing.T) {
dmp := New()
for i, tc := range []TestCase{
- {"", "alpha\r\nbeta\r\n\r\n\r\n", "", "1,2,3,3", []string{"", "alpha\r\n", "beta\r\n", "\r\n"}},
- {"a", "b", "1", "2", []string{"", "a", "b"}},
+ {"", "alpha\r\nbeta\r\n\r\n\r\n", "", "\x01\x02\x03\x03", []string{"", "alpha\r\n", "beta\r\n", "\r\n"}},
+ {"a", "b", "\x01", "\x02", []string{"", "a", "b"}},
// Omit final newline.
- {"alpha\nbeta\nalpha", "", "1,2,3", "", []string{"", "alpha\n", "beta\n", "alpha"}},
+ {"alpha\nbeta\nalpha", "", "\x01\x02\x03", "", []string{"", "alpha\n", "beta\n", "alpha"}},
// Same lines in Text1 and Text2
- {"abc\ndefg\n12345\n", "abc\ndef\n12345\n678", "1,2,3", "1,4,3,5", []string{"", "abc\n", "defg\n", "12345\n", "def\n", "678"}},
+ {"abc\ndefg\n12345\n", "abc\ndef\n12345\n678", "\x01\x02\x03", "\x01\x04\x03\x05", []string{"", "abc\n", "defg\n", "12345\n", "def\n", "678"}},
} {
actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(tc.Text1, tc.Text2)
assert.Equal(t, tc.ExpectedChars1, actualChars1, fmt.Sprintf("Test case #%d, %#v", i, tc))
@@ -332,14 +332,14 @@ func TestDiffLinesToChars(t *testing.T) {
lineList := []string{
"", // Account for the initial empty element of the lines array.
}
- var charList []string
+ var charList []index
for x := 1; x < n+1; x++ {
lineList = append(lineList, strconv.Itoa(x)+"\n")
- charList = append(charList, strconv.Itoa(x))
+ charList = append(charList, index(x))
}
lines := strings.Join(lineList, "")
- chars := strings.Join(charList[:], ",")
- assert.Equal(t, n, len(strings.Split(chars, ",")))
+ chars := indexesToString(charList)
+ assert.Equal(t, n, len(charList))
actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(lines, "")
assert.Equal(t, chars, actualChars1)
@@ -360,8 +360,8 @@ func TestDiffCharsToLines(t *testing.T) {
for i, tc := range []TestCase{
{
Diffs: []Diff{
- {DiffEqual, "1,2,1"},
- {DiffInsert, "2,1,2"},
+ {DiffEqual, "\x01\x02\x01"},
+ {DiffInsert, "\x02\x01\x02"},
},
Lines: []string{"", "alpha\n", "beta\n"},
@@ -380,13 +380,13 @@ func TestDiffCharsToLines(t *testing.T) {
lineList := []string{
"", // Account for the initial empty element of the lines array.
}
- charList := []string{}
+ charList := []index{}
for x := 1; x <= n; x++ {
lineList = append(lineList, strconv.Itoa(x)+"\n")
- charList = append(charList, strconv.Itoa(x))
+ charList = append(charList, index(x))
}
assert.Equal(t, n, len(charList))
- chars := strings.Join(charList[:], ",")
+ chars := indexesToString(charList)
actual := dmp.DiffCharsToLines([]Diff{Diff{DiffDelete, chars}}, lineList)
assert.Equal(t, []Diff{Diff{DiffDelete, strings.Join(lineList, "")}}, actual)
@@ -1033,6 +1033,17 @@ func TestDiffPrettyText(t *testing.T) {
Expected: "a\n\x1b[31mb\x1b[0m\x1b[32mc&d\x1b[0m",
},
+ {
+ Diffs: []Diff{
+ {Type: DiffEqual, Text: "a\n"},
+ {Type: DiffDelete, Text: "b\nc\n"},
+ {Type: DiffEqual, Text: "def"},
+ {Type: DiffInsert, Text: "\ng\nh"},
+ {Type: DiffEqual, Text: "\ni"},
+ },
+
+ Expected: "a\n\x1b[31mb\x1b[0m\n\x1b[31mc\x1b[0m\n\x1b[31m\x1b[0mdef\x1b[32m\x1b[0m\n\x1b[32mg\x1b[0m\n\x1b[32mh\x1b[0m\ni",
+ },
} {
actual := dmp.DiffPrettyText(tc.Diffs)
assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %#v", i, tc))
@@ -1468,6 +1479,38 @@ func TestMassiveRuneDiffConversion(t *testing.T) {
assert.NotEmpty(t, diffs)
}
+func TestDiffPartialLineIndex(t *testing.T) {
+ dmp := New()
+ t1, t2, tt := dmp.DiffLinesToChars(
+ `line 1
+line 2
+line 3
+line 4
+line 5
+line 6
+line 7
+line 8
+line 9
+line 10 text1`,
+ `line 1
+line 2
+line 3
+line 4
+line 5
+line 6
+line 7
+line 8
+line 9
+line 10 text2`)
+ diffs := dmp.DiffMain(t1, t2, false)
+ diffs = dmp.DiffCharsToLines(diffs, tt)
+ assert.Equal(t, []Diff{
+ Diff{DiffEqual, "line 1\nline 2\nline 3\nline 4\nline 5\nline 6\nline 7\nline 8\nline 9\n"},
+ Diff{DiffDelete, "line 10 text1"},
+ Diff{DiffInsert, "line 10 text2"},
+ }, diffs)
+}
+
func BenchmarkDiffMain(bench *testing.B) {
s1 := "`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"
s2 := "I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"
diff --git a/diffmatchpatch/index.go b/diffmatchpatch/index.go
new file mode 100644
index 0000000..965a1c6
--- /dev/null
+++ b/diffmatchpatch/index.go
@@ -0,0 +1,32 @@
+package diffmatchpatch
+
+type index uint32
+
+const runeSkipStart = 0xd800
+const runeSkipEnd = 0xdfff + 1
+const runeMax = 0x110000 // next invalid code point
+
+func stringToIndex(text string) []index {
+ runes := []rune(text)
+ indexes := make([]index, len(runes))
+ for i, r := range runes {
+ if r < runeSkipEnd {
+ indexes[i] = index(r)
+ } else {
+ indexes[i] = index(r) - (runeSkipEnd - runeSkipStart)
+ }
+ }
+ return indexes
+}
+
+func indexesToString(indexes []index) string {
+ runes := make([]rune, len(indexes))
+ for i, index := range indexes {
+ if index < runeSkipStart {
+ runes[i] = rune(index)
+ } else {
+ runes[i] = rune(index + (runeSkipEnd - runeSkipStart))
+ }
+ }
+ return string(runes)
+}
diff --git a/diffmatchpatch/index_test.go b/diffmatchpatch/index_test.go
new file mode 100644
index 0000000..6f1d982
--- /dev/null
+++ b/diffmatchpatch/index_test.go
@@ -0,0 +1,16 @@
+package diffmatchpatch
+
+import (
+ "github.com/stretchr/testify/assert"
+ "testing"
+)
+
+func TestIndexConversion(t *testing.T) {
+ n := runeMax - (runeSkipEnd - runeSkipStart)
+ indexes := make([]index, n)
+ for i := 0; i < n; i++ {
+ indexes[i] = index(i)
+ }
+ indexes2 := stringToIndex(indexesToString(indexes))
+ assert.EqualValues(t, indexes, indexes2)
+}
diff --git a/diffmatchpatch/patch_test.go b/diffmatchpatch/patch_test.go
index b019f88..c564f8c 100644
--- a/diffmatchpatch/patch_test.go
+++ b/diffmatchpatch/patch_test.go
@@ -337,3 +337,28 @@ func TestPatchApply(t *testing.T) {
assert.Equal(t, tc.ExpectedApplies, actualApplies, fmt.Sprintf("Test case #%d, %s", i, tc.Name))
}
}
+
+func TestPatchMakeOutOfRangePanic(t *testing.T) {
+ text1 := `
+ 1111111111111 000000
+ ------------- ------
+ xxxxxxxxxxxxx ------
+ xxxxxxxxxxxxx ------
+ xxxxxxxxxxxxx xxxxxx
+ xxxxxxxxxxxxx ......
+ xxxxxxxxxxxxx 111111
+ xxxxxxxxxxxxx ??????
+ xxxxxxxxxxxxx 333333
+ xxxxxxxxxxxxx 555555
+ xxxxxxxxxx xxxxx
+ xxxxxxxxxx xxxxx
+ xxxxxxxxxx xxxxx
+ xxxxxxxxxx xxxxx
+`
+ text2 := `
+ 2222222222222 000000
+ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx`
+ dmp := New()
+ patches := dmp.PatchMake(text1, text2)
+ assert.Equal(t, 6, len(patches), "TestPatchMakeOutOfRangePanic")
+}
diff --git a/diffmatchpatch/stringutil.go b/diffmatchpatch/stringutil.go
index 44c4359..573b6bf 100644
--- a/diffmatchpatch/stringutil.go
+++ b/diffmatchpatch/stringutil.go
@@ -9,11 +9,16 @@
package diffmatchpatch
import (
- "strconv"
+ "fmt"
"strings"
"unicode/utf8"
)
+const UNICODE_INVALID_RANGE_START = 0xD800
+const UNICODE_INVALID_RANGE_END = 0xDFFF
+const UNICODE_INVALID_RANGE_DELTA = UNICODE_INVALID_RANGE_END - UNICODE_INVALID_RANGE_START + 1
+const UNICODE_RANGE_MAX = 0x10FFFF
+
// unescaper unescapes selected chars for compatibility with JavaScript's encodeURI.
// In speed critical applications this could be dropped since the receiving application will certainly decode these fine. Note that this function is case-sensitive. Thus "%3F" would not be unescaped. But this is ok because it is only called with the output of HttpUtility.UrlEncode which returns lowercase hex. Example: "%3f" -> "?", "%24" -> "$", etc.
var unescaper = strings.NewReplacer(
@@ -88,19 +93,98 @@ func runesIndex(r1, r2 []rune) int {
return -1
}
-func intArrayToString(ns []uint32) string {
+func intArrayToString(ns []index) string {
if len(ns) == 0 {
return ""
}
- indexSeparator := IndexSeparator[0]
-
- // Appr. 3 chars per num plus the comma.
- b := []byte{}
+ b := []rune{}
for _, n := range ns {
- b = strconv.AppendInt(b, int64(n), 10)
- b = append(b, indexSeparator)
+ b = append(b, intToRune(uint32(n)))
}
- b = b[:len(b)-1]
return string(b)
}
+
+// These constants define the number of bits representable
+// in 1,2,3,4 byte utf8 sequences, respectively.
+const ONE_BYTE_BITS = 7
+const TWO_BYTE_BITS = 11
+const THREE_BYTE_BITS = 16
+const FOUR_BYTE_BITS = 21
+
+// Helper for getting a sequence of bits from an integer.
+func getBits(i uint32, cnt byte, from byte) byte {
+ return byte((i >> from) & ((1 << cnt) - 1))
+}
+
+// Converts an integer in the range 0~1112060 into a rune.
+// Based on the ranges table in https://en.wikipedia.org/wiki/UTF-8
+func intToRune(i uint32) rune {
+ if i < (1 << ONE_BYTE_BITS) {
+ return rune(i)
+ }
+
+ if i < (1 << TWO_BYTE_BITS) {
+ r, size := utf8.DecodeRune([]byte{0b11000000 | getBits(i, 5, 6), 0b10000000 | getBits(i, 6, 0)})
+ if size != 2 || r == utf8.RuneError {
+ panic(fmt.Sprintf("Error encoding an int %d with size 2, got rune %v and size %d", size, r, i))
+ }
+ return r
+ }
+
+ // Last -3 here needed because for some reason 3rd to last codepoint 65533 in this range
+ // was returning utf8.RuneError during encoding.
+ if i < ((1 << THREE_BYTE_BITS) - UNICODE_INVALID_RANGE_DELTA - 3) {
+ if i >= UNICODE_INVALID_RANGE_START {
+ i += UNICODE_INVALID_RANGE_DELTA
+ }
+
+ r, size := utf8.DecodeRune([]byte{0b11100000 | getBits(i, 4, 12), 0b10000000 | getBits(i, 6, 6), 0b10000000 | getBits(i, 6, 0)})
+ if size != 3 || r == utf8.RuneError {
+ panic(fmt.Sprintf("Error encoding an int %d with size 3, got rune %v and size %d", size, r, i))
+ }
+ return r
+ }
+
+ if i < (1<= UNICODE_INVALID_RANGE_END {
+ return result - UNICODE_INVALID_RANGE_DELTA
+ }
+
+ return result
+ }
+
+ if size == 4 {
+ result := uint32(bytes[0]&0b111)<<18 | uint32(bytes[1]&0b111111)<<12 | uint32(bytes[2]&0b111111)<<6 | uint32(bytes[3]&0b111111)
+ return result - UNICODE_INVALID_RANGE_DELTA - 3
+ }
+
+ panic(fmt.Sprintf("Unexpected state decoding rune=%v size=%d", r, size))
+}
diff --git a/diffmatchpatch/stringutil_test.go b/diffmatchpatch/stringutil_test.go
index ab2bc10..73ab6ca 100644
--- a/diffmatchpatch/stringutil_test.go
+++ b/diffmatchpatch/stringutil_test.go
@@ -114,3 +114,21 @@ func TestLastIndexOf(t *testing.T) {
assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %#v", i, tc))
}
}
+
+// Exhaustive check for all ints from 0 to 1112060 for correctness of implementation
+// of `intToRune() -> runeToInt()`.
+// This test is slow and runs longer than 5 seconds but it does provide a safety
+// guarantee that these 2 functions are correct for the ranges we support.
+func TestRuneToInt(t *testing.T) {
+
+ for i := uint32(0); i <= UNICODE_RANGE_MAX-UNICODE_INVALID_RANGE_DELTA-3; i += 1 {
+ r := intToRune(i)
+ ic := runeToInt(r)
+
+ assert.Equal(t, i, ic, fmt.Sprintf("intToRune(%d)=%d and runeToInt(%d)=%d", i, r, r, ic))
+ }
+
+ assert.Panics(t, func() {
+ intToRune(UNICODE_RANGE_MAX - UNICODE_INVALID_RANGE_DELTA - 2)
+ })
+}
diff --git a/go.mod b/go.mod
index c731033..c7886ce 100644
--- a/go.mod
+++ b/go.mod
@@ -8,4 +8,4 @@ require (
gopkg.in/yaml.v2 v2.4.0 // indirect
)
-go 1.12
+go 1.13