Skip to content

Commit f91e37c

Browse files
committed
fix TestMassiveRuneDiffConversion by skipping invalid Unicode code points
1 parent 3403a16 commit f91e37c

File tree

4 files changed

+64
-19
lines changed

4 files changed

+64
-19
lines changed

diffmatchpatch/diff.go

+16-11
Original file line numberDiff line numberDiff line change
@@ -388,24 +388,29 @@ func (dmp *DiffMatchPatch) diffBisectSplit(runes1, runes2 []rune, x, y int,
388388
}
389389

390390
// DiffLinesToChars splits two texts into a list of strings, and reduces the texts to a string of hashes where each Unicode character represents one line.
391-
// It's slightly faster to call DiffLinesToRunes first, followed by DiffMainRunes.
392391
func (dmp *DiffMatchPatch) DiffLinesToChars(text1, text2 string) (string, string, []string) {
393-
chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
394-
return chars1, chars2, lineArray
392+
chars1, chars2, lineArray := dmp.diffLinesToIndexes(text1, text2)
393+
return indexesToString(chars1), indexesToString(chars2), lineArray
395394
}
396395

397396
// DiffLinesToRunes splits two texts into a list of runes.
398397
func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune, []string) {
398+
chars1, chars2, lineArray := dmp.diffLinesToIndexes(text1, text2)
399+
return []rune(indexesToString(chars1)), []rune(indexesToString(chars2)), lineArray
400+
}
401+
402+
// diffLinesToIndexes splits two texts into a list of indexes
403+
func (dmp *DiffMatchPatch) diffLinesToIndexes(text1, text2 string) ([]index, []index, []string) {
399404
chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
400-
return []rune(chars1), []rune(chars2), lineArray
405+
return chars1, chars2, lineArray
401406
}
402407

403408
// DiffCharsToLines rehydrates the text in a diff from a string of line hashes to real lines of text.
404409
func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []Diff {
405410
hydrated := make([]Diff, 0, len(diffs))
406411
for _, aDiff := range diffs {
407412
var sb strings.Builder
408-
for _, i := range []rune(aDiff.Text) {
413+
for _, i := range stringToIndex(aDiff.Text) {
409414
sb.WriteString(lineArray[i])
410415
}
411416
aDiff.Text = sb.String()
@@ -1301,24 +1306,24 @@ func (dmp *DiffMatchPatch) DiffFromDelta(text1 string, delta string) (diffs []Di
13011306
}
13021307

13031308
// diffLinesToStrings splits two texts into a list of strings. Each string represents one line.
1304-
func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, string, []string) {
1309+
func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) ([]index, []index, []string) {
13051310
// '\x00' is a valid character, but various debuggers don't like it. So we'll insert a junk entry to avoid generating a null character.
13061311
lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'
13071312

13081313
// Each element is the index in lineArray of the line it stands for.
13091314
strIndexArray1 := dmp.diffLinesToStringsMunge(text1, &lineArray)
13101315
strIndexArray2 := dmp.diffLinesToStringsMunge(text2, &lineArray)
13111316

1312-
return intArrayToString(strIndexArray1), intArrayToString(strIndexArray2), lineArray
1317+
return strIndexArray1, strIndexArray2, lineArray
13131318
}
13141319

13151320
// diffLinesToStringsMunge splits a text into an array of strings, and reduces the text to a slice of indexes into lineArray, one per line.
1316-
func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string) []uint32 {
1321+
func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string) []index {
13171322
// Walk the text, pulling out a substring for each line. text.split('\n') would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect.
13181323
lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4
13191324
lineStart := 0
13201325
lineEnd := -1
1321-
strs := []uint32{}
1326+
strs := []index{}
13221327

13231328
for lineEnd < len(text)-1 {
13241329
lineEnd = indexOf(text, "\n", lineStart)
@@ -1332,11 +1337,11 @@ func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]str
13321337
lineValue, ok := lineHash[line]
13331338

13341339
if ok {
1335-
strs = append(strs, uint32(lineValue))
1340+
strs = append(strs, index(lineValue))
13361341
} else {
13371342
*lineArray = append(*lineArray, line)
13381343
lineHash[line] = len(*lineArray) - 1
1339-
strs = append(strs, uint32(len(*lineArray)-1))
1344+
strs = append(strs, index(len(*lineArray)-1))
13401345
}
13411346
}
13421347

diffmatchpatch/index.go

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package diffmatchpatch
2+
3+
type index uint32
4+
5+
const runeSkipStart = 0xd800
6+
const runeSkipEnd = 0xdfff + 1
7+
const runeMax = 0x110000 // next invalid code point
8+
9+
func stringToIndex(text string) []index {
10+
runes := []rune(text)
11+
indexes := make([]index, len(runes))
12+
for i, r := range runes {
13+
if r < runeSkipEnd {
14+
indexes[i] = index(r)
15+
} else {
16+
indexes[i] = index(r) - (runeSkipEnd - runeSkipStart)
17+
}
18+
}
19+
return indexes
20+
}
21+
22+
func indexesToString(indexes []index) string {
23+
runes := make([]rune, len(indexes))
24+
for i, index := range indexes {
25+
if index < runeSkipStart {
26+
runes[i] = rune(index)
27+
} else {
28+
runes[i] = rune(index + (runeSkipEnd - runeSkipStart))
29+
}
30+
}
31+
return string(runes)
32+
}

diffmatchpatch/index_test.go

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package diffmatchpatch
2+
3+
import (
4+
"github.com/stretchr/testify/assert"
5+
"testing"
6+
)
7+
8+
func TestIndexConversion(t *testing.T) {
9+
n := runeMax - (runeSkipEnd - runeSkipStart)
10+
indexes := make([]index, n)
11+
for i := 0; i < n; i++ {
12+
indexes[i] = index(i)
13+
}
14+
indexes2 := stringToIndex(indexesToString(indexes))
15+
assert.EqualValues(t, indexes, indexes2)
16+
}

diffmatchpatch/stringutil.go

-8
Original file line numberDiff line numberDiff line change
@@ -86,11 +86,3 @@ func runesIndex(r1, r2 []rune) int {
8686
}
8787
return -1
8888
}
89-
90-
func intArrayToString(ns []uint32) string {
91-
runes := make([]rune, len(ns))
92-
for i := 0; i < len(ns); i++ {
93-
runes[i] = rune(ns[i])
94-
}
95-
return string(runes)
96-
}

0 commit comments

Comments
 (0)