Skip to content

Commit

Permalink
Fix working with non-ascii symbols.
Browse files Browse the repository at this point in the history
  • Loading branch information
Denis Krivak committed Sep 4, 2021
1 parent a0deae5 commit 728769b
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 12 deletions.
40 changes: 32 additions & 8 deletions checks.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ func checkCommentForPeriod(c comment) *Issue {
// Make a replacement. Use `pos.line` to get an original line from
// attached lines. Use `iss.Pos.Column` because it's a position in
// the original line.
original := []rune(c.lines[pos.line-1])
iss.Replacement = string(original[:iss.Pos.Column-1]) + "." +
string(original[iss.Pos.Column-1:])
original := c.lines[pos.line-1]
iss.Replacement = original[:iss.Pos.Column-1] + "." +
original[iss.Pos.Column-1:]

// Save replacement to raw lines to be able to combine it with
// further replacements
Expand Down Expand Up @@ -118,9 +118,11 @@ func checkCommentForCapital(c comment) []Issue {
// Make a replacement. Use `pos.line` to get an original line from
// attached lines. Use `iss.Pos.Column` because it's a position in
// the original line.
rep := []rune(c.lines[pos.line-1])
rep[iss.Pos.Column-1] = unicode.ToTitle(rep[iss.Pos.Column-1])
iss.Replacement = string(rep)
line := c.lines[pos.line-1]
col := byteToRuneColumn(line, iss.Pos.Column) - 1
rep := string(unicode.ToTitle([]rune(line)[col])) // capital letter
iss.Replacement = line[:iss.Pos.Column-1] + rep +
line[iss.Pos.Column-1+len(rep):]

// Save replacement to raw lines to be able to combine it with
// further replacements
Expand Down Expand Up @@ -158,7 +160,7 @@ func checkPeriod(comment string) (pos position, ok bool) {
return position{}, true
}

pos.column = len([]rune(line)) + 1
pos.column = len(line) + 1
return pos, false
}

Expand Down Expand Up @@ -209,7 +211,10 @@ func checkCapital(comment string, skipFirst bool) (pp []position) {
continue
}
if state == endOfSentence && unicode.IsLower(r) {
pp = append(pp, position{line: pos.line, column: pos.column})
pp = append(pp, position{
line: pos.line,
column: runeToByteColumn(comment, pos.column),
})
}
state = empty
}
Expand Down Expand Up @@ -267,3 +272,22 @@ func hasSuffix(s string, suffixes []string) bool {
}
return false
}

// The following two functions convert byte and rune indexes.
//
// Example:
// text: a b c Ш e f
// runes: 1 2 3 4 5 6
// bytes: 1 2 3 4 6 7
// The reason for the difference is that the size of "Ш" is 2 bytes.
// NOTE: Works only for 1-based indexes (line columns).

// byteToRuneColumn translates the 1-based byte column i inside s into the
// matching 1-based rune column: one plus the number of runes found in the
// i-1 bytes that precede the column.
func byteToRuneColumn(s string, i int) int {
	prefix := s[:i-1]

	// Ranging over a string advances one rune per iteration, so the loop
	// count equals the number of runes in the prefix.
	col := 1
	for range prefix {
		col++
	}
	return col
}

// runeToByteColumn converts the 1-based rune column i inside s into the
// 1-based byte column at which that rune starts.
//
// The string is walked in place instead of being round-tripped through
// []rune: converting to []rune and back re-encodes every invalid UTF-8 byte
// as the 3-byte replacement character U+FFFD, which would inflate the
// resulting byte column on lines that contain malformed bytes.
//
// A column one past the last rune maps to len(s)+1. Out-of-range columns are
// clamped rather than causing a panic: i < 1 yields 1, and any i beyond the
// end of the string yields len(s)+1.
func runeToByteColumn(s string, i int) int {
	if i < 1 {
		return 1
	}

	runeCol := 0
	// Ranging over a string yields the byte offset at which each rune
	// starts; an invalid byte counts as a single one-byte rune.
	for byteOff := range s {
		runeCol++
		if runeCol == i {
			return byteOff + 1
		}
	}

	// The requested column lies past the last rune.
	return len(s) + 1
}
78 changes: 76 additions & 2 deletions checks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ func TestCheckPeriod(t *testing.T) {
{
name: "cyrillic, without period",
text: "Кириллица",
issue: position{line: 1, column: 10},
issue: position{line: 1, column: 19},
},
{
name: "parenthesis, with period",
Expand Down Expand Up @@ -169,7 +169,7 @@ func TestCheckCapital(t *testing.T) {
text: "Кириллица? кириллица!",
skipFirst: false,
issues: []position{
{line: 1, column: 12},
{line: 1, column: 21},
},
},
{
Expand Down Expand Up @@ -395,3 +395,77 @@ func TestHasSuffix(t *testing.T) {
})
}
}

// TestByteToRuneColumn checks that byteToRuneColumn maps 1-based byte columns
// to 1-based rune columns for ASCII-only lines and for lines mixing ASCII
// with 2-byte Cyrillic letters, including columns located inside and right
// after a multi-byte tail.
func TestByteToRuneColumn(t *testing.T) {
	testCases := []struct {
		name  string
		str   string
		index int // byte column passed to byteToRuneColumn
		out   int // expected rune column
	}{
		{
			name:  "ascii symbols",
			str:   "hello, world",
			index: 5,
			out:   5,
		},
		{
			name:  "cyrillic symbols at the end",
			str:   "hello, мир",
			index: 5,
			out:   5,
		},
		{
			name:  "cyrillic symbols at the beginning",
			str:   "привет, world",
			index: 15,
			out:   9,
		},
		{
			// "hello, " is 7 bytes, "м" and "и" take 2 bytes each, so the
			// last letter starts at byte column 12 and is the 10th rune.
			name:  "index inside cyrillic symbols at the end",
			str:   "hello, мир",
			index: 12,
			out:   10,
		},
		{
			// The line is 13 bytes / 10 runes long.
			name:  "index right after the last symbol",
			str:   "hello, мир",
			index: 14,
			out:   11,
		},
	}

	for _, tt := range testCases {
		tt := tt // own copy per subtest; required before Go 1.22 loop semantics
		t.Run(tt.name, func(t *testing.T) {
			if out := byteToRuneColumn(tt.str, tt.index); out != tt.out {
				t.Fatalf("Wrong column\n expected: %d\n got: %d", tt.out, out)
			}
		})
	}
}

// TestRuneToByteColumn checks that runeToByteColumn maps 1-based rune columns
// to 1-based byte columns for ASCII-only lines and for lines mixing ASCII
// with 2-byte Cyrillic letters, including columns located inside and right
// after a multi-byte tail.
func TestRuneToByteColumn(t *testing.T) {
	testCases := []struct {
		name  string
		str   string
		index int // rune column passed to runeToByteColumn
		out   int // expected byte column
	}{
		{
			name:  "ascii symbols",
			str:   "hello, world",
			index: 5,
			out:   5,
		},
		{
			name:  "cyrillic symbols at the end",
			str:   "hello, мир",
			index: 5,
			out:   5,
		},
		{
			name:  "cyrillic symbols at the beginning",
			str:   "привет, world",
			index: 9,
			out:   15,
		},
		{
			// "hello, " is 7 bytes, "м" and "и" take 2 bytes each, so the
			// 10th rune starts at byte column 12.
			name:  "index inside cyrillic symbols at the end",
			str:   "hello, мир",
			index: 10,
			out:   12,
		},
		{
			// The line is 10 runes / 13 bytes long.
			name:  "index right after the last symbol",
			str:   "hello, мир",
			index: 11,
			out:   14,
		},
	}

	for _, tt := range testCases {
		tt := tt // own copy per subtest; required before Go 1.22 loop semantics
		t.Run(tt.name, func(t *testing.T) {
			if out := runeToByteColumn(tt.str, tt.index); out != tt.out {
				t.Fatalf("Wrong column\n expected: %d\n got: %d", tt.out, out)
			}
		})
	}
}
4 changes: 2 additions & 2 deletions godot.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ type Issue struct {

// position is a position inside a comment (might be multiline comment).
type position struct {
line int
column int
line int // starts at 1
column int // starts at 1, byte count
}

// comment is an internal representation of AST comment entity with additional
Expand Down

0 comments on commit 728769b

Please sign in to comment.