Skip to content

Commit 10d15e2

Browse files
committed
Auto merge of #10 - mozillazg:develop, r=mozillazg
v0.6.0
2 parents c695601 + 09484c3 commit 10d15e2

File tree

7 files changed

+188
-63
lines changed

7 files changed

+188
-63
lines changed

.travis.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,13 @@ sudo: false
1111

1212
before_install:
1313
- if ! go get code.google.com/p/go.tools/cmd/cover; then go get golang.org/x/tools/cmd/cover; fi
14-
- go get golang.org/x/crypto/ssh/terminal
14+
- go get github.com/mattn/go-isatty
1515
- go get github.com/axw/gocov/gocov
1616
- go get github.com/mattn/goveralls
1717

1818
script:
1919
- go run pinyin/main.go abc
20+
- go run pinyin/main.go -s Normal abc
21+
- echo "abc" | go run pinyin/main.go
22+
- echo "abc" > abc.txt && go run pinyin/main.go < abc.txt
2023
- $HOME/gopath/bin/goveralls -repotoken WAzmh3gmiQEUYEHM2wkwSH4oTrshvMmll

CHANGELOG.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,32 @@
11
# Changelog
22

33

4+
## 0.6.0 (2016-05-14)
5+
6+
* **NEW** 命令行程序支持指定拼音风格:
7+
8+
```shell
9+
$ pinyin -s Normal 你好
10+
ni hao
11+
```
12+
* **Bugfixed** 解决韵母 i, u, ü 的问题:根据以下拼音方案,还原出正确的韵母
13+
[#8](https://github.com/mozillazg/go-pinyin/pull/8), [python-pinyin#26](https://github.com/mozillazg/python-pinyin/pull/26)
14+
15+
> i 行的韵母,前面没有声母的时候,写成:yi(衣),yɑ(呀),ye(耶),
16+
> yɑo(腰),you(忧),yɑn(烟),yin(因),yɑnɡ(央),yinɡ(英),yonɡ(雍)。
17+
>
18+
> u 行的韵母,前面没有声母的时候,写成wu(乌),wɑ(蛙),wo(窝),
19+
> wɑi(歪),wei(威),wɑn(弯),wen(温),wɑnɡ(汪),wenɡ(翁)。
20+
>
21+
> ü行的韵母跟声母j,q,x拼的时候,写成ju(居),qu(区),xu(虚),
22+
> ü上两点也省略;但是跟声母l,n拼的时候,仍然写成lü(吕),nü(女)。
23+
24+
**注意** `y` 既不是声母也不是韵母。详见 [汉语拼音方案](http://www.edu.cn/20011114/3009777.shtml)
25+
26+
* **Bugfixed** 解决未正确处理鼻音 ḿ, ń, ň, ǹ 的问题:包含鼻音的拼音不应该有声母
27+
28+
29+
430
## 0.5.0 (2016-03-12)
531

632
* **CHANGE** 改为使用来自 [pinyin-data](https://github.com/mozillazg/pinyin-data) 的拼音数据。

TODO.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11

2-
* [ ] 同步更新 [python-pinyin][1] 中修复过的 BUG
3-
* [ ] fix "``ju``, ``qu``, ``xu``, ``yu``, ``yi````wu`` 的韵母"([python-pinyin#26][2])
4-
* [ ] fix "未正确处理鼻音"([python-pinyin#31](2))
5-
* [x] 命令行程序支持从标准输入读取数据(支持管道和重定向输入)
6-
* [ ] 命令行程序支持指定拼音风格
2+
* [x] 同步更新 [python-pinyin][1] 中修复过的 BUG
3+
* [x] fix "``ju``, ``qu``, ``xu``, ``yu``, ``yi``, ``wu`` 的韵母"([python-pinyin#26][2])
4+
* [x] fix "未正确处理鼻音"([python-pinyin#31][3])
5+
* [x] 命令行程序支持从标准输入读取数据(支持管道和重定向输入)
6+
* [x] 命令行程序支持指定拼音风格
7+
* [x] 命令行程序中改为使用 <https://github.com/mattn/go-isatty>
78

89
[1]: https://github.com/mozillazg/python-pinyin
910
[2]: https://github.com/mozillazg/python-pinyin/pull/26

phonetic_symbol.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,6 @@ var phoneticSymbol = map[string]string{
2828
"ǜ": "v4",
2929
"ń": "n2",
3030
"ň": "n3",
31-
"": "m2",
31+
"ǹ": "n4",
32+
"ḿ": "m2",
3233
}

pinyin.go

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import (
77

88
// Meta
99
const (
10-
Version = "0.5.0"
10+
Version = "0.6.0"
1111
Author = "mozillazg, 闲耘"
1212
License = "MIT"
1313
Copyright = "Copyright (c) 2016 mozillazg, 闲耘"
@@ -38,7 +38,7 @@ const (
3838
)
3939

4040
// 声母表
41-
var initials = strings.Split(
41+
var initialArray = strings.Split(
4242
"b,p,m,f,d,t,n,l,g,k,h,j,q,x,r,zh,ch,sh,z,c,s",
4343
",",
4444
)
@@ -83,6 +83,15 @@ var Fallback = func(r rune, a Args) []string {
8383
return []string{}
8484
}
8585

86+
var finalExceptionsMap = map[string]string{
87+
"ū": "ǖ",
88+
"ú": "ǘ",
89+
"ǔ": "ǚ",
90+
"ù": "ǜ",
91+
}
92+
var reFinalExceptions = regexp.MustCompile("^(j|q|x)(ū|ú|ǔ|ù)$")
93+
var reFinal2Exceptions = regexp.MustCompile("^(j|q|x)u(\\d?)$")
94+
8695
// NewArgs 返回包含默认配置的 `Args`
8796
func NewArgs() Args {
8897
return Args{Style, Heteronym, Separator, Fallback}
@@ -91,7 +100,7 @@ func NewArgs() Args {
91100
// 获取单个拼音中的声母
92101
func initial(p string) string {
93102
s := ""
94-
for _, v := range initials {
103+
for _, v := range initialArray {
95104
if strings.HasPrefix(p, v) {
96105
s = v
97106
break
@@ -102,17 +111,45 @@ func initial(p string) string {
102111

103112
// 获取单个拼音中的韵母
104113
func final(p string) string {
105-
i := initial(p)
106-
if i == "" {
107-
return p
114+
n := initial(p)
115+
if n == "" {
116+
return handleYW(p)
108117
}
109-
return strings.Join(strings.SplitN(p, i, 2), "")
118+
119+
// 特例 j/q/x
120+
matches := reFinalExceptions.FindStringSubmatch(p)
121+
// jū -> jǖ
122+
if len(matches) == 3 && matches[1] != "" && matches[2] != "" {
123+
v, _ := finalExceptionsMap[matches[2]]
124+
return v
125+
}
126+
// ju -> jv, ju1 -> jv1
127+
p = reFinal2Exceptions.ReplaceAllString(p, "${1}v$2")
128+
return strings.Join(strings.SplitN(p, n, 2), "")
129+
}
130+
131+
// 处理 y, w
132+
func handleYW(p string) string {
133+
// 特例 y/w
134+
if strings.HasPrefix(p, "yu") {
135+
p = "v" + p[2:] // yu -> v
136+
} else if strings.HasPrefix(p, "yi") {
137+
p = p[1:] // yi -> i
138+
} else if strings.HasPrefix(p, "y") {
139+
p = "i" + p[1:] // y -> i
140+
} else if strings.HasPrefix(p, "wu") {
141+
p = p[1:] // wu -> u
142+
} else if strings.HasPrefix(p, "w") {
143+
p = "u" + p[1:] // w -> u
144+
}
145+
return p
110146
}
111147

112148
func toFixed(p string, a Args) string {
113149
if a.Style == Initials {
114150
return initial(p)
115151
}
152+
origP := p
116153

117154
// 替换拼音中的带声调字符
118155
py := rePhoneticSymbol.ReplaceAllStringFunc(p, func(m string) string {
@@ -134,10 +171,18 @@ func toFixed(p string, a Args) string {
134171
switch a.Style {
135172
// 首字母
136173
case FirstLetter:
137-
py = string([]byte(py)[0])
174+
py = py[:1]
138175
// 韵母
139176
case Finals, FinalsTone, FinalsTone2:
140-
py = final(py)
177+
// 转换为 []rune unicode 编码用于获取第一个拼音字符
178+
// 因为 string 是 utf-8 编码不方便获取第一个拼音字符
179+
rs := []rune(origP)
180+
switch string(rs[0]) {
181+
// 因为鼻音没有声母所以不需要去掉声母部分
182+
case "ḿ", "ń", "ň", "ǹ":
183+
default:
184+
py = final(py)
185+
}
141186
}
142187
return py
143188
}

pinyin/main.go

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,31 +7,51 @@ import (
77
"os"
88
"strings"
99

10+
"github.com/mattn/go-isatty"
1011
"github.com/mozillazg/go-pinyin"
11-
"golang.org/x/crypto/ssh/terminal"
1212
)
1313

1414
func main() {
1515
heteronym := flag.Bool("e", false, "启用多音字模式")
16+
style := flag.String("s", "Tone", "指定拼音风格。可选值:Normal, Tone, Tone2, Initials, FirstLetter, Finals, FinalsTone, FinalsTone2")
1617
flag.Parse()
1718
hans := flag.Args()
18-
args := pinyin.NewArgs()
19-
args.Style = pinyin.Tone
2019
stdin := []byte{}
21-
if !terminal.IsTerminal(0) {
20+
if !isatty.IsTerminal(os.Stdin.Fd()) {
2221
stdin, _ = ioutil.ReadAll(os.Stdin)
2322
}
2423
if len(stdin) > 0 {
2524
hans = append(hans, string(stdin))
2625
}
2726

2827
if len(hans) == 0 {
29-
fmt.Println("请至少输入一个汉字: pinyin HANS [HANS ...]")
28+
fmt.Println("请至少输入一个汉字: pinyin [-e] [-s STYLE] HANS [HANS ...]")
3029
os.Exit(1)
3130
}
31+
32+
args := pinyin.NewArgs()
3233
if *heteronym {
3334
args.Heteronym = true
3435
}
36+
switch *style {
37+
case "Normal":
38+
args.Style = pinyin.Normal
39+
case "Tone2":
40+
args.Style = pinyin.Tone2
41+
case "Initials":
42+
args.Style = pinyin.Initials
43+
case "FirstLetter":
44+
args.Style = pinyin.FirstLetter
45+
case "Finals":
46+
args.Style = pinyin.Finals
47+
case "FinalsTone":
48+
args.Style = pinyin.FinalsTone
49+
case "FinalsTone2":
50+
args.Style = pinyin.FinalsTone2
51+
default:
52+
args.Style = pinyin.Tone
53+
}
54+
3555
pys := pinyin.Pinyin(strings.Join(hans, ""), args)
3656
for _, s := range pys {
3757
fmt.Print(strings.Join(s, ","), " ")

pinyin_test.go

Lines changed: 71 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,13 @@ type testCase struct {
1111
result [][]string
1212
}
1313

14-
func testPinyin(s string, d []testCase, f pinyinFunc) (t *testing.T) {
14+
func testPinyin(t *testing.T, s string, d []testCase, f pinyinFunc) {
1515
for _, tc := range d {
1616
v := f(s, tc.args)
1717
if !reflect.DeepEqual(v, tc.result) {
1818
t.Errorf("Expected %s, got %s", tc.result, v)
1919
}
2020
}
21-
return t
2221
}
2322

2423
func TestPinyin(t *testing.T) {
@@ -125,7 +124,7 @@ func TestPinyin(t *testing.T) {
125124
},
126125
}
127126

128-
testPinyin(hans, testData, Pinyin)
127+
testPinyin(t, hans, testData, Pinyin)
129128

130129
// 测试不是多音字的 Heteronym
131130
hans = "你"
@@ -143,7 +142,7 @@ func TestPinyin(t *testing.T) {
143142
},
144143
},
145144
}
146-
testPinyin(hans, testData, Pinyin)
145+
testPinyin(t, hans, testData, Pinyin)
147146
}
148147

149148
func TestNoneHans(t *testing.T) {
@@ -208,43 +207,6 @@ func TestFinal(t *testing.T) {
208207
}
209208
}
210209

211-
// `yu`, `y`, `w` 不是声母
212-
func TestNewInitials(t *testing.T) {
213-
hans := "鱼"
214-
testData := []testCase{
215-
testCase{
216-
Args{Style: Initials},
217-
[][]string{
218-
[]string{""},
219-
},
220-
},
221-
testCase{
222-
Args{Style: Finals},
223-
[][]string{
224-
[]string{"yu"},
225-
},
226-
},
227-
}
228-
testPinyin(hans, testData, Pinyin)
229-
230-
hans = "五"
231-
testData = []testCase{
232-
testCase{
233-
Args{Style: Initials},
234-
[][]string{
235-
[]string{""},
236-
},
237-
},
238-
testCase{
239-
Args{Style: Finals},
240-
[][]string{
241-
[]string{"wu"},
242-
},
243-
},
244-
}
245-
testPinyin(hans, testData, Pinyin)
246-
}
247-
248210
func TestFallback(t *testing.T) {
249211
hans := "中国人abc"
250212
testData := []testCase{
@@ -291,5 +253,72 @@ func TestFallback(t *testing.T) {
291253
},
292254
},
293255
}
294-
testPinyin(hans, testData, Pinyin)
256+
testPinyin(t, hans, testData, Pinyin)
257+
}
258+
259+
type testItem struct {
260+
hans string
261+
args Args
262+
result [][]string
263+
}
264+
265+
func testPinyinUpdate(t *testing.T, d []testItem, f pinyinFunc) {
266+
for _, tc := range d {
267+
v := f(tc.hans, tc.args)
268+
if !reflect.DeepEqual(v, tc.result) {
269+
t.Errorf("Expected %s, got %s", tc.result, v)
270+
}
271+
}
272+
}
273+
274+
func TestUpdated(t *testing.T) {
275+
testData := []testItem{
276+
// 误把 yu 放到声母列表了
277+
testItem{"鱼", Args{Style: Tone2}, [][]string{[]string{"yu2"}}},
278+
testItem{"鱼", Args{Style: Finals}, [][]string{[]string{"v"}}},
279+
testItem{"雨", Args{Style: Tone2}, [][]string{[]string{"yu3"}}},
280+
testItem{"雨", Args{Style: Finals}, [][]string{[]string{"v"}}},
281+
testItem{"元", Args{Style: Tone2}, [][]string{[]string{"yua2n"}}},
282+
testItem{"元", Args{Style: Finals}, [][]string{[]string{"van"}}},
283+
// y, w 也不是拼音, yu的韵母是v, yi的韵母是i, wu的韵母是u
284+
testItem{"呀", Args{Style: Initials}, [][]string{[]string{""}}},
285+
testItem{"呀", Args{Style: Tone2}, [][]string{[]string{"ya"}}},
286+
testItem{"呀", Args{Style: Finals}, [][]string{[]string{"ia"}}},
287+
testItem{"无", Args{Style: Initials}, [][]string{[]string{""}}},
288+
testItem{"无", Args{Style: Tone2}, [][]string{[]string{"wu2"}}},
289+
testItem{"无", Args{Style: Finals}, [][]string{[]string{"u"}}},
290+
testItem{"衣", Args{Style: Tone2}, [][]string{[]string{"yi1"}}},
291+
testItem{"衣", Args{Style: Finals}, [][]string{[]string{"i"}}},
292+
testItem{"万", Args{Style: Tone2}, [][]string{[]string{"wa4n"}}},
293+
testItem{"万", Args{Style: Finals}, [][]string{[]string{"uan"}}},
294+
// ju, qu, xu 的韵母应该是 v
295+
testItem{"具", Args{Style: FinalsTone}, [][]string{[]string{"ǜ"}}},
296+
testItem{"具", Args{Style: FinalsTone2}, [][]string{[]string{"v4"}}},
297+
testItem{"具", Args{Style: Finals}, [][]string{[]string{"v"}}},
298+
testItem{"取", Args{Style: FinalsTone}, [][]string{[]string{"ǚ"}}},
299+
testItem{"取", Args{Style: FinalsTone2}, [][]string{[]string{"v3"}}},
300+
testItem{"取", Args{Style: Finals}, [][]string{[]string{"v"}}},
301+
testItem{"徐", Args{Style: FinalsTone}, [][]string{[]string{"ǘ"}}},
302+
testItem{"徐", Args{Style: FinalsTone2}, [][]string{[]string{"v2"}}},
303+
testItem{"徐", Args{Style: Finals}, [][]string{[]string{"v"}}},
304+
// # ń
305+
testItem{"嗯", Args{Style: Normal}, [][]string{[]string{"n"}}},
306+
testItem{"嗯", Args{Style: Tone}, [][]string{[]string{"ń"}}},
307+
testItem{"嗯", Args{Style: Tone2}, [][]string{[]string{"n2"}}},
308+
testItem{"嗯", Args{Style: Initials}, [][]string{[]string{""}}},
309+
testItem{"嗯", Args{Style: FirstLetter}, [][]string{[]string{"n"}}},
310+
testItem{"嗯", Args{Style: Finals}, [][]string{[]string{"n"}}},
311+
testItem{"嗯", Args{Style: FinalsTone}, [][]string{[]string{"ń"}}},
312+
testItem{"嗯", Args{Style: FinalsTone2}, [][]string{[]string{"n2"}}},
313+
// # ḿ \u1e3f U+1E3F
314+
testItem{"呣", Args{Style: Normal}, [][]string{[]string{"m"}}},
315+
testItem{"呣", Args{Style: Tone}, [][]string{[]string{"ḿ"}}},
316+
testItem{"呣", Args{Style: Tone2}, [][]string{[]string{"m2"}}},
317+
testItem{"呣", Args{Style: Initials}, [][]string{[]string{""}}},
318+
testItem{"呣", Args{Style: FirstLetter}, [][]string{[]string{"m"}}},
319+
testItem{"呣", Args{Style: Finals}, [][]string{[]string{"m"}}},
320+
testItem{"呣", Args{Style: FinalsTone}, [][]string{[]string{"ḿ"}}},
321+
testItem{"呣", Args{Style: FinalsTone2}, [][]string{[]string{"m2"}}},
322+
}
323+
testPinyinUpdate(t, testData, Pinyin)
295324
}

0 commit comments

Comments
 (0)