-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathot_khmer_machine.rl
84 lines (62 loc) · 1.84 KB
/
ot_khmer_machine.rl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
package harfbuzz
// Code generated with ragel -Z -o ot_khmer_machine.go ot_khmer_machine.rl ; sed -i '/^\/\/line/ d' ot_khmer_machine.go ; goimports -w ot_khmer_machine.go DO NOT EDIT.
// ported from harfbuzz/src/hb-ot-shape-complex-khmer-machine.rl Copyright © 2015 Google, Inc. Behdad Esfahbod
// Syllable kinds handed to foundSyllableKhmer by the scanner actions of
// khmerSyllableMachine. The numbering starts at zero and follows the
// declaration order below.
const (
	// khmerConsonantSyllable is reported for input matched by the
	// consonant_syllable rule.
	khmerConsonantSyllable = iota
	// khmerBrokenCluster is reported for input matched by the
	// broken_cluster rule.
	khmerBrokenCluster
	// khmerNonKhmerCluster is reported for any single symbol that matched
	// neither of the two rules above.
	khmerNonKhmerCluster
)
%%{
# Name of the state machine; every later Ragel section in this file adds to
# this same machine.
machine khmerSyllableMachine;
# The machine's alphabet (its input symbols) has the host type byte.
alphtype byte;
# Emit the definitions marked with the export keyword as constants in the
# generated Go code.
write exports;
# Emit the static data tables the generated scanner reads.
write data;
}%%
%%{
# Category codes that make up the machine's alphabet. The scanner reads one
# such code per buffer element (see the getkey statement in
# findSyllablesKhmer). The export keyword also publishes each code to the
# generated Go code through the "write exports" statement.
export C = 1;
export V = 2;
export ZWNJ = 5;
export ZWJ = 6;
export PLACEHOLDER = 11;
export DOTTEDCIRCLE = 12;
export Coeng= 14;
export Ra = 16;
export Robatic = 20;
export Xgroup = 21;
export Ygroup = 22;
export VAbv = 26;
export VBlw = 27;
export VPre = 28;
export VPst = 29;
# c: a single base symbol, any of C, Ra or V.
c = (C | Ra | V);
# cn: a base, optionally followed by Robatic; that Robatic may itself be
# preceded by at most one ZWJ or ZWNJ.
cn = c.((ZWJ|ZWNJ)?.Robatic)?;
# joiner: either of the two joiner codes.
joiner = (ZWJ | ZWNJ);
# xgroup: zero or more Xgroup symbols, each optionally preceded by any number
# of joiners.
xgroup = (joiner*.Xgroup)*;
# ygroup: zero or more Ygroup symbols.
ygroup = Ygroup*;
# This grammar was experimentally extracted from what Uniscribe allows.
# matra_group: optional VPre, VBlw, VAbv and VPst, in that fixed order, with
# xgroup runs allowed in between; VAbv may be preceded by one joiner.
matra_group = VPre? xgroup VBlw? xgroup (joiner?.VAbv)? xgroup VPst?;
# syllable_tail: xgroup, matra_group, xgroup, an optional Coeng followed by a
# base, then ygroup. Every component is optional.
syllable_tail = xgroup matra_group xgroup (Coeng.c)? ygroup;
# broken_cluster: zero or more Coeng+cn pairs, ended either by a lone Coeng
# or by a syllable_tail. It has no leading base.
broken_cluster = (Coeng.cn)* (Coeng | syllable_tail);
# consonant_syllable: a broken_cluster preceded by a base, which is a cn,
# PLACEHOLDER or DOTTEDCIRCLE.
consonant_syllable = (cn|PLACEHOLDER|DOTTEDCIRCLE) broken_cluster;
# other: fallback that matches any single symbol.
other = any;
# Scanner: repeatedly takes the longest match among the three patterns and
# runs the attached Go action. Each action calls foundSyllableKhmer with the
# syllable kind, the token bounds ts and te, the info slice and the address of
# syllableSerial; info and syllableSerial are locals of findSyllablesKhmer.
main := |*
consonant_syllable => { foundSyllableKhmer (khmerConsonantSyllable, ts, te, info, &syllableSerial); };
broken_cluster => { foundSyllableKhmer (khmerBrokenCluster, ts, te, info, &syllableSerial); };
other => { foundSyllableKhmer (khmerNonKhmerCluster, ts, te, info, &syllableSerial); };
*|;
}%%
// findSyllablesKhmer runs the khmerSyllableMachine scanner over buffer.Info,
// using each element's complexCategory as the input symbol. For every token
// the scanner recognises, its action calls foundSyllableKhmer with the
// syllable kind, the token bounds ts and te, the info slice and a pointer to
// the running syllableSerial counter.
//
// NOTE(review): what foundSyllableKhmer records, and how it advances
// syllableSerial, is defined outside this file section; confirm there.
func findSyllablesKhmer (buffer * Buffer) {
// Variables that the Ragel-generated code refers to by these exact names:
// p is the current input position, ts and te delimit the current token,
// act is the scanner's bookkeeping for the last matched pattern, and cs is
// the current state. They must not be renamed.
var p, ts, te, act, cs int
info := buffer.Info;
%%{
# Expands to the code that initialises the machine variables.
write init;
# Expression the generated code evaluates to fetch the symbol at position p.
getkey info[p].complexCategory;
}%%
// pe is the end of the input; setting eof equal to pe marks this block as
// the final one, so no further input follows.
pe := len(info)
eof := pe
// Counter whose address the scanner actions pass to foundSyllableKhmer;
// it starts at 1.
var syllableSerial uint8 = 1;
%%{
# Expands to the scanner loop itself, including the actions from main.
write exec;
}%%
_ = act // needed by Ragel, but unused
}