7
7
8
8
class TextSegment :
9
9
def __init__ (self ):
10
- self ._text : GraphemeString = GraphemeString ("" )
10
+ self ._text : GlyphString = GlyphString ("" )
11
11
self ._immediate_preceding_marker : UsfmMarkerType = UsfmMarkerType .NO_MARKER
12
12
self ._markers_in_preceding_context : Set [UsfmMarkerType ] = set ()
13
13
self .previous_segment : Optional [TextSegment ] = None
@@ -32,7 +32,7 @@ def __eq__(self, value):
32
32
return True
33
33
34
34
@property
def text(self) -> "GlyphString":
    """Return the glyph-aware string currently held in ``self._text``."""
    current_text = self._text
    return current_text
37
37
38
38
@property
@@ -55,7 +55,7 @@ def is_last_segment_in_verse(self) -> bool:
55
55
return self .index_in_verse == self .num_segments_in_verse - 1
56
56
57
57
def replace_substring(self, start_index: int, end_index: int, replacement: str) -> None:
    """Rebuild this segment's text with ``replacement`` spliced in.

    The new text is ``substring_before(start_index) + replacement +
    substring_after(end_index)``, wrapped in a ``GlyphString``. When a USFM
    token is attached, its ``text`` is updated to the plain-string form of
    the new text so the two stay in sync.
    """
    # Keep the original evaluation order: prefix, concatenate, then suffix.
    head = self.substring_before(start_index) + replacement
    self._text = GlyphString(head + self.substring_after(end_index))

    token = self._usfm_token
    if token is None:
        return
    token.text = str(self._text)
61
61
@@ -77,49 +77,49 @@ def set_usfm_token(self, token: UsfmToken) -> "TextSegment.Builder":
77
77
return self
78
78
79
79
def set_text(self, text: str) -> "TextSegment.Builder":
    """Store ``text`` on the segment under construction as a ``GlyphString``.

    Returns the builder itself so calls can be chained.
    """
    wrapped_text = GlyphString(text)
    self._text_segment._text = wrapped_text
    return self
82
82
83
83
def build(self) -> "TextSegment":
    """Return the segment held in ``self._text_segment``."""
    segment = self._text_segment
    return segment
85
85
86
86
87
- class GraphemeString :
87
+ class GlyphString :
88
88
def __init__(self, string: str) -> None:
    """Wrap ``string`` and record where each glyph starts.

    A character starts a new glyph unless its Unicode general category is
    ``Mc`` or ``Mn`` (combining marks); those are treated as belonging to
    the glyph before them. ``_string_index_by_glyph_index`` maps each glyph
    index to the index in ``string`` where that glyph begins.
    """
    self._string = string
    glyph_start_positions = (
        position
        for position, char in enumerate(string)
        if unicodedata.category(char) not in ["Mc", "Mn"]
    )
    # enumerate() numbers the start positions 0, 1, 2, ... in string order.
    self._string_index_by_glyph_index = dict(enumerate(glyph_start_positions))
96
96
97
97
def __len__(self) -> int:
    """Return the number of glyphs (entries in the glyph-start map)."""
    glyph_starts = self._string_index_by_glyph_index
    return len(glyph_starts)
99
99
100
100
def __str__(self) -> str:
    """Return the underlying plain string unchanged."""
    plain = self._string
    return plain
102
102
103
103
def __eq__(self, other) -> bool:
    """Return True only when ``other`` is a ``GlyphString`` wrapping an equal string."""
    return isinstance(other, GlyphString) and self._string == other._string
107
107
108
def __getitem__(self, key) -> "GlyphString":
    """Return one glyph (int key) or a run of glyphs (slice key) as a new GlyphString.

    Indices count glyphs, not characters of the underlying string. They are
    passed through ``_normalize_start_index`` / ``_normalize_stop_index``
    and then translated to positions in the underlying string.

    Args:
        key: An ``int`` glyph index, or a ``slice`` whose step is ``None`` or 1.

    Returns:
        A new ``GlyphString`` wrapping the selected part of the underlying string.

    Raises:
        TypeError: If ``key`` is neither an int nor a slice, or if the slice
            has a step other than 1.
    """
    if isinstance(key, int):
        glyph_start = self._normalize_start_index(key)
        glyph_stop = self._normalize_stop_index(glyph_start + 1)
    elif isinstance(key, slice):
        if key.step is not None and key.step != 1:
            raise TypeError("Steps are not allowed in GlyphString slices")
        glyph_start = self._normalize_start_index(key.start)
        glyph_stop = self._normalize_stop_index(key.stop)
    else:
        raise TypeError("Indices must be integers or slices")

    # A start with no entry in the map falls back to the end of the underlying
    # string. The fallback is used as a *string* index, so it must be the
    # string length rather than the glyph count; the two differ as soon as the
    # string contains combining marks.
    string_start = self._string_index_by_glyph_index.get(glyph_start, len(self._string))
    # A stop with no entry in the map means "to the end of the string".
    string_stop = self._string_index_by_glyph_index.get(glyph_stop, None)
    return GlyphString(self._string[string_start:string_stop])
125
125
@@ -137,10 +137,10 @@ def _normalize_stop_index(self, index: Optional[int]) -> int:
137
137
return len (self ) + index
138
138
return index
139
139
140
def string_index_to_glyph_index(self, string_index: int) -> int:
    """Translate an index into the underlying string to a glyph index.

    An index equal to the length of the underlying string maps to the glyph
    count. Otherwise the index must be the recorded start of a glyph.

    Raises:
        ValueError: If ``string_index`` is not the start of any glyph.
    """
    if string_index == len(self._string):
        return len(self)

    not_found = object()
    glyph_index = next(
        (
            candidate
            for candidate, start in self._string_index_by_glyph_index.items()
            if start == string_index
        ),
        not_found,
    )
    if glyph_index is not_found:
        raise ValueError(f"No corresponding glyph index found for string index { string_index } .")
    return glyph_index
0 commit comments