@@ -68,28 +68,20 @@ ngram_ord_comparator(const void *a_raw, const void *b_raw)
68
68
}
69
69
70
70
static int
71
- read_ngram_instance (lineiter_t * * li , hash_table_t * wid ,
72
- logmath_t * lmath , int order , int order_max ,
73
- ngram_raw_t * raw_ngram )
71
+ ngrams_raw_read_line (lineiter_t * li , hash_table_t * wid ,
72
+ logmath_t * lmath , int order , int order_max ,
73
+ ngram_raw_t * raw_ngram )
74
74
{
75
- int n ;
75
+ int n , i ;
76
76
int words_expected ;
77
- int i ;
78
77
char * wptr [NGRAM_MAX_ORDER + 1 ];
79
78
uint32 * word_out ;
80
79
81
- if (* li )
82
- * li = lineiter_next (* li );
83
- if (* li == NULL ) {
84
- E_ERROR ("Unexpected end of ARPA file. Failed to read %d-gram\n" ,
85
- order );
86
- return -1 ;
87
- }
88
80
words_expected = order + 1 ;
89
81
if ((n =
90
- str2words (( * li ) -> buf , wptr ,
82
+ str2words (li -> buf , wptr ,
91
83
NGRAM_MAX_ORDER + 1 )) < words_expected ) {
92
- E_ERROR ("Format error; %d-gram ignored: %s \n" , order , ( * li ) -> buf );
84
+ E_ERROR ("Format error; %d-gram ignored at line %d \n" , order , li -> lineno );
93
85
return -1 ;
94
86
}
95
87
@@ -136,12 +128,12 @@ read_ngram_instance(lineiter_t ** li, hash_table_t * wid,
136
128
}
137
129
138
130
static int
139
- ngrams_raw_read_order (ngram_raw_t * * raw_ngrams , lineiter_t * * li ,
140
- hash_table_t * wid , logmath_t * lmath , uint32 count ,
131
+ ngrams_raw_read_section (ngram_raw_t * * raw_ngrams , lineiter_t * * li ,
132
+ hash_table_t * wid , logmath_t * lmath , uint32 * count ,
141
133
int order , int order_max )
142
134
{
143
135
char expected_header [20 ];
144
- uint32 i ;
136
+ uint32 i , cur ;
145
137
146
138
sprintf (expected_header , "\\%d-grams:" , order );
147
139
while (* li && strcmp ((* li )-> buf , expected_header ) != 0 ) {
@@ -153,14 +145,21 @@ ngrams_raw_read_order(ngram_raw_t ** raw_ngrams, lineiter_t ** li,
153
145
return -1 ;
154
146
}
155
147
156
- * raw_ngrams = (ngram_raw_t * ) ckd_calloc (count , sizeof (ngram_raw_t ));
157
- for (i = 0 ; i < count ; i ++ ) {
158
- if (read_ngram_instance (li , wid , lmath , order , order_max ,
159
- & ((* raw_ngrams )[i ])) < 0 )
160
- break ;
148
+ * raw_ngrams = (ngram_raw_t * ) ckd_calloc (* count , sizeof (ngram_raw_t ));
149
+ for (i = 0 , cur = 0 ; i < * count && * li != NULL ; i ++ ) {
150
+ * li = lineiter_next (* li );
151
+ if (* li == NULL ) {
152
+ E_ERROR ("Unexpected end of ARPA file. Failed to read %d-gram\n" ,
153
+ order );
154
+ return -1 ;
155
+ }
156
+ if (ngrams_raw_read_line (* li , wid , lmath , order , order_max ,
157
+ * raw_ngrams + cur ) == 0 ) {
158
+ cur ++ ;
159
+ }
161
160
}
162
-
163
- qsort (* raw_ngrams , count , sizeof (ngram_raw_t ), & ngram_ord_comparator );
161
+ * count = cur ;
162
+ qsort (* raw_ngrams , * count , sizeof (ngram_raw_t ), & ngram_ord_comparator );
164
163
return 0 ;
165
164
}
166
165
@@ -175,8 +174,8 @@ ngrams_raw_read_arpa(lineiter_t ** li, logmath_t * lmath, uint32 * counts,
175
174
(ngram_raw_t * * ) ckd_calloc (order - 1 , sizeof (* raw_ngrams ));
176
175
177
176
for (order_it = 2 ; order_it <= order ; order_it ++ ) {
178
- if (ngrams_raw_read_order (& raw_ngrams [order_it - 2 ], li , wid , lmath ,
179
- counts [ order_it - 1 ] , order_it , order ) < 0 )
177
+ if (ngrams_raw_read_section (& raw_ngrams [order_it - 2 ], li , wid , lmath ,
178
+ counts + order_it - 1 , order_it , order ) < 0 )
180
179
break ;
181
180
}
182
181
0 commit comments