|
19 | 19 | #include <stdarg.h>
|
20 | 20 | #include <string.h>
|
21 | 21 |
|
| 22 | +#include <sstream> |
22 | 23 | #include <string>
|
23 | 24 | #include <vector>
|
24 | 25 |
|
@@ -162,83 +163,59 @@ int c2numpy_addcolumn(c2numpy_writer *writer, const std::string name, c2numpy_ty
|
162 | 163 | }
|
163 | 164 |
|
164 | 165 | int c2numpy_open(c2numpy_writer *writer) {
|
165 |
| - std::string fileName = writer->outputFilePrefix + std::to_string(writer->currentFileNumber) + ".npy"; |
| 166 | + std::stringstream fileNameStream; |
| 167 | + fileNameStream << writer->outputFilePrefix; |
| 168 | + fileNameStream << writer->currentFileNumber; |
| 169 | + fileNameStream << ".npy"; |
| 170 | + std::string fileName = fileNameStream.str(); |
166 | 171 | writer->file = fopen(fileName.c_str(), "wb");
|
167 | 172 |
|
168 |
| - // FIXME: better initial guess about header size before going in 128 byte increments |
169 |
| - char *header = NULL; |
170 |
| - int64_t headerSize; |
171 |
| - for (headerSize = 128; headerSize <= 4294967295; headerSize += 128) { |
172 |
| - if (header != NULL) free(header); |
173 |
| - header = (char*)malloc(headerSize + 1); |
174 |
| - |
175 |
| - char version1 = headerSize <= 65535; |
176 |
| - uint32_t descrSize; |
177 |
| - if (version1) |
178 |
| - descrSize = headerSize - 10; |
179 |
| - else |
180 |
| - descrSize = headerSize - 12; |
181 |
| - |
182 |
| - header[0] = 147; // magic |
183 |
| - header[1] = 'N'; |
184 |
| - header[2] = 'U'; |
185 |
| - header[3] = 'M'; |
186 |
| - header[4] = 'P'; |
187 |
| - header[5] = 'Y'; |
188 |
| - if (version1) { |
189 |
| - header[6] = 1; // format version 1.0 |
190 |
| - header[7] = 0; |
191 |
| - const uint16_t descrSize2 = descrSize; |
192 |
| - *(uint16_t*)(header + 8) = descrSize2; // version 1.0 has a 16-byte descrSize |
193 |
| - } |
194 |
| - else { |
195 |
| - header[6] = 2; // format version 2.0 |
196 |
| - header[7] = 0; |
197 |
| - *(uint32_t*)(header + 8) = descrSize; // version 2.0 has a 32-byte descrSize |
198 |
| - } |
| 173 | + std::stringstream headerStream; |
| 174 | + headerStream << "{'descr': ["; |
199 | 175 |
|
200 |
| - int64_t offset = headerSize - descrSize; |
201 |
| - offset += snprintf((header + offset), headerSize - offset + 1, "{'descr': ["); |
202 |
| - if (offset >= headerSize) continue; |
| 176 | + int column; |
| 177 | + for (column = 0; column < writer->numColumns; ++column) { |
| 178 | + headerStream << "('" << writer->columnNames[column] << "', '" << c2numpy_descr(writer->columnTypes[column]) << "')"; |
| 179 | + if (column < writer->numColumns - 1) |
| 180 | + headerStream << ", "; |
| 181 | + } |
203 | 182 |
|
204 |
| - int column; |
205 |
| - for (column = 0; column < writer->numColumns; ++column) { |
206 |
| - offset += snprintf((header + offset), headerSize - offset + 1, "('%s', '%s')", |
207 |
| - writer->columnNames[column].c_str(), |
208 |
| - c2numpy_descr(writer->columnTypes[column])); |
209 |
| - if (offset >= headerSize) continue; |
| 183 | + headerStream << "], 'fortran_order': False, 'shape': ("; |
210 | 184 |
|
211 |
| - if (column < writer->numColumns - 1) |
212 |
| - offset += snprintf((header + offset), headerSize - offset + 1, ", "); |
213 |
| - if (offset >= headerSize) continue; |
214 |
| - } |
| 185 | + writer->sizeSeekPosition = headerStream.str().size(); |
215 | 186 |
|
216 |
| - offset += snprintf((header + offset), headerSize - offset + 1, "], 'fortran_order': False, 'shape': ("); |
217 |
| - if (offset >= headerSize) continue; |
| 187 | + headerStream << writer->numRowsPerFile; |
218 | 188 |
|
219 |
| - writer->sizeSeekPosition = offset; |
220 |
| - writer->sizeSeekSize = snprintf((header + offset), headerSize - offset + 1, "%d", writer->numRowsPerFile); |
221 |
| - offset += writer->sizeSeekSize; |
222 |
| - if (offset >= headerSize) continue; |
| 189 | + writer->sizeSeekSize = headerStream.str().size() - writer->sizeSeekPosition; |
223 | 190 |
|
224 |
| - offset += snprintf((header + offset), headerSize - offset + 1, ",), }"); |
225 |
| - if (offset >= headerSize) continue; |
| 191 | + headerStream << ",), }"; |
226 | 192 |
|
227 |
| - while (offset < headerSize) { |
228 |
| - if (offset < headerSize - 1) |
229 |
| - header[offset] = ' '; |
230 |
| - else |
231 |
| - header[offset] = '\n'; |
232 |
| - offset += 1; |
233 |
| - } |
234 |
| - header[headerSize] = 0; |
| 193 | + int headerSize = headerStream.str().size(); |
| 194 | + char version = 1; |
235 | 195 |
|
236 |
| - fwrite(header, 1, headerSize, writer->file); |
| 196 | + if (headerSize > 65535) version = 2; |
| 197 | + while ((6 + 2 + (version == 1 ? 2 : 4) + headerSize) % 16 != 0) { |
| 198 | + headerSize += 1; |
| 199 | + headerStream << " "; |
| 200 | + if (headerSize > 65535) version = 2; |
| 201 | + } |
237 | 202 |
|
238 |
| - return 0; |
| 203 | + fwrite("\x93NUMPY", 1, 6, writer->file); |
| 204 | + if (version == 1) { |
| 205 | + fwrite("\x01\x00", 1, 2, writer->file); |
| 206 | + fwrite(&headerSize, 1, 2, writer->file); |
| 207 | + writer->sizeSeekPosition += 6 + 2 + 2; |
| 208 | + } |
| 209 | + else { |
| 210 | + fwrite("\x02\x00", 1, 2, writer->file); |
| 211 | + fwrite(&headerSize, 1, 4, writer->file); |
| 212 | + writer->sizeSeekPosition += 6 + 2 + 4; |
239 | 213 | }
|
240 | 214 |
|
241 |
| - return -1; |
| 215 | + std::string header = headerStream.str(); |
| 216 | + fwrite(header.c_str(), 1, header.size(), writer->file); |
| 217 | + |
| 218 | + return 0; |
242 | 219 | }
|
243 | 220 |
|
244 | 221 | #define C2NUMPY_CHECK_ITEM { \
|
|
0 commit comments