Skip to content
This repository was archived by the owner on Mar 31, 2019. It is now read-only.

Commit 134c49f

Browse files
committed
also remove buffering from header-generation
1 parent 74a8bae commit 134c49f

File tree

2 files changed

+42
-65
lines changed

2 files changed

+42
-65
lines changed

c2numpy.h

+41-64
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <stdarg.h>
2020
#include <string.h>
2121

22+
#include <sstream>
2223
#include <string>
2324
#include <vector>
2425

@@ -162,83 +163,59 @@ int c2numpy_addcolumn(c2numpy_writer *writer, const std::string name, c2numpy_ty
162163
}
163164

164165
int c2numpy_open(c2numpy_writer *writer) {
165-
std::string fileName = writer->outputFilePrefix + std::to_string(writer->currentFileNumber) + ".npy";
166+
std::stringstream fileNameStream;
167+
fileNameStream << writer->outputFilePrefix;
168+
fileNameStream << writer->currentFileNumber;
169+
fileNameStream << ".npy";
170+
std::string fileName = fileNameStream.str();
166171
writer->file = fopen(fileName.c_str(), "wb");
167172

168-
// FIXME: better initial guess about header size before going in 128 byte increments
169-
char *header = NULL;
170-
int64_t headerSize;
171-
for (headerSize = 128; headerSize <= 4294967295; headerSize += 128) {
172-
if (header != NULL) free(header);
173-
header = (char*)malloc(headerSize + 1);
174-
175-
char version1 = headerSize <= 65535;
176-
uint32_t descrSize;
177-
if (version1)
178-
descrSize = headerSize - 10;
179-
else
180-
descrSize = headerSize - 12;
181-
182-
header[0] = 147; // magic
183-
header[1] = 'N';
184-
header[2] = 'U';
185-
header[3] = 'M';
186-
header[4] = 'P';
187-
header[5] = 'Y';
188-
if (version1) {
189-
header[6] = 1; // format version 1.0
190-
header[7] = 0;
191-
const uint16_t descrSize2 = descrSize;
192-
*(uint16_t*)(header + 8) = descrSize2; // version 1.0 has a 16-bit descrSize
193-
}
194-
else {
195-
header[6] = 2; // format version 2.0
196-
header[7] = 0;
197-
*(uint32_t*)(header + 8) = descrSize; // version 2.0 has a 32-bit descrSize
198-
}
173+
std::stringstream headerStream;
174+
headerStream << "{'descr': [";
199175

200-
int64_t offset = headerSize - descrSize;
201-
offset += snprintf((header + offset), headerSize - offset + 1, "{'descr': [");
202-
if (offset >= headerSize) continue;
176+
int column;
177+
for (column = 0; column < writer->numColumns; ++column) {
178+
headerStream << "('" << writer->columnNames[column] << "', '" << c2numpy_descr(writer->columnTypes[column]) << "')";
179+
if (column < writer->numColumns - 1)
180+
headerStream << ", ";
181+
}
203182

204-
int column;
205-
for (column = 0; column < writer->numColumns; ++column) {
206-
offset += snprintf((header + offset), headerSize - offset + 1, "('%s', '%s')",
207-
writer->columnNames[column].c_str(),
208-
c2numpy_descr(writer->columnTypes[column]));
209-
if (offset >= headerSize) continue;
183+
headerStream << "], 'fortran_order': False, 'shape': (";
210184

211-
if (column < writer->numColumns - 1)
212-
offset += snprintf((header + offset), headerSize - offset + 1, ", ");
213-
if (offset >= headerSize) continue;
214-
}
185+
writer->sizeSeekPosition = headerStream.str().size();
215186

216-
offset += snprintf((header + offset), headerSize - offset + 1, "], 'fortran_order': False, 'shape': (");
217-
if (offset >= headerSize) continue;
187+
headerStream << writer->numRowsPerFile;
218188

219-
writer->sizeSeekPosition = offset;
220-
writer->sizeSeekSize = snprintf((header + offset), headerSize - offset + 1, "%d", writer->numRowsPerFile);
221-
offset += writer->sizeSeekSize;
222-
if (offset >= headerSize) continue;
189+
writer->sizeSeekSize = headerStream.str().size() - writer->sizeSeekPosition;
223190

224-
offset += snprintf((header + offset), headerSize - offset + 1, ",), }");
225-
if (offset >= headerSize) continue;
191+
headerStream << ",), }";
226192

227-
while (offset < headerSize) {
228-
if (offset < headerSize - 1)
229-
header[offset] = ' ';
230-
else
231-
header[offset] = '\n';
232-
offset += 1;
233-
}
234-
header[headerSize] = 0;
193+
int headerSize = headerStream.str().size();
194+
char version = 1;
235195

236-
fwrite(header, 1, headerSize, writer->file);
196+
if (headerSize > 65535) version = 2;
197+
while ((6 + 2 + (version == 1 ? 2 : 4) + headerSize) % 16 != 0) {
198+
headerSize += 1;
199+
headerStream << " ";
200+
if (headerSize > 65535) version = 2;
201+
}
237202

238-
return 0;
203+
fwrite("\x93NUMPY", 1, 6, writer->file);
204+
if (version == 1) {
205+
fwrite("\x01\x00", 1, 2, writer->file);
206+
fwrite(&headerSize, 1, 2, writer->file);
207+
writer->sizeSeekPosition += 6 + 2 + 2;
208+
}
209+
else {
210+
fwrite("\x02\x00", 1, 2, writer->file);
211+
fwrite(&headerSize, 1, 4, writer->file);
212+
writer->sizeSeekPosition += 6 + 2 + 4;
239213
}
240214

241-
return -1;
215+
std::string header = headerStream.str();
216+
fwrite(header.c_str(), 1, header.size(), writer->file);
217+
218+
return 0;
242219
}
243220

244221
#define C2NUMPY_CHECK_ITEM { \

test.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ int main(int argc, char **argv) {
2222

2323
c2numpy_writer writer;
2424

25-
c2numpy_init(&writer, "testout", 5);
25+
c2numpy_init(&writer, "testout", 1000);
2626
c2numpy_addcolumn(&writer, "one", C2NUMPY_INTC);
2727
c2numpy_addcolumn(&writer, "two", C2NUMPY_FLOAT64);
2828
c2numpy_addcolumn(&writer, "three", (c2numpy_type)((int)C2NUMPY_STRING + 5));

0 commit comments

Comments
 (0)