Skip to content

Commit b21f1ce

Browse files
committed
Added functions to record per-operation timings and integrated them into IOR.
1 parent acd3a15 commit b21f1ce

File tree

5 files changed

+102
-10
lines changed

5 files changed

+102
-10
lines changed

src/ior.c

+26-10
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ static char **ParseFileName(char *, int *);
6666
static void InitTests(IOR_test_t *);
6767
static void TestIoSys(IOR_test_t *);
6868
static void ValidateTests(IOR_param_t * params, MPI_Comm com);
69-
static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results,
69+
static IOR_offset_t WriteOrRead(IOR_param_t *test, int rep, IOR_results_t *results,
7070
aiori_fd_t *fd, const int access,
7171
IOR_io_buffers *ioBuffers);
7272

@@ -1267,7 +1267,7 @@ static void TestIoSys(IOR_test_t *test)
12671267
CurrentTimeString());
12681268
}
12691269
timer[IOR_TIMER_RDWR_START] = GetTimeStamp();
1270-
dataMoved = WriteOrRead(params, &results[rep], fd, WRITE, &ioBuffers);
1270+
dataMoved = WriteOrRead(params, rep, &results[rep], fd, WRITE, &ioBuffers);
12711271
if (params->verbose >= VERBOSE_4) {
12721272
fprintf(out_logfile, "* data moved = %llu\n", dataMoved);
12731273
fflush(out_logfile);
@@ -1318,7 +1318,7 @@ static void TestIoSys(IOR_test_t *test)
13181318
params->open = WRITECHECK;
13191319
fd = backend->open(testFileName, IOR_RDONLY, params->backend_options);
13201320
if(fd == NULL) FAIL("Cannot open file");
1321-
dataMoved = WriteOrRead(params, &results[rep], fd, WRITECHECK, &ioBuffers);
1321+
dataMoved = WriteOrRead(params, rep, &results[rep], fd, WRITECHECK, &ioBuffers);
13221322
backend->close(fd, params->backend_options);
13231323
rankOffset = 0;
13241324
}
@@ -1397,7 +1397,7 @@ static void TestIoSys(IOR_test_t *test)
13971397
CurrentTimeString());
13981398
}
13991399
timer[IOR_TIMER_RDWR_START] = GetTimeStamp();
1400-
dataMoved = WriteOrRead(params, &results[rep], fd, operation_flag, &ioBuffers);
1400+
dataMoved = WriteOrRead(params, rep, &results[rep], fd, operation_flag, &ioBuffers);
14011401
timer[IOR_TIMER_RDWR_STOP] = GetTimeStamp();
14021402
if (params->intraTestBarriers)
14031403
MPI_CHECK(MPI_Barrier(testComm),
@@ -1647,15 +1647,17 @@ IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, IOR_offs
16471647
return (offsetArray);
16481648
}
16491649

1650-
static IOR_offset_t WriteOrReadSingle(IOR_offset_t offset, int pretendRank, IOR_offset_t transfer, int * errors, IOR_param_t * test, aiori_fd_t * fd, IOR_io_buffers* ioBuffers, int access){
1650+
static IOR_offset_t WriteOrReadSingle(IOR_offset_t offset, int pretendRank, IOR_offset_t transfer, int * errors, IOR_param_t * test, aiori_fd_t * fd, IOR_io_buffers* ioBuffers, int access, OpTimer* ot, double startTime){
16511651
IOR_offset_t amtXferred = 0;
16521652

16531653
void *buffer = ioBuffers->buffer;
16541654
if (access == WRITE) {
16551655
/* fills each transfer with a unique pattern
16561656
* containing the offset into the file */
16571657
update_write_memory_pattern(offset, ioBuffers->buffer, transfer, test->setTimeStampSignature, pretendRank, test->dataPacketType, test->gpuMemoryFlags);
1658+
double start = GetTimeStamp();
16581659
amtXferred = backend->xfer(access, fd, buffer, transfer, offset, test->backend_options);
1660+
if(ot) OpTimerValue(ot, start - startTime, GetTimeStamp() - start);
16591661
if (amtXferred != transfer)
16601662
ERR("cannot write to file");
16611663
if (test->fsyncPerWrite)
@@ -1665,7 +1667,9 @@ static IOR_offset_t WriteOrReadSingle(IOR_offset_t offset, int pretendRank, IOR_
16651667
nanosleep( & wait, NULL);
16661668
}
16671669
} else if (access == READ) {
1670+
double start = GetTimeStamp();
16681671
amtXferred = backend->xfer(access, fd, buffer, transfer, offset, test->backend_options);
1672+
if(ot) OpTimerValue(ot, start - startTime, GetTimeStamp() - start);
16691673
if (amtXferred != transfer)
16701674
ERR("cannot read from file");
16711675
if (test->interIODelay > 0){
@@ -1674,13 +1678,17 @@ static IOR_offset_t WriteOrReadSingle(IOR_offset_t offset, int pretendRank, IOR_
16741678
}
16751679
} else if (access == WRITECHECK) {
16761680
invalidate_buffer_pattern(buffer, transfer, test->gpuMemoryFlags);
1681+
double start = GetTimeStamp();
16771682
amtXferred = backend->xfer(access, fd, buffer, transfer, offset, test->backend_options);
1683+
if(ot) OpTimerValue(ot, start - startTime, GetTimeStamp() - start);
16781684
if (amtXferred != transfer)
16791685
ERR("cannot read from file write check");
16801686
*errors += CompareData(buffer, transfer, test, offset, pretendRank, WRITECHECK);
16811687
} else if (access == READCHECK) {
16821688
invalidate_buffer_pattern(buffer, transfer, test->gpuMemoryFlags);
1689+
double start = GetTimeStamp();
16831690
amtXferred = backend->xfer(access, fd, buffer, transfer, offset, test->backend_options);
1691+
if(ot) OpTimerValue(ot, start - startTime, GetTimeStamp() - start);
16841692
if (amtXferred != transfer){
16851693
ERR("cannot read from file");
16861694
}
@@ -1703,7 +1711,7 @@ static void prefillSegment(IOR_param_t *test, void * randomPrefillBuffer, int pr
17031711
} else {
17041712
offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize);
17051713
}
1706-
WriteOrReadSingle(offset, pretendRank, test->randomPrefillBlocksize, & errors, test, fd, ioBuffers, WRITE);
1714+
WriteOrReadSingle(offset, pretendRank, test->randomPrefillBlocksize, & errors, test, fd, ioBuffers, WRITE, NULL, 0);
17071715
}
17081716
}
17091717
ioBuffers->buffer = oldBuffer;
@@ -1713,7 +1721,7 @@ static void prefillSegment(IOR_param_t *test, void * randomPrefillBuffer, int pr
17131721
* Write or Read data to file(s). This loops through the strides, writing
17141722
* out the data to each block in transfer sizes, until the remainder left is 0.
17151723
*/
1716-
static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results,
1724+
static IOR_offset_t WriteOrRead(IOR_param_t *test, int rep, IOR_results_t *results,
17171725
aiori_fd_t *fd, const int access, IOR_io_buffers *ioBuffers)
17181726
{
17191727
int errors = 0;
@@ -1746,7 +1754,14 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results,
17461754
memset(randomPrefillBuffer, -1, test->randomPrefillBlocksize);
17471755
}
17481756

1749-
// start timer after random offset was generated
1757+
/* Per operation statistics */
1758+
OpTimer * ot = NULL;
1759+
if(test->savePerOpDataCSV != NULL) {
1760+
char fname[FILENAME_MAX];
1761+
sprintf(fname, "%s-%d-%05d.csv", test->savePerOpDataCSV, rep, rank);
1762+
ot = OpTimerInit(fname);
1763+
}
1764+
// start timer after random offset was generated
17501765
startForStonewall = GetTimeStamp();
17511766
hitStonewall = 0;
17521767

@@ -1787,7 +1802,7 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results,
17871802
offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize);
17881803
}
17891804
}
1790-
dataMoved += WriteOrReadSingle(offset, pretendRank, test->transferSize, & errors, test, fd, ioBuffers, access);
1805+
dataMoved += WriteOrReadSingle(offset, pretendRank, test->transferSize, & errors, test, fd, ioBuffers, access, ot, startForStonewall);
17911806
pairCnt++;
17921807

17931808
hitStonewall = ((test->deadlineForStonewalling != 0
@@ -1850,7 +1865,7 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results,
18501865
offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize);
18511866
}
18521867
}
1853-
dataMoved += WriteOrReadSingle(offset, pretendRank, test->transferSize, & errors, test, fd, ioBuffers, access);
1868+
dataMoved += WriteOrReadSingle(offset, pretendRank, test->transferSize, & errors, test, fd, ioBuffers, access, ot, startForStonewall);
18541869
pairCnt++;
18551870
}
18561871
j = 0;
@@ -1860,6 +1875,7 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results,
18601875
point->pairs_accessed = pairCnt;
18611876
}
18621877

1878+
OpTimerFree(ot);
18631879
totalErrorCount += CountErrors(test, access, errors);
18641880

18651881
if (access == WRITE && test->fsync == TRUE) {

src/ior.h

+1
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ typedef struct
115115
IOR_offset_t expectedAggFileSize; /* calculated aggregate file size */
116116
IOR_offset_t randomPrefillBlocksize; /* prefill option for random IO, the amount of data used for prefill */
117117

118+
char * savePerOpDataCSV; /* save details about each I/O operation into this file */
118119
char * saveRankDetailsCSV; /* save the details about the performance to a file */
119120
int summary_every_test; /* flag to print summary every test, not just at end */
120121
int uniqueDir; /* use unique directory for each fpp */

src/parse_options.c

+3
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt
118118
fclose(fd);
119119
}
120120
params->saveRankDetailsCSV = strdup(value);
121+
} else if (strcasecmp(option, "savePerOpDataCSV") == 0){
122+
params->savePerOpDataCSV = strdup(value);
121123
} else if (strcasecmp(option, "summaryFormat") == 0) {
122124
if(strcasecmp(value, "default") == 0){
123125
outputFormat = OUTPUT_DEFAULT;
@@ -473,6 +475,7 @@ option_help * createGlobalOptions(IOR_param_t * params){
473475
{.help=" -O summaryFile=FILE -- store result data into this file", .arg = OPTION_OPTIONAL_ARGUMENT},
474476
{.help=" -O summaryFormat=[default,JSON,CSV] -- use the format for outputting the summary", .arg = OPTION_OPTIONAL_ARGUMENT},
475477
{.help=" -O saveRankPerformanceDetailsCSV=<FILE> -- store the performance of each rank into the named CSV file.", .arg = OPTION_OPTIONAL_ARGUMENT},
478+
{.help=" -O savePerOpDataCSV=<FILE> -- store the performance of each rank into an individual file prefixed with this option.", .arg = OPTION_OPTIONAL_ARGUMENT},
476479
{0, "dryRun", "do not perform any I/Os just run evtl. inputs print dummy output", OPTION_FLAG, 'd', & params->dryRun},
477480
LAST_OPTION,
478481
};

src/utilities.c

+66
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,72 @@ int verify_memory_pattern(uint64_t item, char * buffer, size_t bytes, int rand_s
235235
return error;
236236
}
237237

238+
/*
 * Per-operation timer state.
 *
 * Samples are collected in two parallel arrays and written to a CSV stream
 * in batches, so that recording a sample does not print to the stream on
 * every call.
 */
struct OpTimer{
  FILE * fd;       /* stream the samples are printed to */
  double * time;   /* time[i]: the "now" value recorded for buffered sample i */
  double * value;  /* value[i]: the "runTime" value recorded for buffered sample i */
  int pos;         /* number of samples currently held in time[] / value[] */
};

/* Number of samples kept in memory before they are flushed to the stream (1M). */
#define OP_BUFFER_SIZE 1000000
248+
249+
OpTimer* OpTimerInit(char * filename){
250+
if(filename == NULL) {
251+
return NULL;
252+
}
253+
OpTimer * ot = safeMalloc(sizeof(OpTimer));
254+
ot->value = safeMalloc(sizeof(double)*OP_BUFFER_SIZE);
255+
ot->time = safeMalloc(sizeof(double)*OP_BUFFER_SIZE);
256+
ot->pos = 0;
257+
ot->fd = fopen(filename, "w");
258+
if(ot->fd < 0){
259+
ERR("Could not create OpTimer");
260+
}
261+
char buff[] = "time,runtime\n";
262+
int ret = fwrite(buff, strlen(buff), 1, ot->fd);
263+
if(ret != 1){
264+
FAIL("Cannot write header to OpTimer file");
265+
}
266+
return ot;
267+
}
268+
269+
/*
 * Print every buffered sample to the timer's stream and empty the buffer.
 *
 * Each sample is printed as one "<time>,<runtime>" line using %.8e
 * notation. A NULL timer is ignored.
 */
void OpTimerFlush(OpTimer* ot){
  if(ot == NULL) {
    return;
  }
  const int buffered = ot->pos;
  const double * when = ot->time;
  const double * took = ot->value;
  for(int n = 0; n < buffered; n++){
    fprintf(ot->fd, "%.8e,%.8e\n", when[n], took[n]);
  }
  /* the buffer can be refilled from the start */
  ot->pos = 0;
}
278+
279+
/*
 * Record one sample in the timer's buffer.
 *
 * now:     value stored in the "time" column
 * runTime: value stored in the "runtime" column
 *
 * Once OP_BUFFER_SIZE samples are buffered they are flushed to the stream
 * via OpTimerFlush(). A NULL timer is ignored.
 */
void OpTimerValue(OpTimer* ot, double now, double runTime){
  if(ot == NULL) {
    return;
  }
  const int slot = ot->pos;
  ot->time[slot] = now;
  ot->value[slot] = runTime;
  ot->pos = slot + 1;
  /* buffer is full: drain it so the next sample starts at index 0 */
  if(ot->pos == OP_BUFFER_SIZE){
    OpTimerFlush(ot);
  }
}
289+
290+
/*
 * Destroy a timer: flush the samples still buffered, close the stream and
 * release all memory owned by the timer. A NULL timer is ignored.
 */
void OpTimerFree(OpTimer* ot){
  if(ot != NULL) {
    /* write out whatever has not reached the stream yet */
    OpTimerFlush(ot);
    ot->pos = 0;

    free(ot->time);
    ot->time = NULL;
    free(ot->value);
    ot->value = NULL;

    fclose(ot->fd);
    free(ot);
  }
}
303+
238304
void* safeMalloc(uint64_t size){
239305
void * d = malloc(size);
240306
if (d == NULL){

src/utilities.h

+6
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,12 @@ void DelaySecs(int delay);
6464
void updateParsedOptions(IOR_param_t * options, options_all_t * global_options);
6565
size_t NodeMemoryStringToBytes(char *size_str);
6666

67+
/* Opaque handle of a per-operation timer; the layout is private to utilities.c */
typedef struct OpTimer OpTimer;

/* Create a timer writing CSV samples to `filename`; returns NULL if filename is NULL */
OpTimer *OpTimerInit(char *filename);

/* Record one (now, runTime) sample; a NULL timer is ignored */
void OpTimerValue(OpTimer *otimer_in, double now, double runTime);

/* Print all buffered samples to the timer's file; a NULL timer is ignored */
void OpTimerFlush(OpTimer *otimer_in);

/* Flush remaining samples, close the file and free the timer; a NULL timer is ignored */
void OpTimerFree(OpTimer *otimer_in);
72+
6773
/* Returns -1, if cannot be read */
6874
int64_t ReadStoneWallingIterations(char * const filename, MPI_Comm com);
6975
void StoreStoneWallingIterations(char * const filename, int64_t count);

0 commit comments

Comments
 (0)