Skip to content

Commit 6911b54

Browse files
committed
Support recording of per-operation timers in MDTest, too.
1 parent b21f1ce commit 6911b54

File tree

5 files changed

+57
-20
lines changed

5 files changed

+57
-20
lines changed

Diff for: src/ior.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -1759,7 +1759,7 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, int rep, IOR_results_t *resul
17591759
if(test->savePerOpDataCSV != NULL) {
17601760
char fname[FILENAME_MAX];
17611761
sprintf(fname, "%s-%d-%05d.csv", test->savePerOpDataCSV, rep, rank);
1762-
ot = OpTimerInit(fname);
1762+
ot = OpTimerInit(fname, test->transferSize);
17631763
}
17641764
// start timer after random offset was generated
17651765
startForStonewall = GetTimeStamp();
@@ -1875,7 +1875,7 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, int rep, IOR_results_t *resul
18751875
point->pairs_accessed = pairCnt;
18761876
}
18771877

1878-
OpTimerFree(ot);
1878+
OpTimerFree(& ot);
18791879
totalErrorCount += CountErrors(test, access, errors);
18801880

18811881
if (access == WRITE && test->fsync == TRUE) {

Diff for: src/mdtest.c

+42-10
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ typedef struct {
178178
int global_dir_layout;
179179
#endif /* HAVE_LUSTRE_LUSTREAPI */
180180
char * saveRankDetailsCSV; /* save the details about the performance to a file */
181+
char * savePerOpDataCSV;
181182
const char *prologue;
182183
const char *epilogue;
183184

@@ -197,6 +198,7 @@ static mdtest_options_t o;
197198

198199
/* This structure describes the processing status for stonewalling */
199200
typedef struct{
201+
OpTimer * ot; /* Operation timer*/
200202
double start_time;
201203

202204
int stone_wall_timer_seconds;
@@ -238,6 +240,7 @@ void VerboseMessage (int root_level, int any_level, int line, char * format, ...
238240
fflush(out_logfile);
239241
}
240242
}
243+
char const * mdtest_test_name(int i);
241244

242245
void parse_dirpath(char *dirpath_arg) {
243246
char * tmp, * token;
@@ -443,11 +446,13 @@ void create_remove_items_helper(const int dirs, const int create, const char *pa
443446

444447
for (uint64_t i = progress->items_start; i < progress->items_per_dir ; ++i) {
445448
if (!dirs) {
449+
double start = GetTimeStamp();
446450
if (create) {
447451
create_file (path, itemNum + i);
448452
} else {
449453
remove_file (path, itemNum + i);
450454
}
455+
if(progress->ot) OpTimerValue(progress->ot, start - progress->start_time, GetTimeStamp() - start);
451456
} else {
452457
create_remove_dirs (path, create, itemNum + i);
453458
}
@@ -644,14 +649,16 @@ void mdtest_stat(const int random, const int dirs, const long dir_iter, const ch
644649

645650
/* below temp used to be hiername */
646651
VERBOSE(3,5,"mdtest_stat %4s: %s", (dirs ? "dir" : "file"), item);
652+
double start = GetTimeStamp();
647653
if (-1 == o.backend->stat (item, &buf, o.backend_options)) {
648654
WARNF("unable to stat %s %s", dirs ? "directory" : "file", item);
649655
}
656+
if(progress->ot) OpTimerValue(progress->ot, start - progress->start_time, GetTimeStamp() - start);
650657
}
651658
}
652659

653660
/* reads all of the items created as specified by the input parameters */
654-
void mdtest_read(int random, int dirs, const long dir_iter, char *path) {
661+
void mdtest_read(int random, int dirs, const long dir_iter, char *path, rank_progress_t * progress) {
655662
uint64_t parent_dir, item_num = 0;
656663
char item[MAX_PATHLEN], temp[MAX_PATHLEN];
657664
aiori_fd_t *aiori_fh;
@@ -732,6 +739,7 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) {
732739

733740
o.hints.filePerProc = ! o.shared_file;
734741

742+
double start = GetTimeStamp();
735743
/* open file for reading */
736744
aiori_fh = o.backend->open (item, O_RDONLY, o.backend_options);
737745
if (NULL == aiori_fh) {
@@ -746,7 +754,7 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) {
746754
WARNF("unable to read file %s", item);
747755
o.verification_error += 1;
748756
continue;
749-
}
757+
}
750758
int pretend_rank = (2 * o.nstride + rank) % o.size;
751759
if(o.verify_read){
752760
if (o.shared_file) {
@@ -759,6 +767,7 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) {
759767
}
760768
}
761769
}
770+
if(progress->ot) OpTimerValue(progress->ot, start - progress->start_time, GetTimeStamp() - start);
762771

763772
/* close file */
764773
o.backend->close (aiori_fh, o.backend_options);
@@ -1200,7 +1209,7 @@ void file_test_create(const int iteration, const int ntasks, const char *path, r
12001209
}
12011210
MPI_Barrier(testComm);
12021211
}
1203-
1212+
12041213
/* create files */
12051214
create_remove_items(0, 0, 1, 0, temp_path, 0, progress);
12061215
if(o.stone_wall_timer_seconds){
@@ -1244,6 +1253,11 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
12441253
/* create phase */
12451254
if (o.create_only ) {
12461255
phase_prepare();
1256+
if(o.savePerOpDataCSV != NULL) {
1257+
char path[MAX_PATHLEN];
1258+
sprintf(path, "%s-%s-%05d.csv", o.savePerOpDataCSV, mdtest_test_name(MDTEST_FILE_CREATE_NUM), rank);
1259+
progress->ot = OpTimerInit(path, o.write_bytes > 0 ? o.write_bytes : 1);
1260+
}
12471261
t_start = GetTimeStamp();
12481262
#ifdef HAVE_GPFSCREATESHARING_T
12491263
/* Enable createSharingHint */
@@ -1270,6 +1284,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
12701284
}
12711285
#endif /* HAVE_GPFSCREATESHARING_T */
12721286
t_end = GetTimeStamp();
1287+
OpTimerFree(& progress->ot);
12731288
updateResult(res, MDTEST_FILE_CREATE_NUM, o.items, t_start, t_end, t_end_before_barrier);
12741289
}else{
12751290
if (o.stoneWallingStatusFile){
@@ -1298,7 +1313,13 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
12981313
/* stat phase */
12991314
if (o.stat_only ) {
13001315
phase_prepare();
1316+
if(o.savePerOpDataCSV != NULL) {
1317+
char path[MAX_PATHLEN];
1318+
sprintf(path, "%s-%s-%05d.csv", o.savePerOpDataCSV, mdtest_test_name(MDTEST_FILE_STAT_NUM), rank);
1319+
progress->ot = OpTimerInit(path, 1);
1320+
}
13011321
t_start = GetTimeStamp();
1322+
progress->start_time = t_start;
13021323
for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
13031324
prep_testdir(iteration, dir_iter);
13041325
if (o.unique_dir_per_task) {
@@ -1318,13 +1339,20 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
13181339
t_end_before_barrier = GetTimeStamp();
13191340
phase_end();
13201341
t_end = GetTimeStamp();
1342+
OpTimerFree(& progress->ot);
13211343
updateResult(res, MDTEST_FILE_STAT_NUM, o.items, t_start, t_end, t_end_before_barrier);
13221344
}
13231345

13241346
/* read phase */
13251347
if (o.read_only ) {
13261348
phase_prepare();
1349+
if(o.savePerOpDataCSV != NULL) {
1350+
char path[MAX_PATHLEN];
1351+
sprintf(path, "%s-%s-%05d.csv", o.savePerOpDataCSV, mdtest_test_name(MDTEST_FILE_READ_NUM), rank);
1352+
progress->ot = OpTimerInit(path, o.read_bytes > 0 ? o.read_bytes : 1);
1353+
}
13271354
t_start = GetTimeStamp();
1355+
progress->start_time = t_start;
13281356
for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
13291357
prep_testdir(iteration, dir_iter);
13301358
if (o.unique_dir_per_task) {
@@ -1340,23 +1368,28 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
13401368

13411369
/* read files */
13421370
if (o.random_seed > 0) {
1343-
mdtest_read(1,0, dir_iter, temp_path);
1371+
mdtest_read(1, 0, dir_iter, temp_path, progress);
13441372
} else {
1345-
mdtest_read(0,0, dir_iter, temp_path);
1373+
mdtest_read(0, 0, dir_iter, temp_path, progress);
13461374
}
13471375
}
13481376
t_end_before_barrier = GetTimeStamp();
13491377
phase_end();
13501378
t_end = GetTimeStamp();
1379+
OpTimerFree(& progress->ot);
13511380
updateResult(res, MDTEST_FILE_READ_NUM, o.items, t_start, t_end, t_end_before_barrier);
13521381
}
13531382

13541383
/* remove phase */
13551384
if (o.remove_only) {
13561385
phase_prepare();
1386+
if(o.savePerOpDataCSV != NULL) {
1387+
sprintf(temp_path, "%s-%s-%05d.csv", o.savePerOpDataCSV, mdtest_test_name(MDTEST_FILE_REMOVE_NUM), rank);
1388+
progress->ot = OpTimerInit(temp_path, o.write_bytes > 0 ? o.write_bytes : 1);
1389+
}
13571390
t_start = GetTimeStamp();
1391+
progress->start_time = t_start;
13581392
progress->items_start = 0;
1359-
13601393
for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
13611394
prep_testdir(iteration, dir_iter);
13621395
if (o.unique_dir_per_task) {
@@ -1369,19 +1402,19 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
13691402
}
13701403

13711404
VERBOSE(3,5,"file_test: rm directories path is '%s'", temp_path );
1372-
13731405
if (o.collective_creates) {
13741406
if (rank == 0) {
13751407
collective_create_remove(0, 0, ntasks, temp_path, progress);
13761408
}
13771409
} else {
1378-
VERBOSE(3,5,"gonna create %s", temp_path);
1410+
VERBOSE(3,5,"gonna remove %s", temp_path);
13791411
create_remove_items(0, 0, 0, 0, temp_path, 0, progress);
13801412
}
13811413
}
13821414
t_end_before_barrier = GetTimeStamp();
13831415
phase_end();
13841416
t_end = GetTimeStamp();
1417+
OpTimerFree(& progress->ot);
13851418
updateResult(res, MDTEST_FILE_REMOVE_NUM, o.items, t_start, t_end, t_end_before_barrier);
13861419
}
13871420

@@ -2322,8 +2355,8 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
23222355
#endif
23232356
{0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & aiori_warning_as_errors},
23242357
{0, "saveRankPerformanceDetails", "Save the individual rank information into this CSV file.", OPTION_OPTIONAL_ARGUMENT, 's', & o.saveRankDetailsCSV},
2358+
{0, "savePerOpDataCSV", "Store the performance of each rank into an individual file prefixed with this option.", OPTION_OPTIONAL_ARGUMENT, 's', & o.savePerOpDataCSV},
23252359
{0, "showRankStatistics", "Include statistics per rank", OPTION_FLAG, 'd', & o.show_perrank_statistics},
2326-
23272360
LAST_OPTION
23282361
};
23292362
options_all_t * global_options = airoi_create_all_module_options(options);
@@ -2364,7 +2397,6 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
23642397
for (i = 1; i < argc; i++) {
23652398
snprintf(&cmd_buffer[strlen(cmd_buffer)], 4096-strlen(cmd_buffer), " '%s'", argv[i]);
23662399
}
2367-
23682400
VERBOSE(0,-1,"-- started at %s --\n", PrintTimestamp());
23692401
VERBOSE(0,-1,"mdtest-%s was launched with %d total task(s) on %d node(s)", RELEASE_VERS, o.size, numNodes);
23702402
VERBOSE(0,-1,"Command line used: %s", cmd_buffer);

Diff for: src/mdtest.h

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <mpi.h>
55
#include <stdio.h>
66
#include <stdint.h>
7+
#include <utilities.h>
78

89
typedef enum {
910
MDTEST_DIR_CREATE_NUM = 0,

Diff for: src/utilities.c

+10-6
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ int verify_memory_pattern(uint64_t item, char * buffer, size_t bytes, int rand_s
238238
/* Data structure to store information about per-operation timer */
239239
struct OpTimer{
240240
FILE * fd;
241+
int size; /* per op */
241242
double * time;
242243
double * value;
243244
int pos;
@@ -246,19 +247,20 @@ struct OpTimer{
246247
/* by default store 1M operations into the buffer before flushing */
247248
#define OP_BUFFER_SIZE 1000000
248249

249-
OpTimer* OpTimerInit(char * filename){
250+
OpTimer* OpTimerInit(char * filename, int size){
250251
if(filename == NULL) {
251252
return NULL;
252253
}
253254
OpTimer * ot = safeMalloc(sizeof(OpTimer));
255+
ot->size = size;
254256
ot->value = safeMalloc(sizeof(double)*OP_BUFFER_SIZE);
255257
ot->time = safeMalloc(sizeof(double)*OP_BUFFER_SIZE);
256258
ot->pos = 0;
257259
ot->fd = fopen(filename, "w");
258260
if(ot->fd < 0){
259261
ERR("Could not create OpTimer");
260262
}
261-
char buff[] = "time,runtime\n";
263+
char buff[] = "time,runtime,tp\n";
262264
int ret = fwrite(buff, strlen(buff), 1, ot->fd);
263265
if(ret != 1){
264266
FAIL("Cannot write header to OpTimer file");
@@ -271,7 +273,7 @@ void OpTimerFlush(OpTimer* ot){
271273
return;
272274
}
273275
for(int i=0; i < ot->pos; i++){
274-
fprintf(ot->fd, "%.8e,%.8e\n", ot->time[i], ot->value[i]);
276+
fprintf(ot->fd, "%.8e,%.8e,%e\n", ot->time[i], ot->value[i], ot->size/ot->value[i]);
275277
}
276278
ot->pos = 0;
277279
}
@@ -287,10 +289,11 @@ void OpTimerValue(OpTimer* ot, double now, double runTime){
287289
}
288290
}
289291

290-
void OpTimerFree(OpTimer* ot){
291-
if(ot == NULL) {
292+
void OpTimerFree(OpTimer** otp){
293+
if(otp == NULL || *otp == NULL) {
292294
return;
293-
}
295+
}
296+
OpTimer * ot = *otp;
294297
OpTimerFlush(ot);
295298
ot->pos = 0;
296299
free(ot->value);
@@ -299,6 +302,7 @@ void OpTimerFree(OpTimer* ot){
299302
ot->time = NULL;
300303
fclose(ot->fd);
301304
free(ot);
305+
*otp = NULL;
302306
}
303307

304308
void* safeMalloc(uint64_t size){

Diff for: src/utilities.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,10 @@ void updateParsedOptions(IOR_param_t * options, options_all_t * global_options);
6565
size_t NodeMemoryStringToBytes(char *size_str);
6666

6767
typedef struct OpTimer OpTimer;
68-
OpTimer* OpTimerInit(char * filename);
68+
OpTimer* OpTimerInit(char * filename, int size);
6969
void OpTimerValue(OpTimer* otimer_in, double now, double runTime);
7070
void OpTimerFlush(OpTimer* otimer_in);
71-
void OpTimerFree(OpTimer* otimer_in);
71+
void OpTimerFree(OpTimer** otimer_in);
7272

7373
/* Returns -1, if cannot be read */
7474
int64_t ReadStoneWallingIterations(char * const filename, MPI_Comm com);

0 commit comments

Comments
 (0)