Skip to content

Commit

Permalink
BugFix: use hex strings for hashes; the previous 16-bit integer hashes could collide. (#216)
Browse files Browse the repository at this point in the history
* use hex strings for hashes; the previous 16-bit integer hashes could collide.

* fixed counts.
  • Loading branch information
hariharan-devarajan authored Oct 7, 2024
1 parent 60e502c commit b2fc688
Show file tree
Hide file tree
Showing 12 changed files with 306 additions and 260 deletions.
5 changes: 3 additions & 2 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
"name": "Cmake Debug Target",
"type": "lldb",
"request": "launch",
"program": "${cmake.testProgram}",
"program": "/usr/workspace/haridev/dftracer/build/bin/test_cpp",
"args": [
"${cmake.testArgs}"
"/usr/workspace/haridev/dftracer/build/test/data",
"1"
],
"cwd": "${workspaceFolder}/build",
"env": {
Expand Down
28 changes: 21 additions & 7 deletions dfanalyzer/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,23 +175,35 @@ def load_objects(line, fn, time_granularity, time_approximate, condition_fn, loa
if "tid" in val:
d["tid"] = val["tid"]
if "args" in val and "hhash" in val["args"]:
d["hhash"] = val["args"]["hhash"]
if type(val["args"]["hhash"]) is str:
d["hhash"] = int(val["args"]["hhash"],16)
else:
d["hhash"] = val["args"]["hhash"]
if "M" == val["ph"]:
if d["name"] == "FH":
d["type"] = 1 # 1-> file hash
if "args" in val and "name" in val["args"] and "value" in val["args"]:
d["name"] = val["args"]["name"]
d["hash"] = val["args"]["value"]
if type(val["args"]["value"]) is str:
d["hash"] = int(val["args"]["value"],16)
else:
d["hash"] = val["args"]["value"]
elif d["name"] == "HH":
d["type"] = 2 # 2-> hostname hash
if "args" in val and "name" in val["args"] and "value" in val["args"]:
d["name"] = val["args"]["name"]
d["hash"] = val["args"]["value"]
if type(val["args"]["value"]) is str:
d["hash"] = int(val["args"]["value"],16)
else:
d["hash"] = val["args"]["value"]
elif d["name"] == "SH":
d["type"] = 3 # 3-> string hash
if "args" in val and "name" in val["args"] and "value" in val["args"]:
d["name"] = val["args"]["name"]
d["hash"] = val["args"]["value"]
if type(val["args"]["value"]) is str:
d["hash"] = int(val["args"]["value"],16)
else:
d["hash"] = val["args"]["value"]
elif d["name"] == "PR":
d["type"] = 5 # 5-> process metadata
if "args" in val and "name" in val["args"] and "value" in val["args"]:
Expand Down Expand Up @@ -267,8 +279,10 @@ def io_function(json_object, current_dict, time_approximate,condition_fn):
d["io_time"] = I.to_string(I.empty())
if "args" in json_object:
if "fhash" in json_object["args"]:
d["fhash"] = json_object["args"]["fhash"]

if type(json_object["args"]["fhash"]) is str:
d["fhash"] = int(json_object["args"]["fhash"],16)
else:
d["fhash"] = json_object["args"]["fhash"]
if "POSIX" == json_object["cat"] and "ret" in json_object["args"]:
size = int(json_object["args"]["ret"])
if size > 0:
Expand All @@ -290,7 +304,7 @@ def io_columns():
'io_time': "string[pyarrow]" if not conf.time_approximate else "uint64[pyarrow]",
'app_io_time': "string[pyarrow]" if not conf.time_approximate else "uint64[pyarrow]",
'total_time': "string[pyarrow]" if not conf.time_approximate else "uint64[pyarrow]",
'fhash': "uint32[pyarrow]",
'fhash': "uint64[pyarrow]",
'phase': "uint16[pyarrow]",
'size': "uint64[pyarrow]"
}
Expand Down
356 changes: 185 additions & 171 deletions examples/dfanalyzer/dfanalyzer.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions include/dftracer/core/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,6 @@
static const int EVENT_TYPE_SIZE = 128;
static const unsigned int DFT_PATH_MAX = 1024 * 4;
static const char SEPARATOR = ';';
static const int HASH_OUTPUT = 16;

#endif // DFTRACER_CONSTANTS_H
22 changes: 11 additions & 11 deletions src/dftracer/brahma/posix.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,24 @@ class POSIXDFTracer : public POSIX {
static bool stop_trace;
static std::shared_ptr<POSIXDFTracer> instance;
static const int MAX_FD = 1024;
uint16_t tracked_fd[MAX_FD];
std::string tracked_fd[MAX_FD];
std::shared_ptr<DFTLogger> logger;
bool trace_all_files;

inline uint16_t is_traced(int fd, const char *func) {
if (fd < 0) return 0;
uint16_t trace = tracked_fd[fd % MAX_FD];
if (trace != 0) {
inline std::string is_traced(int fd, const char *func) {
if (fd < 0) return std::string();
std::string trace = tracked_fd[fd % MAX_FD];
if (trace.empty()) {
DFTRACER_LOG_DEBUG(
"Calling POSIXDFTracer.is_traced for %s and"
" fd %d trace %d",
func, fd, trace != 0);
func, fd, !trace.empty());
}
return trace;
}

inline uint16_t is_traced(const char *filename, const char *func) {
if (stop_trace) return 0;
inline std::string is_traced(const char *filename, const char *func) {
if (stop_trace) return std::string();
if (trace_all_files) {
return logger->hash_and_store(filename, METADATA_NAME_FILE_HASH);
} else {
Expand All @@ -56,7 +56,7 @@ class POSIXDFTracer : public POSIX {
}
}

inline void trace(int fd, uint16_t hash) {
inline void trace(int fd, std::string hash) {
DFTRACER_LOG_DEBUG("Calling POSIXDFTracer.trace for %d and %d", fd, hash);
if (fd == -1) return;
tracked_fd[fd % MAX_FD] = hash;
Expand All @@ -65,13 +65,13 @@ class POSIXDFTracer : public POSIX {
inline void remove_trace(int fd) {
DFTRACER_LOG_DEBUG("Calling POSIXDFTracer.remove_trace for %d", fd);
if (fd == -1) return;
tracked_fd[fd % MAX_FD] = 0;
tracked_fd[fd % MAX_FD] = std::string();
}

public:
POSIXDFTracer(bool trace_all) : POSIX(), trace_all_files(trace_all) {
DFTRACER_LOG_DEBUG("POSIX class intercepted", "");
for (int i = 0; i < MAX_FD; ++i) tracked_fd[i] = 0;
for (int i = 0; i < MAX_FD; ++i) tracked_fd[i] = std::string();
logger = DFT_LOGGER_INIT();
}
void finalize() {
Expand Down
16 changes: 8 additions & 8 deletions src/dftracer/brahma/stdio.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,25 +22,25 @@ class STDIODFTracer : public STDIO {
private:
static bool stop_trace;
static std::shared_ptr<STDIODFTracer> instance;
std::unordered_map<FILE *, uint16_t> tracked_fh;
std::unordered_map<FILE *, std::string> tracked_fh;
std::shared_ptr<DFTLogger> logger;
bool trace_all_files;

inline uint16_t is_traced(FILE *fh, const char *func) {
inline std::string is_traced(FILE *fh, const char *func) {
DFTRACER_LOG_DEBUG("Calling STDIODFTracer.is_traced for %s", func);
if (stop_trace) return 0;
if (fh == NULL) return 0;
if (stop_trace) return std::string();
if (fh == NULL) return std::string();
auto iter = tracked_fh.find(fh);
if (iter != tracked_fh.end()) {
return iter->second;
}
return 0;
return std::string();
}

inline uint16_t is_traced(const char *filename, const char *func) {
inline std::string is_traced(const char *filename, const char *func) {
DFTRACER_LOG_DEBUG("Calling STDIODFTracer.is_traced with filename for %s",
func);
if (stop_trace) return 0;
if (stop_trace) return std::string();
if (trace_all_files)
return logger->hash_and_store(filename, METADATA_NAME_FILE_HASH);
else {
Expand All @@ -49,7 +49,7 @@ class STDIODFTracer : public STDIO {
}
}

inline void trace(FILE *fh, uint16_t hash) {
/**
 * Record the hash string associated with an open FILE handle.
 *
 * @param fh   stream handle used as the key in tracked_fh.
 * @param hash hash string stored for that handle; an existing entry for the
 *             same handle is overwritten.
 */
inline void trace(FILE *fh, std::string hash) {
  // hash is a std::string: hand the printf-style logger a C string via %s.
  // Passing the object itself to a %d conversion is undefined behaviour.
  DFTRACER_LOG_DEBUG("Calling STDIODFTracer.trace with hash %s", hash.c_str());
  tracked_fh.insert_or_assign(fh, hash);
}
Expand Down
69 changes: 41 additions & 28 deletions src/dftracer/df_logger.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#ifndef DFTRACER_GENERIC_LOGGER_H
#define DFTRACER_GENERIC_LOGGER_H

#include <dftracer/core/constants.h>
#include <dftracer/core/logging.h>
#include <dftracer/core/singleton.h>
#include <dftracer/utils/configuration_manager.h>
Expand Down Expand Up @@ -44,7 +45,7 @@ class DFTLogger {
std::shared_ptr<dftracer::ChromeWriter> writer;
uint32_t level;
std::vector<int> index_stack;
std::unordered_map<std::string, uint16_t> computed_hash;
std::unordered_map<std::string, std::string> computed_hash;
std::atomic_int index;
bool has_entry;
#ifdef DFTRACER_MPI_ENABLE
Expand Down Expand Up @@ -102,6 +103,19 @@ class DFTLogger {
this->is_init = true;
}
~DFTLogger() {}

/**
 * Build the hexadecimal hash string for a NUL-terminated name.
 *
 * The name is digested with md5String into a HASH_OUTPUT-byte buffer and one
 * lowercase hex digit (the high nibble) is emitted per digest byte, so the
 * returned string is exactly HASH_OUTPUT characters long.
 *
 * @param name NUL-terminated string to hash; must not be null.
 * @return hash string whose size() matches its contents.
 */
inline std::string get_hash(char *name) {
  uint8_t result[HASH_OUTPUT];
  md5String(name, result);

  static constexpr char kHexDigits[] = "0123456789abcdef";

  // Size the string up front. Writing through data() into capacity that was
  // only reserve()d leaves size() at 0, so the returned string would compare
  // empty even though its buffer holds characters.
  std::string hash(HASH_OUTPUT, '0');
  for (int i = 0; i < HASH_OUTPUT; ++i) {
    hash[i] = kHexDigits[result[i] >> 4];
  }
  return hash;
}

inline void update_log_file(std::string log_file, std::string exec_name,
std::string cmd, ProcessID process_id = -1) {
DFTRACER_LOG_DEBUG("DFTLogger.update_log_file %s", log_file.c_str());
Expand All @@ -111,15 +125,15 @@ class DFTLogger {
tid = df_gettid();
}
this->writer = dftracer::Singleton<dftracer::ChromeWriter>::get_instance();
uint16_t hostname_hash;
uint16_t cmd_hash;
uint16_t exec_hash;
std::string hostname_hash;
std::string cmd_hash;
std::string exec_hash;
if (this->writer != nullptr) {
char hostname[256];
gethostname(hostname, 256);
md5String(hostname, &hostname_hash);
hostname_hash = get_hash(hostname);
this->writer->initialize(log_file.data(), this->throw_error,
hostname_hash);
hostname_hash.c_str());
hostname_hash = hash_and_store(hostname, METADATA_NAME_HOSTNAME_HASH);
char thread_name[128];
auto size = sprintf(thread_name, "%lu", this->process_id);
Expand Down Expand Up @@ -218,16 +232,16 @@ class DFTLogger {
return -1;
}

inline uint16_t has_hash(ConstEventNameType key) {
inline std::string has_hash(ConstEventNameType key) {
std::shared_lock<std::shared_mutex> lock(map_mtx);
auto iter = computed_hash.find(key);
if (iter != computed_hash.end()) iter->second;
return 0;
if (iter != computed_hash.end()) return iter->second.c_str();
return std::string();
}

inline void insert_hash(ConstEventNameType key, uint16_t hash) {
inline void insert_hash(ConstEventNameType key, std::string hash) {
std::unique_lock<std::shared_mutex> lock(map_mtx);
computed_hash.insert_or_assign(key, hash);
computed_hash.insert_or_assign(key, hash.c_str());
}

inline TimeResolution get_time() {
Expand Down Expand Up @@ -321,38 +335,37 @@ class DFTLogger {
}
}

inline uint16_t hash_and_store(char *filename, ConstEventNameType name) {
if (filename == NULL) return 0;
/**
 * Hash a file name and record it under the given metadata name.
 *
 * The name is copied into a PATH_MAX-sized local buffer and forwarded to
 * hash_and_store_str.
 *
 * @param filename NUL-terminated name to hash; a null pointer yields an empty
 *                 string.
 * @param name     metadata name forwarded to hash_and_store_str.
 * @return the value returned by hash_and_store_str, or an empty string when
 *         filename is null.
 */
inline std::string hash_and_store(char *filename, ConstEventNameType name) {
  if (filename == nullptr) return std::string();
  char file[PATH_MAX];
  // Bounded copy: snprintf writes at most PATH_MAX bytes and always
  // NUL-terminates, so an over-long name is truncated instead of overflowing
  // the stack buffer (strcpy would overrun before any truncation could apply).
  snprintf(file, PATH_MAX, "%s", filename);
  return hash_and_store_str(file, name);
}

inline uint16_t hash_and_store_str(char file[PATH_MAX],
ConstEventNameType name) {
uint16_t hash = has_hash(file);
if (hash == 0) {
md5String(file, &hash);
insert_hash(file, hash);
inline std::string hash_and_store_str(char file[PATH_MAX],
ConstEventNameType name) {
std::string hash = has_hash(file);
if (hash.empty()) {
hash = get_hash(file);
insert_hash(file, hash.c_str());
if (this->writer != nullptr) {
ThreadID tid = 0;
if (dftracer_tid) {
tid = df_gettid();
}
int current_index = this->enter_event();
this->writer->log_metadata(current_index, file,
std::to_string(hash).c_str(), name,
this->process_id, tid, false);
this->writer->log_metadata(current_index, file, hash.c_str(), name,
this->process_id, tid);
this->exit_event();
}
}
return hash;
}

inline uint16_t hash_and_store(const char *filename,
ConstEventNameType name) {
if (filename == NULL) return 0;
inline std::string hash_and_store(const char *filename,
ConstEventNameType name) {
if (filename == NULL) return std::string();
char file[PATH_MAX];
strcpy(file, filename);
file[PATH_MAX - 1] = '\0';
Expand Down Expand Up @@ -386,15 +399,15 @@ class DFTLogger {

#define DFT_LOGGER_UPDATE_HASH(value) \
if (trace && this->logger->include_metadata) { \
uint16_t value##_hash = \
std::string value##_hash = \
this->logger->hash_and_store(value, METADATA_NAME_FILE_HASH); \
DFT_LOGGER_UPDATE(value##_hash); \
}

#define DFT_LOGGER_START(entity) \
DFTRACER_LOG_DEBUG("Calling function %s", __FUNCTION__); \
uint16_t fhash = is_traced(entity, __FUNCTION__); \
bool trace = fhash != 0; \
std::string fhash = is_traced(entity, __FUNCTION__); \
bool trace = !fhash.empty(); \
TimeResolution start_time = 0; \
std::unordered_map<std::string, std::any> *metadata = nullptr; \
if (trace) { \
Expand Down
9 changes: 5 additions & 4 deletions src/dftracer/utils/md5.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* structures.
*/

#include <dftracer/utils/md5.h>
#include "md5.h"

/*
* Constants defined by the MD5 algorithm
Expand Down Expand Up @@ -193,16 +193,17 @@ void md5Step(uint32_t *buffer, uint32_t *input) {
* Functions that run the algorithm on the provided input and put the digest
* into result. result should be able to store 16 bytes.
*/
void md5String(char *input, uint16_t *result) {
void md5String(char *input, uint8_t *result) {
MD5Context ctx;
md5Init(&ctx);
md5Update(&ctx, (uint8_t *)input, strlen(input));
md5Finalize(&ctx);

memcpy(result, ctx.digest, 16);
}

void md5File(FILE *file, uint16_t *result) {
char *input_buffer = (char *)malloc(1024 * 1024);
void md5File(FILE *file, uint8_t *result) {
char *input_buffer = (char *)malloc(1024);
size_t input_size = 0;

MD5Context ctx;
Expand Down
Loading

0 comments on commit b2fc688

Please sign in to comment.