diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt index 650f2c963b1..871645baf43 100644 --- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -190,3 +190,8 @@ if (WZ_ENABLE_BASIS_UNIVERSAL) target_compile_definitions(basis_transcoder PRIVATE "-DBASISD_SUPPORT_ATC=0" "-DBASISD_SUPPORT_PVRTC1=0" "-DBASISD_SUPPORT_PVRTC2=0") endif(WZ_ENABLE_BASIS_UNIVERSAL) + +if (WZ_PROFILING_NVTX) + find_package(CUDAToolkit 5.0 REQUIRED) # the version is positional; tokens after REQUIRED would be parsed as component names + set(PROFILING_NVTX_INCLUDE ${CUDAToolkit_INCLUDE_DIRS} PARENT_SCOPE) +endif () diff --git a/CMakeLists.txt b/CMakeLists.txt index 1940a383c23..7ceecc323f0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,6 +23,8 @@ OPTION(WZ_ENABLE_BASIS_UNIVERSAL "Enable Basis Universal texture support" ON) OPTION(WZ_DEBUG_GFX_API_LEAKS "Enable debugging for graphics API leaks" ON) OPTION(WZ_FORCE_MINIMAL_OPUSFILE "Force a minimal build of Opusfile, since WZ does not need (or want) HTTP stream support" ON) +OPTION(WZ_PROFILING_NVTX "Add NVTX-based profiling instrumentation to the code" OFF) + if(CMAKE_SYSTEM_NAME MATCHES "Windows" OR CMAKE_SYSTEM_NAME MATCHES "Darwin" OR CMAKE_SYSTEM_NAME MATCHES "Linux") # Only supported on Windows, macOS, and Linux OPTION(ENABLE_DISCORD "Enable Discord presence / join integration" ON) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4dbe5a03848..288eccbbeb1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -123,6 +123,10 @@ find_package(SQLite3 3.14 REQUIRED) target_link_libraries(warzone2100 SQLite::SQLite3) target_link_libraries(warzone2100 SQLiteCpp) +if (WZ_PROFILING_NVTX) + target_include_directories(warzone2100 PRIVATE ${PROFILING_NVTX_INCLUDE}) +endif() + set(_curl_gnutls_thread_safe_fix FALSE) if (DEFINED CURL_GNUTLS_REQUIRES_CALLBACKS) if (CURL_GNUTLS_REQUIRES_CALLBACKS STREQUAL "YES") diff --git a/src/config.h.in b/src/config.h.in index 1aeaa803b02..7ef152832e1 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -176,4 +176,11 @@ #cmakedefine WZ_LOCALEDIR 
"@WZ_LOCALEDIR@" #cmakedefine WZ_LOCALEDIR_ISABSOLUTE +/* Enables profiling instrumentation. NOTE(review): no CMake option for this symbol is added by this patch - confirm where it is meant to be set. */ +#cmakedefine WZ_PROFILING_INSTRUMENTATION +/* Enables usage of NVTX-based instrumentation backend (set by the WZ_PROFILING_NVTX CMake option). */ +#cmakedefine WZ_PROFILING_NVTX +/* Enables usage of VTune-based instrumentation backend. NOTE(review): no CMake option for this symbol is added by this patch - confirm where it is meant to be set. */ +#cmakedefine WZ_PROFILING_VTUNE + #endif // __INCLUDED_WZ_GENERATED_CONFIG_H__ diff --git a/src/fpath.cpp b/src/fpath.cpp index e3b7d3d9736..e7e3d74d415 100644 --- a/src/fpath.cpp +++ b/src/fpath.cpp @@ -39,6 +39,7 @@ #include "astar.h" #include "fpath.h" +#include "profiling.h" // If the path finding system is shutdown or not static volatile bool fpathQuit = false; @@ -86,6 +87,7 @@ static int fpathThreadFunc(void *) continue; } + WZ_PROFILE_SCOPE(fpathJob); /* one profiling range per job taken from the queue */ // Copy the first job from the queue. packagedPathJob job = std::move(pathJobs.front()); pathJobs.pop_front(); diff --git a/src/loop.cpp b/src/loop.cpp index 77f9622d479..57047e37c05 100644 --- a/src/loop.cpp +++ b/src/loop.cpp @@ -85,6 +85,7 @@ #include "scores.h" #include "clparse.h" #include "gamehistorylogger.h" +#include "profiling.h" #include "warzoneconfig.h" @@ -136,6 +137,7 @@ LEVEL_TYPE nextMissionType = LEVEL_TYPE::LDS_NONE; static GAMECODE renderLoop() { + WZ_PROFILE_SCOPE(renderLoop); if (bMultiPlayer && !NetPlay.isHostAlive && NetPlay.bComms && !NetPlay.isHost) { intAddInGamePopup(); @@ -322,6 +324,7 @@ static GAMECODE renderLoop() displayWorld(); } wzPerfBegin(PERF_GUI, "User interface"); + WZ_PROFILE_SCOPE(DrawUI); /* NOTE(review): this range stays open until the enclosing block exits, not until the PERF_GUI section ends - confirm that is intended */ /* Display the in game interface */ pie_SetFogStatus(false); @@ -493,6 +496,7 @@ void countUpdate(bool synch) static void gameStateUpdate() { + WZ_PROFILE_SCOPE(gameStateUpdate); syncDebug("map = \"%s\", pseudorandom 32-bit integer = 0x%08X, allocated = %d %d %d %d %d %d %d %d %d %d, position = %d %d %d %d %d %d %d %d %d %d", game.map, gameRandU32(), NetPlay.players[0].allocated, NetPlay.players[1].allocated, NetPlay.players[2].allocated, NetPlay.players[3].allocated, 
NetPlay.players[4].allocated, NetPlay.players[5].allocated, NetPlay.players[6].allocated, NetPlay.players[7].allocated, NetPlay.players[8].allocated, NetPlay.players[9].allocated, NetPlay.players[0].position, NetPlay.players[1].position, NetPlay.players[2].position, NetPlay.players[3].position, NetPlay.players[4].position, NetPlay.players[5].position, NetPlay.players[6].position, NetPlay.players[7].position, NetPlay.players[8].position, NetPlay.players[9].position @@ -617,6 +621,7 @@ void setMaxFastForwardTicks(optional value, bool fixedToNormalTickRate) /* The main game loop */ GAMECODE gameLoop() { + WZ_PROFILE_SCOPE(gameLoop); static uint32_t lastFlushTime = 0; static size_t numForcedUpdatesLastCall = 0; diff --git a/src/profiling.cpp b/src/profiling.cpp new file mode 100644 index 00000000000..21aa135c7ce --- /dev/null +++ b/src/profiling.cpp @@ -0,0 +1,177 @@ +#include "config.h" +#include <string> +#include <cstdio> + +#ifdef WZ_PROFILING_NVTX +#include <nvtx3/nvToolsExt.h> +#endif + +#ifdef WZ_PROFILING_VTUNE +#include <ittnotify.h> +#endif + +#include "profiling.h" + +namespace profiling +{ + +/// Backend handles and display name owned by a Domain. +struct Domain::Internal +{ +#ifdef WZ_PROFILING_NVTX + nvtxDomainHandle_t nvtxDomain = nullptr; +#endif + +#ifdef WZ_PROFILING_VTUNE + __itt_domain* ittDomain = nullptr; +#endif + std::string name; + + ~Internal() + { +#ifdef WZ_PROFILING_NVTX + if (nvtxDomain) + { + nvtxDomainDestroy(nvtxDomain); + nvtxDomain = nullptr; + } +#endif + } +}; + +/// Creates the backend domain handle(s); a null name is replaced by "Unnamed" for the backends too. +Domain::Domain(const char* name) +{ + m_internal = new Internal(); + m_internal->name = name ? 
name : "Unnamed"; +#ifdef WZ_PROFILING_NVTX + m_internal->nvtxDomain = nvtxDomainCreateA(m_internal->name.c_str()); +#endif + +#ifdef WZ_PROFILING_VTUNE + m_internal->ittDomain = __itt_domain_create(m_internal->name.c_str()); +#endif +} + +Domain::~Domain() +{ + if (m_internal) + { + delete m_internal; + m_internal = nullptr; + } +} + +/// Root domain referenced by the WZ_PROFILE_SCOPE macros. +Domain wzRootDomain{"warzone2100"}; + +/// Opens a range called name. If domain or name is null nothing is opened and the destructor closes nothing. +Scope::Scope(const Domain *domain, const char *name) + :m_domain((domain && name) ? domain : nullptr) +{ + if (m_domain) + { + #ifdef WZ_PROFILING_NVTX + { + nvtxRangePushA(name); + } + #endif + #ifdef WZ_PROFILING_VTUNE + { + __itt_string_handle* task = __itt_string_handle_create(name); + auto ittDomain = m_domain ? m_domain->getInternal()->ittDomain : nullptr; + __itt_task_begin(ittDomain, __itt_null, __itt_null, task); + } + #endif + } +} + +/// Opens a range called "object::name". If any argument is null nothing is opened and the destructor closes nothing. +Scope::Scope(const Domain *domain, const char *object, const char *name) + :m_domain((domain && object && name) ? domain : nullptr) +{ + if (m_domain) + { + char tmpBuffer[255]; // per-call buffer: scopes are created from more than one thread + std::snprintf(tmpBuffer, sizeof(tmpBuffer), "%s::%s", object, name); + #ifdef WZ_PROFILING_NVTX + { + nvtxRangePushA(tmpBuffer); + } + #endif + #ifdef WZ_PROFILING_VTUNE + { + __itt_string_handle* task = __itt_string_handle_create(tmpBuffer); + auto ittDomain = m_domain ? m_domain->getInternal()->ittDomain : nullptr; + __itt_task_begin(ittDomain, __itt_null, __itt_null, task); + } + #endif + } +} + +/// Closes the range opened by the constructor, if one was opened. +Scope::~Scope() +{ + if (m_domain) { +#ifdef WZ_PROFILING_NVTX + nvtxRangePop(); +#endif +#ifdef WZ_PROFILING_VTUNE + auto ittDomain = m_domain->getInternal()->ittDomain; + __itt_task_end(ittDomain); +#endif + } +} + +/// Emits a marker event named mark in the given domain; ignored if domain or mark is null. +void mark(const Domain *domain, const char *mark) +{ + if (!domain || !mark) + return; + #ifdef WZ_PROFILING_NVTX + { + nvtxEventAttributes_t eventAttrib = {}; + eventAttrib.version = NVTX_VERSION; + eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; + eventAttrib.message.ascii = mark; + auto nvtxDomain = domain ? 
domain->getInternal()->nvtxDomain : nullptr; + nvtxDomainMarkEx(nvtxDomain, &eventAttrib); + } + #endif + #ifdef WZ_PROFILING_VTUNE + auto string = __itt_string_handle_create(mark); + auto ittDomain = domain ? domain->getInternal()->ittDomain : nullptr; + __itt_marker(ittDomain, __itt_null, string, __itt_scope::__itt_scope_task); + #endif +} + +/// Emits a marker event named "object::mark" in the given domain; ignored if any argument is null. +void mark(const Domain *domain, const char *object, const char *mark) +{ + if (!domain || !object || !mark) + return; + char tmpBuffer[255]; // per-call buffer: marks may be emitted from more than one thread + std::snprintf(tmpBuffer, sizeof(tmpBuffer), "%s::%s", object, mark); + + #ifdef WZ_PROFILING_NVTX + { + nvtxEventAttributes_t eventAttrib = {}; + eventAttrib.version = NVTX_VERSION; + eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; + eventAttrib.message.ascii = tmpBuffer; + auto nvtxDomain = domain ? domain->getInternal()->nvtxDomain : nullptr; + nvtxDomainMarkEx(nvtxDomain, &eventAttrib); + } + #endif + #ifdef WZ_PROFILING_VTUNE + { + auto string = __itt_string_handle_create(tmpBuffer); + auto ittDomain = domain ? domain->getInternal()->ittDomain : nullptr; + __itt_marker(ittDomain, __itt_null, string, __itt_scope::__itt_scope_task); + } + #endif +} + +} diff --git a/src/profiling.h b/src/profiling.h new file mode 100644 index 00000000000..b07df62d379 --- /dev/null +++ b/src/profiling.h @@ -0,0 +1,56 @@ +#ifndef __INCLUDED_SRC_PROFILING_H__ +#define __INCLUDED_SRC_PROFILING_H__ + +#include <cstdint> +#include <ctime> + +namespace profiling { + +/// Application-level profiling domain. +/// It is often created only once per application or per large component. +class Domain +{ +public: + struct Internal; + + explicit Domain(const char* name); + ~Domain(); + + Domain(const Domain&) = delete; // owns m_internal through a raw pointer; a copy would delete it twice + Domain& operator=(const Domain&) = delete; + + const Internal* getInternal() const { + return m_internal; + } +private: + Internal* m_internal = nullptr; + // Some additional opaque data for implementation. + void* m_domain = nullptr; +}; + +/// Profiling scope. 
+/// Instrumentation backend will create starting mark when the scope is entered +/// and finishing mark when scope is left. +class Scope +{ +public: + Scope(const Domain* domain, const char* name); + Scope(const Domain* domain, const char* object, const char* name); + ~Scope(); + + Scope(const Scope&) = delete; // a copy would close the same range twice + Scope& operator=(const Scope&) = delete; + + double elapsed() const; // NOTE(review): declared here but profiling.cpp in this patch provides no definition - calling it will not link +private: + timespec m_prevTimeStamp{}; + const Domain* m_domain = nullptr; + uint64_t m_backendRangeId = 0; +}; + +extern Domain wzRootDomain; +} + +/// Open a profiling::Scope in wzRootDomain; the range stays open until the enclosing block exits. +#define WZ_PROFILE_SCOPE(name) profiling::Scope mark_##name(&profiling::wzRootDomain, #name); +#define WZ_PROFILE_SCOPE2(object, name) profiling::Scope mark_##name(&profiling::wzRootDomain, #object, #name); + +#endif // __INCLUDED_SRC_PROFILING_H__ diff --git a/src/projectile.cpp b/src/projectile.cpp index 53e8c61d7e1..593623ea75d 100644 --- a/src/projectile.cpp +++ b/src/projectile.cpp @@ -57,6 +57,7 @@ #include "mapgrid.h" #include "random.h" #include "display3d.h" +#include "profiling.h" #include #include @@ -1370,6 +1371,7 @@ void PROJECTILE::update() // iterate through all projectiles and update their status void proj_UpdateAll() { + WZ_PROFILE_SCOPE(proj_UpdateAll); std::vector psProjectileListOld = psProjectileList; // Update all projectiles. Penetrating projectiles may add to psProjectileList. diff --git a/src/visibility.cpp b/src/visibility.cpp index af809f3f841..186eb148610 100644 --- a/src/visibility.cpp +++ b/src/visibility.cpp @@ -48,6 +48,7 @@ #include "multiplay.h" #include "qtscript.h" #include "wavecast.h" +#include "profiling.h" // accuracy for the height gradient #define GRAD_MUL 10000 @@ -833,6 +834,7 @@ static void processVisibilityLevel(BASE_OBJECT *psObj, bool& addedMessage) void processVisibility() { + WZ_PROFILE_SCOPE(processVisibility); updateSpotters(); for (int player = 0; player < MAX_PLAYERS; ++player) {