Skip to content

Commit e3f95c0

Browse files
[SYCL RTC] Make ToolchainFS static thread_local (#20382)
#19924 essentially made it `static` but that caused data races that were later fixed by #20360 changing each use of it to re-create this in-memory FS (essentially, "removing" `static`), incurring significant performance costs. This PR addresses the issue by "adding" `thread_local` instead of "removing" `static` allowing us to have both no crashes due to data races and minimal overhead. No tests added as the one from #20360 is verifying this.
1 parent 53d2038 commit e3f95c0

File tree

1 file changed

+24
-14
lines changed

1 file changed

+24
-14
lines changed

sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -166,20 +166,30 @@ template <> struct std::hash<auto_pch_key> {
166166

167167
namespace {
168168
class SYCLToolchain {
169-
// TODO: For some reason, moving this to a data member of the single instance
170-
// of SYCLToolchain results in some data races leading to memory corruption
171-
// (e.g., ::free() report errors).
172-
static auto getToolchainFS() {
173-
llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> ToolchainFS =
174-
llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
175-
using namespace jit_compiler::resource;
176-
177-
for (size_t i = 0; i < NumToolchainFiles; ++i) {
178-
resource_file RF = ToolchainFiles[i];
179-
std::string_view Path{RF.Path.S, RF.Path.Size};
180-
std::string_view Content{RF.Content.S, RF.Content.Size};
181-
ToolchainFS->addFile(Path, 0, llvm::MemoryBuffer::getMemBuffer(Content));
182-
}
169+
static auto &getToolchainFS() {
170+
// TODO: For some reason, removing `thread_local` results in data races
171+
// leading to memory corruption (e.g., ::free() report errors). I'm not sure
172+
// if that's a bug somewhere in clang tooling/LLVMSupport or intentional
173+
// limitation (or maybe a bug in this file, but I can't imagine how could
174+
// that be).
175+
//
176+
// For single thread compilation this gives us [almost] the same performance
177+
// as if there was no `thread_local` so performing very time-consuming
178+
// investigation wouldn't give a justifiable ROI at this moment.
179+
static thread_local const auto ToolchainFS = []() {
180+
llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> ToolchainFS =
181+
llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
182+
using namespace jit_compiler::resource;
183+
184+
for (size_t i = 0; i < NumToolchainFiles; ++i) {
185+
resource_file RF = ToolchainFiles[i];
186+
std::string_view Path{RF.Path.S, RF.Path.Size};
187+
std::string_view Content{RF.Content.S, RF.Content.Size};
188+
ToolchainFS->addFile(Path, 0,
189+
llvm::MemoryBuffer::getMemBuffer(Content));
190+
}
191+
return ToolchainFS;
192+
}();
183193
return ToolchainFS;
184194
}
185195

0 commit comments

Comments
 (0)