Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Changelog

## Unreleased

**Fixes**:
- Fix AOT interop with managed .NET runtimes. ([#1392](https://github.com/getsentry/sentry-native/pull/1392))

## 0.11.1

**Features**:
Expand Down
100 changes: 86 additions & 14 deletions src/backends/sentry_backend_inproc.c
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,57 @@ registers_from_uctx(const sentry_ucontext_t *uctx)
return registers;
}

#ifdef SENTRY_PLATFORM_LINUX
/**
 * Reads the stack pointer out of the machine context saved for a signal.
 *
 * The value is taken from `uctx->user_context->uc_mcontext`; which member
 * holds it depends on the architecture selected by the preprocessor below.
 *
 * Returns the register value as an integer. On architectures without an
 * implementation a warning is logged and 0 is returned.
 */
static uintptr_t
get_stack_pointer(const sentry_ucontext_t *uctx)
{
#    if defined(__i386__)
    return (uintptr_t)uctx->user_context->uc_mcontext.gregs[REG_ESP];
#    elif defined(__x86_64__)
    return (uintptr_t)uctx->user_context->uc_mcontext.gregs[REG_RSP];
#    elif defined(__arm__)
    return (uintptr_t)uctx->user_context->uc_mcontext.arm_sp;
#    elif defined(__aarch64__)
    return (uintptr_t)uctx->user_context->uc_mcontext.sp;
#    elif defined(__mips__) || defined(__mips64__)
    return (uintptr_t)uctx->user_context->uc_mcontext.gregs[29]; // REG_SP
#    elif defined(__riscv)
    return (uintptr_t)uctx->user_context->uc_mcontext.__gregs[2]; // REG_SP
#    elif defined(__s390x__)
    return (uintptr_t)uctx->user_context->uc_mcontext.gregs[15];
#    else
    // The return type is an integer, so the "unknown" value is 0 and not
    // NULL: converting a pointer constant to uintptr_t without a cast is
    // rejected by recent compilers, which would break the build on exactly
    // the architectures this fallback is meant to keep building.
    SENTRY_WARN("get_stack_pointer is not implemented for this architecture. "
                "Signal chaining may not work as expected.");
    return 0;
Comment on lines +477 to +479
Copy link
Collaborator Author

@jpnurmi jpnurmi Sep 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is #error or a runtime warning a better choice here? i ended up with a warning because i thought it would be annoying to potentially break builds for irrelevant platforms that Sentry .NET doesn't even support

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is fine either way, but I would also err on giving a runtime response rather than failing at build time, as the entire execution path is optional.

#    endif
}

/**
 * Reads the instruction pointer (program counter) out of the machine context
 * saved for a signal.
 *
 * The value is taken from `uctx->user_context->uc_mcontext`; which member
 * holds it depends on the architecture selected by the preprocessor below.
 *
 * Returns the register value as an integer. On architectures without an
 * implementation a warning is logged and 0 is returned.
 */
static uintptr_t
get_instruction_pointer(const sentry_ucontext_t *uctx)
{
#    if defined(__i386__)
    return (uintptr_t)uctx->user_context->uc_mcontext.gregs[REG_EIP];
#    elif defined(__x86_64__)
    return (uintptr_t)uctx->user_context->uc_mcontext.gregs[REG_RIP];
#    elif defined(__arm__)
    return (uintptr_t)uctx->user_context->uc_mcontext.arm_pc;
#    elif defined(__aarch64__)
    return (uintptr_t)uctx->user_context->uc_mcontext.pc;
#    elif defined(__mips__) || defined(__mips64__)
    return (uintptr_t)uctx->user_context->uc_mcontext.pc;
#    elif defined(__riscv)
    return (uintptr_t)uctx->user_context->uc_mcontext.__gregs[0]; // REG_PC
#    elif defined(__s390x__)
    return (uintptr_t)uctx->user_context->uc_mcontext.psw.addr;
#    else
    // The return type is an integer, so the "unknown" value is 0 and not
    // NULL: converting a pointer constant to uintptr_t without a cast is
    // rejected by recent compilers, which would break the build on exactly
    // the architectures this fallback is meant to keep building.
    SENTRY_WARN(
        "get_instruction_pointer is not implemented for this architecture. "
        "Signal chaining may not work as expected.");
    return 0;
#    endif
}
#endif

static sentry_value_t
make_signal_event(
const struct signal_slot *sig_slot, const sentry_ucontext_t *uctx)
Expand Down Expand Up @@ -533,20 +584,6 @@ handle_ucontext(const sentry_ucontext_t *uctx)

SENTRY_INFO("entering signal handler");

const struct signal_slot *sig_slot = NULL;
for (int i = 0; i < SIGNAL_COUNT; ++i) {
#ifdef SENTRY_PLATFORM_UNIX
if (SIGNAL_DEFINITIONS[i].signum == uctx->signum) {
#elif defined SENTRY_PLATFORM_WINDOWS
if (SIGNAL_DEFINITIONS[i].signum
== uctx->exception_ptrs.ExceptionRecord->ExceptionCode) {
#else
# error Unsupported platform
#endif
sig_slot = &SIGNAL_DEFINITIONS[i];
}
}

#ifdef SENTRY_PLATFORM_UNIX
// inform the sentry_sync system that we're in a signal handler. This will
// make mutexes spin on a spinlock instead as it's no longer safe to use a
Expand All @@ -568,19 +605,54 @@ handle_ucontext(const sentry_ucontext_t *uctx)
// handler and that would mean we couldn't enter this handler with
// the next signal coming in if we didn't "leave" here.
sentry__leave_signal_handler();
if (!options->enable_logging_when_crashed) {
sentry__logger_enable();
}

uintptr_t ip = get_instruction_pointer(uctx);
uintptr_t sp = get_stack_pointer(uctx);

// invoke the previous handler (typically the CLR/Mono
// signal-to-managed-exception handler)
invoke_signal_handler(
uctx->signum, uctx->siginfo, (void *)uctx->user_context);

// If the execution returns here in AOT mode, and the instruction
// or stack pointer were changed, it means CLR/Mono converted the
// signal into a managed exception and transferred execution to a
// managed exception handler.
// https://github.com/dotnet/runtime/blob/6d96e28597e7da0d790d495ba834cc4908e442cd/src/mono/mono/mini/exceptions-arm64.c#L538
if (ip != get_instruction_pointer(uctx)
|| sp != get_stack_pointer(uctx)) {
SENTRY_DEBUG("runtime converted the signal to a managed "
"exception, we do not handle the signal");
return;
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Signal Chaining Logic Fails on Some Architectures

The new signal chaining logic has two issues: on architectures where instruction and stack pointers aren't retrieved, it incorrectly proceeds with crash handling even after a runtime converts a signal. Additionally, when a signal conversion is detected and the function returns early, the internal signal handler state becomes unbalanced.

Fix in Cursor Fix in Web

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is absolutely correct, but the only side-effect currently visible is for the logging toggle. Similarly to how we "leave" the signal handler before chaining, we must also re-enable logging immediately after "leaving" and disable it again before re-entering, because if it were a managed code exception, we want logging to remain enabled.

We can also move the entire sig_slot assignment down below the CHAIN_AT_START code, to make the path dependencies more obvious.

However, I think both have a lower priority than figuring out the signaling sequence of both runtimes and how we can align them.


// let's re-enter because it means this was an actual native crash
if (!options->enable_logging_when_crashed) {
sentry__logger_disable();
}
sentry__enter_signal_handler();
SENTRY_DEBUG(
"return from runtime signal handler, we handle the signal");
}
#endif

const struct signal_slot *sig_slot = NULL;
for (int i = 0; i < SIGNAL_COUNT; ++i) {
#ifdef SENTRY_PLATFORM_UNIX
if (SIGNAL_DEFINITIONS[i].signum == uctx->signum) {
#elif defined SENTRY_PLATFORM_WINDOWS
if (SIGNAL_DEFINITIONS[i].signum
== uctx->exception_ptrs.ExceptionRecord->ExceptionCode) {
#else
# error Unsupported platform
#endif
sig_slot = &SIGNAL_DEFINITIONS[i];
}
}

#ifdef SENTRY_PLATFORM_UNIX
// use a signal-safe allocator before we tear down.
sentry__page_allocator_enable();
Expand Down
13 changes: 8 additions & 5 deletions tests/fixtures/dotnet_signal/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,16 @@ static void Main(string[] args)
{
Console.WriteLine("dereference a NULL object from managed code");
var s = default(string);
var c = s.Length;
var c = s!.Length;
}
catch (NullReferenceException exception)
catch (NullReferenceException)
{
Console.WriteLine("dereference another NULL object from managed code");
var s = default(string);
var c = s.Length;
if (args is ["managed-exception"])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this if necessary? I am not trying to be a stickler, but we should minimize the test code. If I see an if for the "managed-exception" inside the catch, I would assume that this code would be reached if we pass any other argument at the command line, which shouldn't be the case, right?

Copy link
Collaborator Author

@jpnurmi jpnurmi Sep 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently, the test re-triggers a NullReferenceException from within the managed exception handler and leaks it into native code:

catch (NullReferenceException exception)
{
Console.WriteLine("dereference another NULL object from managed code");
var s = default(string);
var c = s.Length;
}

I put it behind the "managed-exception" argument to be able to test the scenario where a managed exception is handled without leaking it into native code, to let execution continue normally.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

However, this highlights a similar divergence to the one I previously raised with the test assertion. Because the initial reason for doing this was to have two managed exceptions, where one is caught in managed code and the other isn't, both would end up in our signal handler.

Neither should create a native event, at least that was my assumption, since they both have nothing to do with an actual native crash, because you would get a stack trace of the runtime (or typically much less than that because whichever stackwalker is in effect will not have sufficient information to walk the runtime's stack).

In the AOT case, though, and apparently Mono too, since CLR AOT seems to be based on Mono AOT, you not only observe two SIGSEGVs (as in the CLR JIT), but also a final SIGABRT coming from the unhandled exception. This SIGABRT, from my point of view, shouldn't trigger a native event either (for the same reason as above).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To clarify: I think it is acceptable for Mono to raise a SIGABRT at the end, but I wouldn't bypass this with an if statement inside the catch.

It is okay, because Sentry does install a top-level handler for .NET anyway, right? So the chance of triggering the SIGABRT handler of the Native SDK is relatively low. And if it triggers anyway, then that might highlight another issue.

But instead of preventing it from happening, I would let the C# code run as initially intended and then show inside the test that the serialized envelope exists in the AOT/Mono case where two managed exceptions are raised, but that it is a SIGABRT and not the SIGSEGV that triggered the managed code exception. This way, you don't hide that behavior and explicitly show the difference between the two implementations in the test assertions.

Does that make sense?

I think developing a heuristic for ignoring that particular SIGABRT is rather an unnecessary investment at this point (except if you already know that it will be a problem downstream).

{
Console.WriteLine("dereference another NULL object from managed code");
var s = default(string);
var c = s!.Length;
}
}
}
}
Expand Down
115 changes: 106 additions & 9 deletions tests/test_dotnet_signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def assert_run_dir_with_envelope(database_path):
), f"There is more than one crash envelope ({len(crash_envelopes)}"


def run_dotnet(tmp_path, args):
def run_jit(tmp_path, args):
env = os.environ.copy()
env["LD_LIBRARY_PATH"] = str(tmp_path) + ":" + env.get("LD_LIBRARY_PATH", "")
return subprocess.Popen(
Expand All @@ -48,19 +48,19 @@ def run_dotnet(tmp_path, args):
)


def run_dotnet_managed_exception(tmp_path):
return run_dotnet(tmp_path, ["dotnet", "run"])
def run_jit_managed_exception(tmp_path):
return run_jit(tmp_path, ["dotnet", "run", "managed-exception"])


def run_dotnet_native_crash(tmp_path):
return run_dotnet(tmp_path, ["dotnet", "run", "native-crash"])
def run_jit_native_crash(tmp_path):
return run_jit(tmp_path, ["dotnet", "run", "native-crash"])


@pytest.mark.skipif(
sys.platform != "linux" or is_x86 or is_asan or is_tsan,
reason="dotnet signal handling is currently only supported on 64-bit Linux without sanitizers",
reason="dotnet JIT signal handling is currently only supported on 64-bit Linux without sanitizers",
)
def test_dotnet_signals_inproc(cmake):
def test_jit_signals_inproc(cmake):
try:
# build native client library with inproc and the example for crash dumping
tmp_path = cmake(
Expand All @@ -84,7 +84,7 @@ def test_dotnet_signals_inproc(cmake):
)

# this runs the dotnet program with the Native SDK and chain-at-start, when managed code raises a signal that CLR convert to an exception.
dotnet_run = run_dotnet_managed_exception(tmp_path)
dotnet_run = run_jit_managed_exception(tmp_path)
dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate()

# the program will fail with a `NullReferenceException`, but the Native SDK won't register a crash.
Expand All @@ -98,7 +98,7 @@ def test_dotnet_signals_inproc(cmake):
assert_empty_run_dir(database_path)

# this runs the dotnet program with the Native SDK and chain-at-start, when an actual native crash raises a signal
dotnet_run = run_dotnet_native_crash(tmp_path)
dotnet_run = run_jit_native_crash(tmp_path)
dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate()

# the program will fail with a SIGSEGV, that has been processed by the Native SDK which produced a crash envelope
Expand All @@ -112,3 +112,100 @@ def test_dotnet_signals_inproc(cmake):
shutil.rmtree(project_fixture_path / ".sentry-native", ignore_errors=True)
shutil.rmtree(project_fixture_path / "bin", ignore_errors=True)
shutil.rmtree(project_fixture_path / "obj", ignore_errors=True)


def run_aot(tmp_path, args=None):
    """Start the AOT-published fixture binary and return its ``Popen`` handle.

    The executable ``bin/test_dotnet`` under ``tmp_path`` is launched with
    ``tmp_path`` as working directory, ``tmp_path`` prepended to
    ``LD_LIBRARY_PATH``, and stdout/stderr captured as text pipes.

    :param tmp_path: directory containing ``bin/test_dotnet``.
    :param args: optional list of command-line arguments for the binary.
    :return: the started ``subprocess.Popen`` object (not yet waited on).
    """
    extra_args = [] if args is None else args

    child_env = dict(os.environ)
    inherited_search_path = child_env.get("LD_LIBRARY_PATH", "")
    child_env["LD_LIBRARY_PATH"] = ":".join((str(tmp_path), inherited_search_path))

    command = [str(tmp_path / "bin/test_dotnet")] + extra_args
    return subprocess.Popen(
        command,
        cwd=tmp_path,
        env=child_env,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )


def run_aot_managed_exception(tmp_path):
    """Launch the AOT fixture with the single argument ``managed-exception``.

    :param tmp_path: directory passed through to ``run_aot``.
    :return: whatever ``run_aot`` returns for that invocation.
    """
    scenario_args = ["managed-exception"]
    return run_aot(tmp_path, scenario_args)


def run_aot_native_crash(tmp_path):
    """Launch the AOT fixture with the single argument ``native-crash``.

    :param tmp_path: directory passed through to ``run_aot``.
    :return: whatever ``run_aot`` returns for that invocation.
    """
    scenario_args = ["native-crash"]
    return run_aot(tmp_path, scenario_args)


@pytest.mark.skipif(
    sys.platform != "linux" or is_x86 or is_asan or is_tsan,
    reason="dotnet AOT signal handling is currently only supported on 64-bit Linux without sanitizers",
)
def test_aot_signals_inproc(cmake):
    """Exercise inproc signal chaining against the AOT-published .NET fixture.

    Builds the native library with the inproc backend, compiles ``crash.c``
    into ``libcrash.so``, publishes the fixture with ``PublishAot=true`` and
    then runs it twice: once without arguments (expects exit code 0 and no
    crash recorded) and once with ``native-crash`` (expects a non-zero exit
    code and a crash envelope).
    """
    # Bound before the `try` so the cleanup in `finally` cannot fail with an
    # unbound-name error (and hide the real failure) when `cmake(...)` raises.
    tmp_path = None
    try:
        # build native client library with inproc and the example for crash dumping
        tmp_path = cmake(
            ["sentry"],
            {"SENTRY_BACKEND": "inproc", "SENTRY_TRANSPORT": "none"},
        )

        # build the crashing native library
        subprocess.run(
            [
                "gcc",
                "-Wall",
                "-Wextra",
                "-fPIC",
                "-shared",
                str(project_fixture_path / "crash.c"),
                "-o",
                str(tmp_path / "libcrash.so"),
            ],
            check=True,
        )

        # AOT-compile the dotnet program
        subprocess.run(
            [
                "dotnet",
                "publish",
                "-p:PublishAot=true",
                "-p:Configuration=Release",
                "-o",
                str(tmp_path / "bin"),
            ],
            cwd=project_fixture_path,
            check=True,
        )

        # this runs the dotnet program in AOT mode with the Native SDK and chain-at-start, and triggers a `NullReferenceException`
        # raising a signal that CLR converts to a managed exception, which is then handled by the managed code and
        # not leaked out to the native code so no crash is registered.
        dotnet_run = run_aot(tmp_path)
        dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate()

        # the program handles the `NullReferenceException`, so the Native SDK won't register a crash.
        assert dotnet_run.returncode == 0
        assert (
            "NullReferenceException" not in dotnet_run_stderr
        ), f"Managed exception run failed.\nstdout:\n{dotnet_run_stdout}\nstderr:\n{dotnet_run_stderr}"
        database_path = tmp_path / ".sentry-native"
        assert database_path.exists(), "No database-path exists"
        assert not (database_path / "last_crash").exists(), "A crash was registered"
        assert_empty_run_dir(database_path)

        # this runs the dotnet program with the Native SDK and chain-at-start, when an actual native crash raises a signal
        dotnet_run = run_aot_native_crash(tmp_path)
        dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate()

        # the program will fail with a SIGSEGV, that has been processed by the Native SDK which produced a crash envelope
        assert dotnet_run.returncode != 0
        assert (
            "crash has been captured" in dotnet_run_stderr
        ), f"Native exception run failed.\nstdout:\n{dotnet_run_stdout}\nstderr:\n{dotnet_run_stderr}"
        assert (database_path / "last_crash").exists()
        assert_run_dir_with_envelope(database_path)
    finally:
        # the database directory only exists once the build directory is known
        if tmp_path is not None:
            shutil.rmtree(tmp_path / ".sentry-native", ignore_errors=True)
        shutil.rmtree(project_fixture_path / "bin", ignore_errors=True)
        shutil.rmtree(project_fixture_path / "obj", ignore_errors=True)
Loading