diff --git a/CHANGELOG.md b/CHANGELOG.md
index 891bd6cb9..fd3e9425d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # Changelog
 
+## Unreleased
+
+**Fixes**:
+- Fix AOT interop with managed .NET runtimes. ([#1392](https://github.com/getsentry/sentry-native/pull/1392))
+
 ## 0.11.1
 
 **Features**:
diff --git a/src/backends/sentry_backend_inproc.c b/src/backends/sentry_backend_inproc.c
index 4ed124990..bc1c4ae6b 100644
--- a/src/backends/sentry_backend_inproc.c
+++ b/src/backends/sentry_backend_inproc.c
@@ -455,6 +455,65 @@ registers_from_uctx(const sentry_ucontext_t *uctx)
     return registers;
 }
 
+#ifdef SENTRY_PLATFORM_LINUX
+/**
+ * Returns the stack pointer saved in `uctx->user_context->uc_mcontext`, or 0
+ * (after logging a warning) when the architecture is not handled below.
+ */
+static uintptr_t
+get_stack_pointer(const sentry_ucontext_t *uctx)
+{
+#    if defined(__i386__)
+    return uctx->user_context->uc_mcontext.gregs[REG_ESP];
+#    elif defined(__x86_64__)
+    return uctx->user_context->uc_mcontext.gregs[REG_RSP];
+#    elif defined(__arm__)
+    return uctx->user_context->uc_mcontext.arm_sp;
+#    elif defined(__aarch64__)
+    return uctx->user_context->uc_mcontext.sp;
+#    elif defined(__mips__) || defined(__mips64__)
+    return uctx->user_context->uc_mcontext.gregs[29]; // REG_SP
+#    elif defined(__riscv)
+    return uctx->user_context->uc_mcontext.__gregs[2]; // REG_SP
+#    elif defined(__s390x__)
+    return uctx->user_context->uc_mcontext.gregs[15];
+#    else
+    SENTRY_WARN("get_stack_pointer is not implemented for this architecture. "
+                "Signal chaining may not work as expected.");
+    return 0; // integer return type: NULL may expand to a pointer constant
+#    endif
+}
+
+/**
+ * Returns the instruction pointer saved in `uctx->user_context->uc_mcontext`,
+ * or 0 (after logging a warning) when the architecture is not handled below.
+ */
+static uintptr_t
+get_instruction_pointer(const sentry_ucontext_t *uctx)
+{
+#    if defined(__i386__)
+    return uctx->user_context->uc_mcontext.gregs[REG_EIP];
+#    elif defined(__x86_64__)
+    return uctx->user_context->uc_mcontext.gregs[REG_RIP];
+#    elif defined(__arm__)
+    return uctx->user_context->uc_mcontext.arm_pc;
+#    elif defined(__aarch64__)
+    return uctx->user_context->uc_mcontext.pc;
+#    elif defined(__mips__) || defined(__mips64__)
+    return uctx->user_context->uc_mcontext.pc;
+#    elif defined(__riscv)
+    return uctx->user_context->uc_mcontext.__gregs[0]; // REG_PC
+#    elif defined(__s390x__)
+    return uctx->user_context->uc_mcontext.psw.addr;
+#    else
+    SENTRY_WARN(
+        "get_instruction_pointer is not implemented for this architecture. "
+        "Signal chaining may not work as expected.");
+    return 0; // integer return type: NULL may expand to a pointer constant
+#    endif
+}
+#endif
+
 static sentry_value_t
 make_signal_event(
     const struct signal_slot *sig_slot, const sentry_ucontext_t *uctx)
@@ -533,20 +592,6 @@ handle_ucontext(const sentry_ucontext_t *uctx)
 
     SENTRY_INFO("entering signal handler");
 
-    const struct signal_slot *sig_slot = NULL;
-    for (int i = 0; i < SIGNAL_COUNT; ++i) {
-#ifdef SENTRY_PLATFORM_UNIX
-        if (SIGNAL_DEFINITIONS[i].signum == uctx->signum) {
-#elif defined SENTRY_PLATFORM_WINDOWS
-        if (SIGNAL_DEFINITIONS[i].signum
-            == uctx->exception_ptrs.ExceptionRecord->ExceptionCode) {
-#else
-#    error Unsupported platform
-#endif
-            sig_slot = &SIGNAL_DEFINITIONS[i];
-        }
-    }
-
 #ifdef SENTRY_PLATFORM_UNIX
     // inform the sentry_sync system that we're in a signal handler. This will
     // make mutexes spin on a spinlock instead as it's no longer safe to use a
@@ -568,19 +613,54 @@ handle_ucontext(const sentry_ucontext_t *uctx)
             // handler and that would mean we couldn't enter this handler with
             // the next signal coming in if we didn't "leave" here.
             sentry__leave_signal_handler();
+            if (!options->enable_logging_when_crashed) {
+                sentry__logger_enable();
+            }
+
+            uintptr_t ip = get_instruction_pointer(uctx);
+            uintptr_t sp = get_stack_pointer(uctx);
 
             // invoke the previous handler (typically the CLR/Mono
             // signal-to-managed-exception handler)
             invoke_signal_handler(
                 uctx->signum, uctx->siginfo, (void *)uctx->user_context);
 
+            // If execution returns here in AOT mode with an instruction or
+            // stack pointer that differs from the values captured before the
+            // call, CLR/Mono converted the signal into a managed exception
+            // and transferred execution to a managed exception handler.
+            // https://github.com/dotnet/runtime/blob/6d96e28597e7da0d790d495ba834cc4908e442cd/src/mono/mono/mini/exceptions-arm64.c#L538
+            if (ip != get_instruction_pointer(uctx)
+                || sp != get_stack_pointer(uctx)) {
+                SENTRY_DEBUG("runtime converted the signal to a managed "
+                             "exception, we do not handle the signal");
+                return;
+            }
+
             // let's re-enter because it means this was an actual native crash
+            if (!options->enable_logging_when_crashed) {
+                sentry__logger_disable();
+            }
             sentry__enter_signal_handler();
             SENTRY_DEBUG(
                 "return from runtime signal handler, we handle the signal");
         }
 #endif
 
+        const struct signal_slot *sig_slot = NULL;
+        for (int i = 0; i < SIGNAL_COUNT; ++i) {
+#ifdef SENTRY_PLATFORM_UNIX
+            if (SIGNAL_DEFINITIONS[i].signum == uctx->signum) {
+#elif defined SENTRY_PLATFORM_WINDOWS
+            if (SIGNAL_DEFINITIONS[i].signum
+                == uctx->exception_ptrs.ExceptionRecord->ExceptionCode) {
+#else
+#    error Unsupported platform
+#endif
+                sig_slot = &SIGNAL_DEFINITIONS[i];
+            }
+        }
+
 #ifdef SENTRY_PLATFORM_UNIX
         // use a signal-safe allocator before we tear down.
         sentry__page_allocator_enable();
diff --git a/tests/fixtures/dotnet_signal/Program.cs b/tests/fixtures/dotnet_signal/Program.cs
index 951adaaf7..489b495d6 100644
--- a/tests/fixtures/dotnet_signal/Program.cs
+++ b/tests/fixtures/dotnet_signal/Program.cs
@@ -51,13 +51,16 @@ static void Main(string[] args)
             {
                 Console.WriteLine("dereference a NULL object from managed code");
                 var s = default(string);
-                var c = s.Length;
+                var c = s!.Length;
             }
-            catch (NullReferenceException exception)
+            catch (NullReferenceException)
             {
-                Console.WriteLine("dereference another NULL object from managed code");
-                var s = default(string);
-                var c = s.Length;
+                if (args is ["managed-exception"])
+                {
+                    Console.WriteLine("dereference another NULL object from managed code");
+                    var s = default(string);
+                    var c = s!.Length;
+                }
             }
         }
     }
diff --git a/tests/test_dotnet_signals.py b/tests/test_dotnet_signals.py
index 8048cc600..08435a1e8 100644
--- a/tests/test_dotnet_signals.py
+++ b/tests/test_dotnet_signals.py
@@ -35,7 +35,7 @@ def assert_run_dir_with_envelope(database_path):
     ), f"There is more than one crash envelope ({len(crash_envelopes)}"
 
 
-def run_dotnet(tmp_path, args):
+def run_jit(tmp_path, args):
     env = os.environ.copy()
     env["LD_LIBRARY_PATH"] = str(tmp_path) + ":" + env.get("LD_LIBRARY_PATH", "")
     return subprocess.Popen(
@@ -48,19 +48,19 @@ def run_dotnet(tmp_path, args):
     )
 
 
-def run_dotnet_managed_exception(tmp_path):
-    return run_dotnet(tmp_path, ["dotnet", "run"])
+def run_jit_managed_exception(tmp_path):
+    return run_jit(tmp_path, ["dotnet", "run", "managed-exception"])
 
 
-def run_dotnet_native_crash(tmp_path):
-    return run_dotnet(tmp_path, ["dotnet", "run", "native-crash"])
+def run_jit_native_crash(tmp_path):
+    return run_jit(tmp_path, ["dotnet", "run", "native-crash"])
 
 
 @pytest.mark.skipif(
     sys.platform != "linux" or is_x86 or is_asan or is_tsan,
-    reason="dotnet signal handling is currently only supported on 64-bit Linux without sanitizers",
+    reason="dotnet JIT signal handling is currently only supported on 64-bit Linux without sanitizers",
 )
-def test_dotnet_signals_inproc(cmake):
+def test_jit_signals_inproc(cmake):
     try:
         # build native client library with inproc and the example for crash dumping
         tmp_path = cmake(
@@ -84,7 +84,7 @@ def test_dotnet_signals_inproc(cmake):
         )
 
         # this runs the dotnet program with the Native SDK and chain-at-start, when managed code raises a signal that CLR convert to an exception.
-        dotnet_run = run_dotnet_managed_exception(tmp_path)
+        dotnet_run = run_jit_managed_exception(tmp_path)
         dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate()
 
         # the program will fail with a `NullReferenceException`, but the Native SDK won't register a crash.
@@ -98,7 +98,7 @@ def test_dotnet_signals_inproc(cmake):
         assert_empty_run_dir(database_path)
 
         # this runs the dotnet program with the Native SDK and chain-at-start, when an actual native crash raises a signal
-        dotnet_run = run_dotnet_native_crash(tmp_path)
+        dotnet_run = run_jit_native_crash(tmp_path)
         dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate()
 
         # the program will fail with a SIGSEGV, that has been processed by the Native SDK which produced a crash envelope
@@ -112,3 +112,100 @@ def test_dotnet_signals_inproc(cmake):
         shutil.rmtree(project_fixture_path / ".sentry-native", ignore_errors=True)
         shutil.rmtree(project_fixture_path / "bin", ignore_errors=True)
         shutil.rmtree(project_fixture_path / "obj", ignore_errors=True)
+
+
+def run_aot(tmp_path, args=None):  # runs tmp_path/bin/test_dotnet, capturing stdio
+    if args is None:
+        args = []
+    env = os.environ.copy()
+    env["LD_LIBRARY_PATH"] = str(tmp_path) + ":" + env.get("LD_LIBRARY_PATH", "")
+    return subprocess.Popen(
+        [str(tmp_path / "bin/test_dotnet")] + args,
+        cwd=tmp_path,
+        env=env,
+        text=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+    )
+
+
+def run_aot_managed_exception(tmp_path):
+    return run_aot(tmp_path, ["managed-exception"])
+
+
+def run_aot_native_crash(tmp_path):
+    return run_aot(tmp_path, ["native-crash"])
+
+
+@pytest.mark.skipif(
+    sys.platform != "linux" or is_x86 or is_asan or is_tsan,
+    reason="dotnet AOT signal handling is currently only supported on 64-bit Linux without sanitizers",
+)
+def test_aot_signals_inproc(cmake):
+    try:
+        # build the native client library with the inproc backend
+        tmp_path = cmake(
+            ["sentry"],
+            {"SENTRY_BACKEND": "inproc", "SENTRY_TRANSPORT": "none"},
+        )
+
+        # build the crashing native library
+        subprocess.run(
+            [
+                "gcc",
+                "-Wall",
+                "-Wextra",
+                "-fPIC",
+                "-shared",
+                str(project_fixture_path / "crash.c"),
+                "-o",
+                str(tmp_path / "libcrash.so"),
+            ],
+            check=True,
+        )
+
+        # AOT-compile the dotnet program
+        subprocess.run(
+            [
+                "dotnet",
+                "publish",
+                "-p:PublishAot=true",
+                "-p:Configuration=Release",
+                "-o",
+                str(tmp_path / "bin"),
+            ],
+            cwd=project_fixture_path,
+            check=True,
+        )
+
+        # run the AOT-compiled program without arguments, using the Native SDK with chain-at-start: it triggers a
+        # `NullReferenceException` via a signal that the runtime converts to a managed exception, which managed code
+        # catches, so nothing leaks out to native code and no crash is registered.
+        dotnet_run = run_aot(tmp_path)
+        dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate()
+
+        # the program handles the `NullReferenceException`, so the Native SDK won't register a crash.
+        assert dotnet_run.returncode == 0
+        assert not (
+            "NullReferenceException" in dotnet_run_stderr
+        ), f"Managed exception run failed.\nstdout:\n{dotnet_run_stdout}\nstderr:\n{dotnet_run_stderr}"
+        database_path = tmp_path / ".sentry-native"
+        assert database_path.exists(), "No database-path exists"
+        assert not (database_path / "last_crash").exists(), "A crash was registered"
+        assert_empty_run_dir(database_path)
+
+        # this runs the AOT-compiled program with the Native SDK and chain-at-start, when an actual native crash raises a signal
+        dotnet_run = run_aot_native_crash(tmp_path)
+        dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate()
+
+        # the program will fail with a SIGSEGV, that has been processed by the Native SDK which produced a crash envelope
+        assert dotnet_run.returncode != 0
+        assert (
+            "crash has been captured" in dotnet_run_stderr
+        ), f"Native exception run failed.\nstdout:\n{dotnet_run_stdout}\nstderr:\n{dotnet_run_stderr}"
+        assert (database_path / "last_crash").exists()
+        assert_run_dir_with_envelope(database_path)
+    finally:
+        shutil.rmtree(tmp_path / ".sentry-native", ignore_errors=True)
+        shutil.rmtree(project_fixture_path / "bin", ignore_errors=True)
+        shutil.rmtree(project_fixture_path / "obj", ignore_errors=True)