Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
eb6c689
Bring throw helpers to PUSH_COOP_PINVOKE_FRAME plan
am11 Oct 26, 2025
5359b4c
Handle FP regs
am11 Jan 8, 2026
cbb9238
.
am11 Jan 8, 2026
b4a3ab2
wasm
am11 Jan 9, 2026
7df98b1
arm et al.
am11 Jan 9, 2026
14e1537
Merge branch 'main' into feature/il_throw_PUSH_COOP_PINVOKE_FRAME
am11 Jan 9, 2026
fef4204
Account for FP callee-saved regs
am11 Jan 9, 2026
4835a5f
.
am11 Jan 9, 2026
2746edf
.
am11 Jan 10, 2026
95c39e5
Merge branch 'main' into feature/il_throw_PUSH_COOP_PINVOKE_FRAME
am11 Jan 10, 2026
2450d88
fixes and reverts
am11 Jan 10, 2026
7fe9c53
[x64] unix: fix alignment, win: test a random thought
am11 Jan 11, 2026
bd342cf
Merge branch 'main' into feature/il_throw_PUSH_COOP_PINVOKE_FRAME
am11 Jan 11, 2026
e8287a5
linux-x64 fixes
am11 Jan 11, 2026
8e7a5ff
linux-x64
am11 Jan 11, 2026
9bb83b5
linux-arm32
am11 Jan 11, 2026
bf0e4c5
Revert
am11 Jan 12, 2026
af084b8
Switch to reusing PROLOG_WITH_TRANSITION_BLOCK
am11 Jan 12, 2026
17845a5
Merge branch 'main' into feature/il_throw_PUSH_COOP_PINVOKE_FRAME
am11 Jan 12, 2026
b5e6e6e
Revert "Switch to reusing PROLOG_WITH_TRANSITION_BLOCK"
am11 Jan 12, 2026
40313c1
Merge branch 'main' into feature/il_throw_PUSH_COOP_PINVOKE_FRAME
am11 Jan 12, 2026
0147c5c
Address RBP feedback
am11 Jan 12, 2026
22571b2
Prevent FP exceptions during JIT on AMD64 Windows
am11 Jan 12, 2026
fa44e1e
Merge branch 'main' into feature/il_throw_PUSH_COOP_PINVOKE_FRAME
am11 Jan 12, 2026
b109060
Remove obsolete comment (now that Init() is gone)
am11 Jan 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions src/coreclr/pal/inc/unixasmmacrosamd64.inc
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,12 @@ C_FUNC(\Name\()_End):
// the xmm registers are not supported by the libunwind
.endm

// Unaligned version for use when stack alignment cannot be guaranteed.
// Stores the 128-bit register \Reg to [rsp + \Offset] with movdqu, so the
// destination slot does not have to be 16-byte aligned.
//   Reg    - XMM register to store
//   Offset - byte offset of the slot from the current rsp
// Side effect: redefines the assembler symbol __Offset. This macro emits no
// unwind directive of its own.
.macro save_xmm128_postrsp_unaligned Reg, Offset
__Offset = \Offset
movdqu xmmword ptr [rsp + __Offset], \Reg
.endm

.macro restore_xmm128 Reg, ofs
__Offset = \ofs
movdqa \Reg, xmmword ptr [rsp + __Offset]
Expand Down Expand Up @@ -246,6 +252,20 @@ C_FUNC(\Name\()_End):

.endm

// Unaligned version for cases where 16-byte stack alignment cannot be guaranteed.
// Stores xmm0-xmm7 to eight consecutive 16-byte slots starting at [rsp + \ofs]
// (xmmN goes to \ofs + N * 0x10; 128 bytes in total) using unaligned stores.
//   ofs - byte offset from the current rsp of the slot for xmm0
.macro SAVE_FLOAT_ARGUMENT_REGISTERS_UNALIGNED ofs

save_xmm128_postrsp_unaligned xmm0, \ofs
save_xmm128_postrsp_unaligned xmm1, \ofs + 0x10
save_xmm128_postrsp_unaligned xmm2, \ofs + 0x20
save_xmm128_postrsp_unaligned xmm3, \ofs + 0x30
save_xmm128_postrsp_unaligned xmm4, \ofs + 0x40
save_xmm128_postrsp_unaligned xmm5, \ofs + 0x50
save_xmm128_postrsp_unaligned xmm6, \ofs + 0x60
save_xmm128_postrsp_unaligned xmm7, \ofs + 0x70

.endm

.macro RESTORE_FLOAT_ARGUMENT_REGISTERS ofs

restore_xmm128 xmm0, \ofs
Expand Down Expand Up @@ -428,6 +448,72 @@ C_FUNC(\Name\()_End):
POP_CALLEE_SAVED_REGISTERS
.endm

// Pushes a full TransitionBlock on the stack including argument registers and
// floating point argument registers. Used for exception throw helpers where we
// need to capture the complete register state.
//
// Stack layout after the prologue (low to high addresses):
//   rsp+0:   8 bytes padding (for alignment)
//   rsp+8:   FloatArgumentRegisters (xmm0-xmm7, 128 bytes)
//   rsp+136: TransitionBlock start
//            - ArgumentRegisters (rdi, rsi, rdx, rcx, r8, r9: 48 bytes)
//            - CalleeSavedRegisters (r12, r13, r14, r15, rbx, rbp: 48 bytes)
//            - Return address (8 bytes)
//
// TransitionBlock at rsp+136, floats at rsp+8 = TransitionBlock - 128
// (matches GetOffsetOfFloatArgumentRegisters which returns -128)
//
// Stack alignment calculation:
//   Before call to IL_Throw: rsp is 16-byte aligned (ABI requirement before call)
//   After call (return addr pushed): rsp % 16 = 8
//   After PUSH_CALLEE_SAVED_REGISTERS (48 bytes): rsp % 16 = 8
//   After PUSH_ARGUMENT_REGISTERS (48 bytes): rsp % 16 = 8
//   After alloc_stack 136: rsp % 16 = (8 - 136 % 16) = (8 - 8) = 0 <- aligned!
//
// NOTE: We use SAVE_FLOAT_ARGUMENT_REGISTERS_UNALIGNED because rsp+8 is not
// 16-byte aligned (rsp is aligned, rsp+8 is not).
//
// On exit, \target contains the TransitionBlock pointer.
// Registers written by this macro: rsp, rbp (after its incoming value has been
// pushed) and \target.
.macro PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS target
// Only the return address is on the stack at this point.
set_cfa_register rsp, 8

PUSH_CALLEE_SAVED_REGISTERS

// Set RBP as frame pointer to facilitate stack walking for 3rd party tools.
// After PUSH_CALLEE_SAVED_REGISTERS, saved RBP is at rsp+40 (5 regs * 8 bytes above current rsp)
lea rbp, [rsp + 40]

PUSH_ARGUMENT_REGISTERS

// Allocate 128 bytes for floats + 8 bytes padding = 136 bytes
alloc_stack 136
// Save float argument registers at offset 8 (TransitionBlock - 128)
// Using unaligned stores because rsp+8 is not 16-byte aligned
SAVE_FLOAT_ARGUMENT_REGISTERS_UNALIGNED 8

END_PROLOGUE

// TransitionBlock starts at rsp+136 (where ArgumentRegisters are)
lea \target, [rsp + 136]
.endm

.macro INLINE_GETTHREAD
// Inlined version of call C_FUNC(RhpGetThread)
INLINE_GET_TLS_VAR t_CurrentThreadInfo
Expand Down
37 changes: 37 additions & 0 deletions src/coreclr/pal/inc/unixasmmacrosarm.inc
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,43 @@ C_FUNC(\Name):
add \target, sp, 4
.endm

// Pushes a full TransitionBlock on the stack including float argument registers
// and the FP callee-saved registers.
// On exit, \target contains the TransitionBlock pointer.
// r12 is used as a scratch address register and is overwritten.
//
// Stack layout (from sp going up):
// sp+0: padding (4 bytes) - for 8-byte alignment
// sp+4: d8-d15 (64 bytes) - FP callee-saved
// sp+68: padding (4 bytes) - to make d0-d7 8-byte aligned at TransitionBlock-68
// sp+72: d0-d7 (64 bytes) - float argument registers (at TransitionBlock - 68)
// sp+136: padding (4 bytes) - to keep total allocation 8-byte aligned
// sp+140: TransitionBlock starts here (CalleeSavedRegisters + ArgumentRegisters pushed above)
//
// GetNegSpaceSize() for ARM32 = 64 (FloatArgumentRegisters) + 4 (padding) = 68
// GetOffsetOfFloatArgumentRegisters() = -68
//
// Total stack alloc: 4 + 64 + 4 + 64 + 4 = 140 bytes
// Stack: Arguments(16) + callee-saved(36) + alloc(140) = 192 bytes
// 192 % 8 = 0, properly aligned for ARM32 (8-byte alignment required)
.macro PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS target
// Push argument registers (r0-r3) - these will be at highest address in TransitionBlock
PUSH_ARGUMENT_REGISTERS
PUSH_CALLEE_SAVED_REGISTERS
// Let r7 point at the saved r7 slot on the stack (clang FP style).
PROLOG_STACK_SAVE_OFFSET r7, #12
// Allocate space for: padding (4) + d8-d15 (64) + padding (4) + d0-d7 (64) + padding (4) = 140 bytes
alloc_stack 140
// Save floating point argument registers (d0-d7) at sp+72 (TransitionBlock - 68)
add r12, sp, #72
vstm r12, {d0-d7}
// Save FP callee-saved registers (d8-d15) at sp+4
add r12, sp, #4
vstm r12, {d8-d15}
CHECK_STACK_ALIGNMENT
END_PROLOGUE
// TransitionBlock is at sp + 140
add \target, sp, #140
.endm

.macro POP_COOP_PINVOKE_FRAME
free_stack 4
POP_CALLEE_SAVED_REGISTERS
Expand Down
49 changes: 48 additions & 1 deletion src/coreclr/pal/inc/unixasmmacrosarm64.inc
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,54 @@ C_FUNC(\Name\()_End):
EPILOG_RESTORE_REG_PAIR x25, x26, 64
EPILOG_RESTORE_REG_PAIR x27, x28, 80
EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 176
.endm
.endm

// Pushes a full TransitionBlock on the stack including argument registers and
// floating point argument registers. Used for exception throw helpers where we
// need to capture the complete register state including FP callee-saved registers.
//
// Stack layout (from low to high address):
// sp+0: FP callee-saved registers (d8-d15, 64 bytes; only the 64-bit d views are stored)
// sp+64: FloatArgumentRegisters (q0-q7, 128 bytes)
// sp+192: TransitionBlock start (176 bytes)
// - CalleeSavedRegisters (fp, lr, x19-x28 - 96 bytes)
// - padding (8 bytes)
// - x8 (8 bytes)                          <- TransitionBlock + 104 = sp+296
// - ArgumentRegisters (x0-x7, 64 bytes)
//
// On exit, \target contains the TransitionBlock pointer (sp+192).
.macro PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS target
// Reserve the 176-byte TransitionBlock and store fp/lr at its base.
PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -176

// Spill callee saved registers
PROLOG_SAVE_REG_PAIR x19, x20, 16
PROLOG_SAVE_REG_PAIR x21, x22, 32
PROLOG_SAVE_REG_PAIR x23, x24, 48
PROLOG_SAVE_REG_PAIR x25, x26, 64
PROLOG_SAVE_REG_PAIR x27, x28, 80

// Allocate space for FloatArgumentRegisters (128) + FP callee-saved (64) = 192 bytes
// From here on the TransitionBlock base is sp+192.
PROLOG_STACK_ALLOC 192

// Save argument registers (x8, x0-x7) at offset 296 from sp (192 + 104)
SAVE_ARGUMENT_REGISTERS sp, 296

// Save floating point argument registers (q0-q7) at sp+64
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 64

// Save FP callee-saved registers (d8-d15) at sp+0
str d8, [sp, #0]
str d9, [sp, #8]
str d10, [sp, #16]
str d11, [sp, #24]
str d12, [sp, #32]
str d13, [sp, #40]
str d14, [sp, #48]
str d15, [sp, #56]

// Set target to TransitionBlock pointer
add \target, sp, #192
.endm

// ------------------------------------------------------------------
// Macro to generate Redirection Stubs
Expand Down
39 changes: 39 additions & 0 deletions src/coreclr/pal/inc/unixasmmacrosloongarch64.inc
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,45 @@ C_FUNC(\Name\()_End):
EPILOG_STACK_FREE 160
.endm

// Pushes a full TransitionBlock on the stack including argument registers and
// floating point argument registers. Used for exception throw helpers where we
// need to capture the complete register state, including the FP callee-saved
// registers.
//
// Stack layout (from low to high address):
// sp+0: FP callee-saved registers (f24-f31, 64 bytes)
// sp+64: FloatArgumentRegisters (fa0-fa7, 64 bytes)
// sp+128: TransitionBlock start
// - CalleeSavedRegisters (fp, ra, s0-s8 - 96 bytes)
//   NOTE(review): 11 registers occupy 88 bytes, so the 96-byte area presumably
//   ends with 8 bytes of padding - confirm against the TransitionBlock definition.
// - ArgumentRegisters (a0-a7, 64 bytes) at sp+224 (TransitionBlock + 96)
//
// On exit, \target contains the TransitionBlock pointer (sp+128).
.macro PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS target
// Stack: FPCalleeSaved(64) + FloatArgs(64) + CalleeSaved(96) + Args(64) = 288 bytes
PROLOG_STACK_ALLOC 288
// Store $r22/$r1 (fp/ra) at the base of the TransitionBlock (sp+128).
// NOTE(review): the trailing "1" presumably asks the macro to also set up fp - confirm.
PROLOG_SAVE_REG_PAIR 22, 1, 128, 1

// Save callee-saved registers at offset 128 (after FP callee-saved and FloatArgumentRegisters)
SAVE_CALLEESAVED_REGISTERS $sp, 128

// Save argument registers (a0-a7) at offset 224
SAVE_ARGUMENT_REGISTERS $sp, 224

// Save floating-point argument registers (fa0-fa7) at offset 64
SAVE_FLOAT_ARGUMENT_REGISTERS $sp, 64

// Save FP callee-saved registers (f24-f31) at offset 0
fst.d $f24, $sp, 0
fst.d $f25, $sp, 8
fst.d $f26, $sp, 16
fst.d $f27, $sp, 24
fst.d $f28, $sp, 32
fst.d $f29, $sp, 40
fst.d $f30, $sp, 48
fst.d $f31, $sp, 56

// Set target to TransitionBlock pointer
addi.d \target, $sp, 128
.endm

// ------------------------------------------------------------------
// Macro to generate Redirection Stubs
//
Expand Down
45 changes: 45 additions & 0 deletions src/coreclr/pal/inc/unixasmmacrosriscv64.inc
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,51 @@ C_FUNC(\Name):
EPILOG_STACK_FREE 192
.endm

// Pushes a full TransitionBlock on the stack including argument registers and
// floating point argument registers. Used for exception throw helpers where we
// need to capture the complete register state, including the FP callee-saved
// registers.
//
// Stack layout (from low to high address):
// sp+0: FP callee-saved registers (fs0-fs11, 96 bytes)
// sp+96: FloatArgumentRegisters (fa0-fa7, 64 bytes)
// sp+160: TransitionBlock start
// - CalleeSavedRegisters (fp, ra, s1-s11, tp, gp - 120 bytes)
// - padding (8 bytes)
// - ArgumentRegisters (a0-a7, 64 bytes) at sp+288 (TransitionBlock + 128)
//
// On exit, \target contains the TransitionBlock pointer (sp+160).
.macro PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS target
// Stack: FPCalleeSaved(96) + FloatArgs(64) + CalleeSaved(120) + pad(8) + Args(64) = 352 bytes
PROLOG_STACK_ALLOC 352
// Store fp/ra at the base of the TransitionBlock (sp+160).
// NOTE(review): the trailing "1" presumably asks the macro to also set up fp - confirm.
PROLOG_SAVE_REG_PAIR fp, ra, 160, 1

// Save callee-saved registers at offset 160 (after FP callee-saved and FloatArgumentRegisters)
SAVE_CALLEESAVED_REGISTERS sp, 160

// Save argument registers (a0-a7) at offset 288
SAVE_ARGUMENT_REGISTERS sp, 288

// Save floating-point argument registers (fa0-fa7) at offset 96
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96

// Save FP callee-saved registers (fs0-fs11 = f8,f9,f18-f27) at offset 0
// RISC-V FP callee-saved: fs0=f8, fs1=f9, fs2-fs11=f18-f27
fsd fs0, 0(sp) // f8
fsd fs1, 8(sp) // f9
fsd fs2, 16(sp) // f18
fsd fs3, 24(sp) // f19
fsd fs4, 32(sp) // f20
fsd fs5, 40(sp) // f21
fsd fs6, 48(sp) // f22
fsd fs7, 56(sp) // f23
fsd fs8, 64(sp) // f24
fsd fs9, 72(sp) // f25
fsd fs10, 80(sp) // f26
fsd fs11, 88(sp) // f27

// Set target to TransitionBlock pointer
addi \target, sp, 160
.endm

// ------------------------------------------------------------------
// Macro to generate Redirection Stubs
//
Expand Down
50 changes: 50 additions & 0 deletions src/coreclr/vm/amd64/AsmHelpers.asm
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ extern ProfileLeave:proc
extern ProfileTailcall:proc
extern OnHijackWorker:proc
extern JIT_RareDisableHelperWorker:proc
extern IL_Throw_Impl:proc
extern IL_ThrowExact_Impl:proc
extern IL_Rethrow_Impl:proc
ifdef FEATURE_INTERPRETER
extern ExecuteInterpretedMethod:proc
extern GetInterpThreadContextWithPossiblyMissingThreadOrCallStub:proc
Expand Down Expand Up @@ -1200,4 +1203,51 @@ NESTED_END CallJittedMethodRetI8, _TEXT

endif ; FEATURE_INTERPRETER

;==========================================================================
; Capture a transition block with register values and call the IL_Throw_Impl
; implementation written in C.
;
; Input state:
; RCX = Pointer to exception object
;
; Call made:
; IL_Throw_Impl(RCX = exception object, RDX = TransitionBlock pointer)
;
; NOTE(review): PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS is defined outside this
; view; this stub assumes it preserves RCX, leaves RSP 16-byte aligned and
; reserves the outgoing-argument home space required before a call - confirm.
;==========================================================================
NESTED_ENTRY IL_Throw, _TEXT
PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS rdx

; RCX already contains exception object
; RDX contains pointer to TransitionBlock
call IL_Throw_Impl
; Should never return; trap with a breakpoint if it does
int 3
NESTED_END IL_Throw, _TEXT

;==========================================================================
; Capture a transition block with register values and call the IL_ThrowExact_Impl
; implementation written in C.
;
; Input state:
; RCX = Pointer to exception object
;
; Call made:
; IL_ThrowExact_Impl(RCX = exception object, RDX = TransitionBlock pointer)
;
; NOTE(review): PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS is defined outside this
; view; this stub assumes it preserves RCX, leaves RSP 16-byte aligned and
; reserves the outgoing-argument home space required before a call - confirm.
;==========================================================================
NESTED_ENTRY IL_ThrowExact, _TEXT
PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS rdx

; RCX already contains exception object
; RDX contains pointer to TransitionBlock
call IL_ThrowExact_Impl
; Should never return; trap with a breakpoint if it does
int 3
NESTED_END IL_ThrowExact, _TEXT

;==========================================================================
; Capture a transition block with register values and call the IL_Rethrow_Impl
; implementation written in C.
;
; Input state:
; No register argument is consumed; RCX is overwritten with the
; TransitionBlock pointer.
;
; Call made:
; IL_Rethrow_Impl(RCX = TransitionBlock pointer)
;
; NOTE(review): PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS is defined outside this
; view; this stub assumes it leaves RSP 16-byte aligned and reserves the
; outgoing-argument home space required before a call - confirm.
;==========================================================================
NESTED_ENTRY IL_Rethrow, _TEXT
PUSH_COOP_PINVOKE_FRAME_WITH_FLOATS rcx

; RCX contains pointer to TransitionBlock
call IL_Rethrow_Impl
; Should never return; trap with a breakpoint if it does
int 3
NESTED_END IL_Rethrow, _TEXT

end
Loading
Loading