From 59f5bc13b74d1f5140109da1bd56baaa77f2e299 Mon Sep 17 00:00:00 2001
From: SingleAccretion <AccretionMail@yandex.ru>
Date: Sun, 13 Aug 2023 19:44:01 +0300
Subject: [PATCH] Only use the original shadow stack for filter funclets

---
 src/coreclr/jit/llvm.h          |  4 +--
 src/coreclr/jit/llvmcodegen.cpp | 54 +++++++++++++++++++--------------
 src/coreclr/jit/llvmlower.cpp   |  4 +--
 src/coreclr/jit/llvmlssa.cpp    | 36 +++++++++++-----------
 4 files changed, 52 insertions(+), 46 deletions(-)

diff --git a/src/coreclr/jit/llvm.h b/src/coreclr/jit/llvm.h
index eba1e5cdb674..fd20fcc02931 100644
--- a/src/coreclr/jit/llvm.h
+++ b/src/coreclr/jit/llvm.h
@@ -390,7 +390,7 @@ class Llvm
     CORINFO_GENERIC_HANDLE generateUnwindTable();
 
     bool mayPhysicallyThrow(GenTree* node);
-    bool isBlockInFilter(BasicBlock* block);
+    bool isBlockInFilter(BasicBlock* block) const;
 
     // ================================================================================================================
     // |                                           Shadow stack allocation                                            |
@@ -406,7 +406,7 @@ class Llvm
     void displayInitKindForLocal(unsigned lclNum, ValueInitKind initKind);
 #endif // DEBUG
 
-    unsigned getShadowFrameSize(unsigned hndIndex) const;
+    unsigned getShadowFrameSize(unsigned funcIdx) const;
     bool isShadowFrameLocal(LclVarDsc* varDsc) const;
     bool isShadowStackLocal(unsigned lclNum) const;
     bool isFuncletParameter(unsigned lclNum) const;
diff --git a/src/coreclr/jit/llvmcodegen.cpp b/src/coreclr/jit/llvmcodegen.cpp
index 3211d900f57a..ca5b2c6c0319 100644
--- a/src/coreclr/jit/llvmcodegen.cpp
+++ b/src/coreclr/jit/llvmcodegen.cpp
@@ -98,19 +98,25 @@ bool Llvm::initializeFunctions()
             continue;
         }
 
-        // All funclets have two arguments: original and actual shadow stacks. Catch and filter funclets also
-        // take the "exception object" argument and return int32 (catchret index / retfilt value).
-        Type* ptrLlvmType = getPtrLlvmType();
         FunctionType* llvmFuncType;
-        if (ehDsc->HasCatchHandler())
+        Type* ptrLlvmType = getPtrLlvmType();
+        Type* int32LlvmType = Type::getInt32Ty(m_context->Context);
+        if (funcInfo->funKind == FUNC_FILTER)
+        {
+            // (shadow stack, original shadow stack, exception) -> result.
+            llvmFuncType =
+                FunctionType::get(int32LlvmType, {ptrLlvmType, ptrLlvmType, ptrLlvmType}, /* isVarArg */ false);
+        }
+        else if (ehDsc->HasCatchHandler())
         {
-            llvmFuncType = FunctionType::get(Type::getInt32Ty(m_context->Context),
-                                             {ptrLlvmType, ptrLlvmType, ptrLlvmType}, /* isVarArg */ false);
+            // (shadow stack, exception) -> catchret destination.
+            llvmFuncType = FunctionType::get(int32LlvmType, {ptrLlvmType, ptrLlvmType}, /* isVarArg */ false);
         }
         else
         {
-            llvmFuncType = FunctionType::get(Type::getVoidTy(m_context->Context),
-                                             {ptrLlvmType, ptrLlvmType}, /* isVarArg */ false);
+            // (shadow stack) -> void.
+            assert(ehDsc->HasFinallyOrFaultHandler());
+            llvmFuncType = FunctionType::get(Type::getVoidTy(m_context->Context), {ptrLlvmType}, /* isVarArg */ false);
         }
 
         Function* llvmFunc;
@@ -139,6 +145,11 @@ bool Llvm::initializeFunctions()
             llvmFunc = Function::Create(llvmFuncType, Function::InternalLinkage,
                                         mangledName + Twine("$F") + Twine(funcIdx) + "_" + kindName,
                                         &m_context->Module);
+            if (!ehDsc->HasFinallyHandler())
+            {
+                // Always inline funclets that will have exactly one callsite.
+                llvmFunc->addFnAttr(llvm::Attribute::AlwaysInline);
+            }
         }
         else
         {
@@ -678,7 +689,7 @@ void Llvm::generateEHDispatch()
                     // Call the catch funclet and get its dynamic catchret destination.
                     Function* catchLlvmFunc = getLlvmFunctionForIndex(hndDsc->ebdFuncIndex);
                     Value* catchRetValue =
-                        emitCallOrInvoke(catchLlvmFunc, {getShadowStackForCallee(), getOriginalShadowStack(), caughtValue}, catchPadOpBundle);
+                        emitCallOrInvoke(catchLlvmFunc, {getShadowStack(), caughtValue}, catchPadOpBundle);
 
                     // Create the dispatch switch for all possible catchret destinations. Note how we are doing linear
                     // work here because the funclet creation process will hoist nested handlers, flattening the basic
@@ -748,7 +759,7 @@ void Llvm::generateEHDispatch()
             assert(ehDsc->HasFinallyOrFaultHandler() && isReachable(ehDsc->ebdHndBeg));
 
             Function* hndLlvmFunc = getLlvmFunctionForIndex(ehDsc->ebdFuncIndex);
-            emitCallOrInvoke(hndLlvmFunc, {getShadowStackForCallee(), getOriginalShadowStack()}, catchPadOpBundle);
+            emitCallOrInvoke(hndLlvmFunc, {getShadowStack()}, catchPadOpBundle);
             if ((ehDsc->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX) && (m_unwindFrameLclNum != BAD_VAR_NUM))
             {
                 emitHelperCall(CORINFO_HELP_LLVM_EH_POP_UNWOUND_VIRTUAL_FRAMES, {}, catchPadOpBundle);
@@ -2093,8 +2104,8 @@ void Llvm::buildCatchArg(GenTree* catchArg)
     GenTree* nextNode = LIR::AsRange(CurrentBlock()).FirstNonPhiOrCatchArgNode();
     assert((catchArg->gtNext == nextNode) || (catchArg->gtNext->gtNext == nextNode));
 
-    // Exception caught is the third argument to a catch/filter funclet.
-    Value* catchArgValue = getCurrentLlvmFunction()->getArg(2);
+    unsigned exceptionArgIndex = isBlockInFilter(CurrentBlock()) ? 2 : 1;
+    Value* catchArgValue = getCurrentLlvmFunction()->getArg(exceptionArgIndex);
     mapGenTreeToValue(catchArg, catchArgValue);
 }
 
@@ -2253,7 +2264,7 @@ void Llvm::buildCallFinally(BasicBlock* block)
     // Other backends will simply skip generating the second block, while we will branch to it.
     //
     Function* finallyLlvmFunc = getLlvmFunctionForIndex(getLlvmFunctionIndexForBlock(block->bbJumpDest));
-    emitCallOrInvoke(finallyLlvmFunc, {getShadowStackForCallee(), getOriginalShadowStack()});
+    emitCallOrInvoke(finallyLlvmFunc, getShadowStack());
 
     // Some tricky EH flow configurations can make the ALWAYS part of the pair unreachable without
     // marking "block" "BBF_RETLESS_CALL". Detect this case by checking if the next block is reachable
@@ -2807,22 +2818,19 @@ Value* Llvm::getShadowStack()
 // Shadow stack moved up to avoid overwriting anything on the stack in the compiling method
 Value* Llvm::getShadowStackForCallee()
 {
-    unsigned funcIdx = getCurrentLlvmFunctionIndex();
-    unsigned hndIndex =
-        (funcIdx == ROOT_FUNC_IDX) ? EHblkDsc::NO_ENCLOSING_INDEX : _compiler->funGetFunc(funcIdx)->funEHIndex;
-
-    return gepOrAddrInBounds(getShadowStack(), getShadowFrameSize(hndIndex));
+    unsigned shadowFrameSize = getShadowFrameSize(getCurrentLlvmFunctionIndex());
+    return gepOrAddrInBounds(getShadowStack(), shadowFrameSize);
 }
 
 Value* Llvm::getOriginalShadowStack()
 {
-    if (getCurrentLlvmFunctionIndex() == ROOT_FUNC_IDX)
+    if (_compiler->funGetFunc(getCurrentLlvmFunctionIndex())->funKind == FUNC_FILTER)
     {
-        return getShadowStack();
+        // The original shadow stack pointer is the second filter parameter.
+        return getCurrentLlvmFunction()->getArg(1);
     }
 
-    // The original shadow stack pointer is the second funclet parameter.
-    return getCurrentLlvmFunction()->getArg(1);
+    return getShadowStack();
 }
 
 void Llvm::setCurrentEmitContextForBlock(BasicBlock* block)
@@ -2912,7 +2920,7 @@ unsigned Llvm::getLlvmFunctionIndexForBlock(BasicBlock* block) const
         EHblkDsc* ehDsc = _compiler->ehGetDsc(block->getHndIndex());
         funcIdx = ehDsc->ebdFuncIndex;
 
-        if (ehDsc->InFilterRegionBBRange(block))
+        if (isBlockInFilter(block))
         {
             funcIdx--;
             assert(_compiler->funGetFunc(funcIdx)->funKind == FUNC_FILTER);
diff --git a/src/coreclr/jit/llvmlower.cpp b/src/coreclr/jit/llvmlower.cpp
index e1e0ba9498fd..53fc254288c6 100644
--- a/src/coreclr/jit/llvmlower.cpp
+++ b/src/coreclr/jit/llvmlower.cpp
@@ -1091,7 +1091,7 @@ GenTree* Llvm::insertShadowStackAddr(GenTree* insertBefore, unsigned offset, uns
     }
 
     // Using an address mode node here explicitizes our assumption that the shadow stack does not overflow.
-    assert(offset <= getShadowFrameSize(EHblkDsc::NO_ENCLOSING_INDEX));
+    assert(offset <= getShadowFrameSize(ROOT_FUNC_IDX));
     GenTree* addrModeNode = createAddrModeNode(shadowStackLcl, offset);
     CurrentRange().InsertBefore(insertBefore, addrModeNode);
 
@@ -1874,7 +1874,7 @@ bool Llvm::mayPhysicallyThrow(GenTree* node)
 // Return Value:
 //    Whether "block" is part of a filter funclet.
 //
-bool Llvm::isBlockInFilter(BasicBlock* block)
+bool Llvm::isBlockInFilter(BasicBlock* block) const
 {
     if (m_blocksInFilters == BlockSetOps::UninitVal())
     {
diff --git a/src/coreclr/jit/llvmlssa.cpp b/src/coreclr/jit/llvmlssa.cpp
index e90c25f5ed8e..7950e6b72055 100644
--- a/src/coreclr/jit/llvmlssa.cpp
+++ b/src/coreclr/jit/llvmlssa.cpp
@@ -511,14 +511,14 @@ class ShadowStackAllocator
                 return RemovePhiDef(lclNode->AsLclVar());
             }
 
-            // Funclets (especially filters) will be called by the dispatcher while live state still exists
-            // on shadow frames below (in the tradional sense, where stacks grow down) them. For this reason,
-            // funclets will access state from the original frame via a dedicated shadow stack pointer, and
-            // use the actual shadow stack for calls.
-            unsigned shadowStackLclNum =
-                m_llvm->CurrentBlock()->hasHndIndex() ? m_llvm->_originalShadowStackLclNum : m_llvm->_shadowStackLclNum;
-            GenTree* lclAddress =
-                m_llvm->insertShadowStackAddr(lclNode, varDsc->GetStackOffset() + lclNode->GetLclOffs(), shadowStackLclNum);
+            // Filters will be called by the first pass while live state still exists on shadow frames above (in the
+            // traditional sense, where stacks grow down) them. For this reason, filters will access state from the
+            // original frame via a dedicated shadow stack pointer, and use the actual shadow stack for calls.
+            unsigned shadowStackLclNum = m_llvm->isBlockInFilter(m_llvm->CurrentBlock())
+                ? m_llvm->_originalShadowStackLclNum
+                : m_llvm->_shadowStackLclNum;
+            unsigned lclOffset = varDsc->GetStackOffset() + lclNode->GetLclOffs();
+            GenTree* lclAddress = m_llvm->insertShadowStackAddr(lclNode, lclOffset, shadowStackLclNum);
 
             ClassLayout* layout = lclNode->TypeIs(TYP_STRUCT) ? lclNode->GetLayout(m_compiler) : nullptr;
             GenTree* storedValue = nullptr;
@@ -582,10 +582,9 @@ class ShadowStackAllocator
         // Add in the shadow stack argument now that we know the shadow frame size.
         if (m_llvm->callHasManagedCallingConvention(call))
         {
-            unsigned hndIndex = m_llvm->CurrentBlock()->hasHndIndex() ? m_llvm->CurrentBlock()->getHndIndex()
-                                                                      : EHblkDsc::NO_ENCLOSING_INDEX;
+            unsigned funcIdx = m_llvm->getLlvmFunctionIndexForBlock(m_llvm->CurrentBlock());
             GenTree* calleeShadowStack =
-                m_llvm->insertShadowStackAddr(call, m_llvm->getShadowFrameSize(hndIndex), m_llvm->_shadowStackLclNum);
+                m_llvm->insertShadowStackAddr(call, m_llvm->getShadowFrameSize(funcIdx), m_llvm->_shadowStackLclNum);
             CallArg* calleeShadowStackArg =
                 call->gtArgs.PushFront(m_compiler, NewCallArg::Primitive(calleeShadowStack, CORINFO_TYPE_PTR));
 
@@ -644,25 +643,24 @@ void Llvm::Allocate()
 // getShadowFrameSize: What is the size of a function's shadow frame?
 //
 // Arguments:
-//    hndIndex - Handler index representing the function, NO_ENCLOSING_INDEX
-//               is used for the root
+//    funcIdx - Index representing the function
 //
 // Return Value:
 //    The size of the shadow frame for the given function. We term this
 //    the value by which the shadow stack pointer must be offset before
 //    calling managed code such that the caller will not clobber anything
-//    live on the frame. Note that funclets do not have any shadow state
+//    live on the frame. Note that filters do not have any shadow state
 //    of their own and use the "original" frame from the parent function.
 //
-unsigned Llvm::getShadowFrameSize(unsigned hndIndex) const
+unsigned Llvm::getShadowFrameSize(unsigned funcIdx) const
 {
-    if (hndIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+    if (_compiler->funGetFunc(funcIdx)->funKind == FUNC_FILTER)
     {
-        assert((_shadowStackLocalsSize % TARGET_POINTER_SIZE) == 0);
-        return _shadowStackLocalsSize;
+        return 0;
     }
 
-    return 0;
+    assert((_shadowStackLocalsSize % TARGET_POINTER_SIZE) == 0);
+    return _shadowStackLocalsSize;
 }
 
 ValueInitKind Llvm::getInitKindForLocal(unsigned lclNum) const