From 5178bd8e4d593cefcdbbd3aad174fd2f1c0f9f32 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Wed, 27 Aug 2025 22:10:38 +0200 Subject: [PATCH 1/9] moved stripPointerTypes() + debug prints --- .../phasar/PhasarLLVM/Utils/LLVMShorthands.h | 2 + .../ControlFlow/Resolver/Resolver.cpp | 9 -- .../IfdsIde/Problems/IDETypeStateAnalysis.cpp | 141 +++++++++++++++++- lib/PhasarLLVM/Utils/LLVMShorthands.cpp | 9 ++ .../typestate_analysis_fileio/CMakeLists.txt | 5 - .../Problems/IDETSAnalysisFileIOTest.cpp | 38 ++--- 6 files changed, 170 insertions(+), 34 deletions(-) diff --git a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h index f4fad2d4cb..d381f7ee69 100644 --- a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h +++ b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h @@ -283,6 +283,8 @@ class ModulesToSlotTracker { getVaListTagOrNull(const llvm::Function &Fun); [[nodiscard]] bool isVaListAlloca(const llvm::AllocaInst &Alloc); + +[[nodiscard]] const llvm::DIType *stripPointerTypes(const llvm::DIType *DITy); } // namespace psr #endif diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp index f97fc8c1f1..57d6b874f7 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp @@ -61,15 +61,6 @@ std::optional psr::getVFTIndex(const llvm::CallBase *CallSite) { return std::nullopt; } -static const llvm::DIType *stripPointerTypes(const llvm::DIType *DITy) { - while (const auto *DerivedTy = - llvm::dyn_cast_if_present(DITy)) { - // get rid of the pointer - DITy = DerivedTy->getBaseType(); - } - return DITy; -} - const llvm::DIType *psr::getReceiverType(const llvm::CallBase *CallSite) { if (!CallSite || CallSite->arg_empty() || (CallSite->hasStructRetAttr() && CallSite->arg_size() < 2)) { diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp 
b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp index 2ad21cc686..d678ed484a 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp @@ -22,10 +22,13 @@ #include "phasar/Utils/Logger.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Demangle/Demangle.h" #include "llvm/IR/AbstractCallSite.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" @@ -298,6 +301,16 @@ bool IDETypeStateAnalysisBase::hasMatchingTypeName(const llvm::Type *Ty) { } bool IDETypeStateAnalysisBase::hasMatchingType(d_t V) { + // TODO: + // - determine if general case is even needed anymore, or if we only need + // the general case + // - Can I use stripPointerTypes() for all cases below (Alloca, etc)? + // - How does AllocaInst, LoadInst, etc work under the hood? + // + // Run + // ./unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETSAnalysisFileIOTest + // for tests, make sure they're compiled with debug info! 
+#if false // General case if (V->getType()->isPointerTy() && !V->getType()->isOpaquePointerTy()) { if (hasMatchingTypeName(V->getType()->getNonOpaquePointerElementType())) { @@ -305,6 +318,8 @@ bool IDETypeStateAnalysisBase::hasMatchingType(d_t V) { } // fallthrough } +#endif + if (const auto *Alloca = llvm::dyn_cast(V)) { if (Alloca->getAllocatedType()->isPointerTy()) { if (Alloca->getAllocatedType()->isOpaquePointerTy() || @@ -337,6 +352,130 @@ bool IDETypeStateAnalysisBase::hasMatchingType(d_t V) { return false; } return false; -} +#if false + if (const auto *DITy = getVarTypeFromIR(V)) { + + llvm::outs() << "------------------------------------------------\n"; + + if (DITy) { + llvm::outs() << "DITy: exists" << "\n"; + if (DITy->getTag()) { + llvm::outs() << "DITy: had tag" << "\n"; + llvm::outs() << "tag was: " << DITy->getTag() << "\n"; + llvm::outs() << "TagString: " << llvm::dwarf::TagString(DITy->getTag()) + << "\n"; + } else { + llvm::outs() << "DITy: not tag, sadly" << "\n"; + } + } else { + llvm::outs() << "DITy: was nullptr" << "\n"; + } + + const auto *BaseOfDITy = psr::stripPointerTypes(DITy); + + if (BaseOfDITy) { + llvm::outs() << "BaseOfDITy: exists" << "\n"; + + if (BaseOfDITy->getTag()) { + llvm::outs() << "BaseOfDITy: had tag" << "\n"; + llvm::outs() << "tag was: " << BaseOfDITy->getTag() << "\n"; + llvm::outs() << "TagString: " + << llvm::dwarf::TagString(BaseOfDITy->getTag()) << "\n"; + } else { + llvm::outs() << "BaseOfDITy: not tag, sadly" << "\n"; + } + } else { + llvm::outs() << "BaseOfDITy: was nullptr" << "\n"; + } + + // General case + if (DITy->getTag() == llvm::dwarf::DW_TAG_structure_type) { + // if (hasMatchingType(DITy)) { + // return true; + // } + } + + if (const auto *Alloca = llvm::dyn_cast(V)) { + llvm::outs() << "Was AllocaInst\n"; + + if (Alloca->getAllocatedType()->isPointerTy()) { + if (Alloca->getAllocatedType()->isOpaquePointerTy() || + hasMatchingTypeName( + 
Alloca->getAllocatedType()->getNonOpaquePointerElementType())) { + return true; + } + } + return false; + } + + if (const auto *Load = llvm::dyn_cast(V)) { + llvm::outs() << "Was LoadInst\n"; + if (Load->getType()->isPointerTy()) { + if (Load->getType()->isOpaquePointerTy() || + hasMatchingTypeName( + Load->getType()->getNonOpaquePointerElementType())) { + return true; + } + } + return false; + } + if (const auto *Store = llvm::dyn_cast(V)) { + llvm::outs() << "Was StoreInst\n"; + if (Store->getValueOperand()->getType()->isPointerTy()) { + if (Store->getValueOperand()->getType()->isOpaquePointerTy() || + hasMatchingTypeName(Store->getValueOperand() + ->getType() + ->getNonOpaquePointerElementType())) { + return true; + } + } + return false; + } + + return false; + +#if false + // General case + if (V->getType()->isPointerTy() && !V->getType()->isOpaquePointerTy()) { + if (hasMatchingTypeName(V->getType()->getNonOpaquePointerElementType())) { + return true; + } + // fallthrough + } + if (const auto *Alloca = llvm::dyn_cast(V)) { + if (Alloca->getAllocatedType()->isPointerTy()) { + if (Alloca->getAllocatedType()->isOpaquePointerTy() || + hasMatchingTypeName( + Alloca->getAllocatedType()->getNonOpaquePointerElementType())) { + return true; + } + } + return false; + } + if (const auto *Load = llvm::dyn_cast(V)) { + if (Load->getType()->isPointerTy()) { + if (Load->getType()->isOpaquePointerTy() || + hasMatchingTypeName( + Load->getType()->getNonOpaquePointerElementType())) { + return true; + } + } + return false; + } + if (const auto *Store = llvm::dyn_cast(V)) { + if (Store->getValueOperand()->getType()->isPointerTy()) { + if (Store->getValueOperand()->getType()->isOpaquePointerTy() || + hasMatchingTypeName(Store->getValueOperand() + ->getType() + ->getNonOpaquePointerElementType())) { + return true; + } + } + return false; + } + return false; +#endif +#endif +} } // namespace psr::detail diff --git a/lib/PhasarLLVM/Utils/LLVMShorthands.cpp 
b/lib/PhasarLLVM/Utils/LLVMShorthands.cpp index 18cb20c97e..2e1f25994a 100644 --- a/lib/PhasarLLVM/Utils/LLVMShorthands.cpp +++ b/lib/PhasarLLVM/Utils/LLVMShorthands.cpp @@ -617,3 +617,12 @@ bool psr::isVaListAlloca(const llvm::AllocaInst &Alloc) { return false; } + +const llvm::DIType *psr::stripPointerTypes(const llvm::DIType *DITy) { + while (const auto *DerivedTy = + llvm::dyn_cast_if_present(DITy)) { + // get rid of the pointer + DITy = DerivedTy->getBaseType(); + } + return DITy; +} diff --git a/test/llvm_test_code/typestate_analysis_fileio/CMakeLists.txt b/test/llvm_test_code/typestate_analysis_fileio/CMakeLists.txt index 9da0ecdc68..ef2be35b66 100644 --- a/test/llvm_test_code/typestate_analysis_fileio/CMakeLists.txt +++ b/test/llvm_test_code/typestate_analysis_fileio/CMakeLists.txt @@ -1,10 +1,5 @@ file(GLOB typestate_files *.c *.cpp) -foreach(TEST_SRC ${typestate_files}) - get_filename_component(TEST_SRC_FILE ${TEST_SRC} NAME) - generate_ll_file(FILE ${TEST_SRC_FILE}) -endforeach(TEST_SRC) - foreach(TEST_SRC ${typestate_files}) get_filename_component(TEST_SRC_FILE ${TEST_SRC} NAME) generate_ll_file(FILE ${TEST_SRC_FILE} DEBUG) diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETSAnalysisFileIOTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETSAnalysisFileIOTest.cpp index 0b2d4fa52e..e5c80d4d3f 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETSAnalysisFileIOTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETSAnalysisFileIOTest.cpp @@ -91,7 +91,7 @@ class IDETSAnalysisFileIOTest : public ::testing::Test { }; // Test Fixture TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_01) { - initialize({PathToLlFiles + "typestate_01_c.ll"}); + initialize({PathToLlFiles + "typestate_01_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); Llvmtssolver.solve(); const std::map> Gt = { @@ -102,7 +102,7 @@ TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_01) { } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_02) 
{ - initialize({PathToLlFiles + "typestate_02_c.ll"}); + initialize({PathToLlFiles + "typestate_02_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); Llvmtssolver.solve(); @@ -112,7 +112,7 @@ TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_02) { } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_03) { - initialize({PathToLlFiles + "typestate_03_c.ll"}); + initialize({PathToLlFiles + "typestate_03_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); Llvmtssolver.solve(); @@ -138,7 +138,7 @@ TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_03) { } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_04) { - initialize({PathToLlFiles + "typestate_04_c.ll"}); + initialize({PathToLlFiles + "typestate_04_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); Llvmtssolver.solve(); @@ -160,7 +160,7 @@ TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_04) { } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_05) { - initialize({PathToLlFiles + "typestate_05_c.ll"}); + initialize({PathToLlFiles + "typestate_05_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); Llvmtssolver.solve(); @@ -179,7 +179,7 @@ TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_05) { TEST_F(IDETSAnalysisFileIOTest, DISABLED_HandleTypeState_06) { // This test fails due to imprecise points-to information - initialize({PathToLlFiles + "typestate_06_c.ll"}); + initialize({PathToLlFiles + "typestate_06_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); Llvmtssolver.solve(); @@ -214,7 +214,7 @@ TEST_F(IDETSAnalysisFileIOTest, DISABLED_HandleTypeState_06) { } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_07) { - initialize({PathToLlFiles + "typestate_07_c.ll"}); + initialize({PathToLlFiles + "typestate_07_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); Llvmtssolver.solve(); @@ -246,7 +246,7 @@ TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_07) { } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_08) { - initialize({PathToLlFiles + 
"typestate_08_c.ll"}); + initialize({PathToLlFiles + "typestate_08_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); Llvmtssolver.solve(); @@ -259,7 +259,7 @@ TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_08) { } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_09) { - initialize({PathToLlFiles + "typestate_09_c.ll"}); + initialize({PathToLlFiles + "typestate_09_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); Llvmtssolver.solve(); @@ -277,7 +277,7 @@ TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_09) { } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_10) { - initialize({PathToLlFiles + "typestate_10_c.ll"}); + initialize({PathToLlFiles + "typestate_10_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); Llvmtssolver.solve(); @@ -299,7 +299,7 @@ TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_10) { } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_11) { - initialize({PathToLlFiles + "typestate_11_c.ll"}); + initialize({PathToLlFiles + "typestate_11_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); Llvmtssolver.solve(); @@ -326,7 +326,7 @@ TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_11) { } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_12) { - initialize({PathToLlFiles + "typestate_12_c.ll"}); + initialize({PathToLlFiles + "typestate_12_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); Llvmtssolver.solve(); @@ -346,7 +346,7 @@ TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_12) { } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_13) { - initialize({PathToLlFiles + "typestate_13_c.ll"}); + initialize({PathToLlFiles + "typestate_13_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); Llvmtssolver.solve(); @@ -361,7 +361,7 @@ TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_13) { } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_14) { - initialize({PathToLlFiles + "typestate_14_c.ll"}); + initialize({PathToLlFiles + "typestate_14_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, 
&HA->getICFG()); Llvmtssolver.solve(); @@ -384,7 +384,7 @@ TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_14) { } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_15) { - initialize({PathToLlFiles + "typestate_15_c.ll"}); + initialize({PathToLlFiles + "typestate_15_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); Llvmtssolver.solve(); @@ -446,7 +446,7 @@ TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_16) { /// TODO: After the EF fix everything is BOT; --> Make the TSA more precise! - initialize({PathToLlFiles + "typestate_16_c.ll"}); + initialize({PathToLlFiles + "typestate_16_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); Llvmtssolver.solve(); @@ -483,7 +483,7 @@ TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_16) { // TODO: Check this case again! TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_17) { - initialize({PathToLlFiles + "typestate_17_c.ll"}); + initialize({PathToLlFiles + "typestate_17_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); Llvmtssolver.solve(); @@ -514,7 +514,7 @@ TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_17) { TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_18) { /// TODO: After the EF fix everything is BOT; --> Make the TSA more precise! - initialize({PathToLlFiles + "typestate_18_c.ll"}); + initialize({PathToLlFiles + "typestate_18_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); Llvmtssolver.solve(); @@ -536,7 +536,7 @@ TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_18) { // TODO: Check this case again! 
TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_19) { - initialize({PathToLlFiles + "typestate_19_c.ll"}); + initialize({PathToLlFiles + "typestate_19_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); Llvmtssolver.solve(); From a9df1fc1d00413e411d24a3a317039907266474f Mon Sep 17 00:00:00 2001 From: mxHuber Date: Fri, 29 Aug 2025 22:27:47 +0200 Subject: [PATCH 2/9] MetadataKind approach --- .../IfdsIde/Problems/IDETypeStateAnalysis.h | 24 ++++++++++-- .../CSTDFILEIOTypeStateDescription.h | 5 +++ .../OpenSSLEVPKDFDescription.h | 8 ++++ .../TypeStateDescription.h | 5 +++ .../IfdsIde/Problems/IDETypeStateAnalysis.cpp | 38 ++++++++++++++++++- .../CSTDFILEIOTypeStateDescription.cpp | 13 +++++++ .../OpenSSLEVPKDFDescription.cpp | 14 +++++++ 7 files changed, 103 insertions(+), 4 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h index 35226bf2f7..7f520391d4 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h @@ -23,9 +23,12 @@ #include "phasar/Utils/Printer.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Demangle/Demangle.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Value.h" #include @@ -81,6 +84,10 @@ class IDETypeStateAnalysisBase isFactoryFunction(llvm::StringRef Name) const noexcept = 0; [[nodiscard]] virtual bool isTypeNameOfInterest(llvm::StringRef Name) const noexcept = 0; + [[nodiscard]] virtual bool + isTypeTagOfInterest(llvm::dwarf::Tag CompareTag) const noexcept = 0; + [[nodiscard]] virtual bool + isTypeOfInterest(const llvm::Metadata *MDOp) const noexcept = 0; /** * @brief Returns all alloca's that are (indirect) aliases of V. 
@@ -115,7 +122,7 @@ class IDETypeStateAnalysisBase container_type getLocalAliasesAndAllocas(d_t V, llvm::StringRef Fname); /** - * @brief Checks if the type machtes the type of interest. + * @brief Checks if the type matches the type of interest. */ bool hasMatchingType(d_t V); @@ -195,8 +202,7 @@ class IDETypeStateAnalysis } [[no_unique_address]] std::conditional_t, - EmptyType, l_t> - BotElement{}; + EmptyType, l_t> BotElement{}; static EdgeFunction join(EdgeFunctionRef This, const EdgeFunction &OtherFunction) { @@ -511,6 +517,18 @@ class IDETypeStateAnalysis return Name.contains(TSD->getTypeNameOfInterest()); } + [[nodiscard]] bool + isTypeTagOfInterest(llvm::dwarf::Tag CompareTag) const noexcept override { + llvm::outs() << "TSD Tag: " + << llvm::dwarf::TagString(TSD->getTypeTagOfInterest()) << "\n"; + return TSD->getTypeTagOfInterest() == CompareTag; + } + + [[nodiscard]] bool + isTypeOfInterest(const llvm::Metadata *MDOp) const noexcept override { + return llvm::isagetTypeOfInterest()>(MDOp); + }; + void emitTextReport(GenericSolverResults SR, llvm::raw_ostream &OS = llvm::outs()) override { LLVMBasedCFG CFG; diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.h index 9c4da46d74..e9fb84d6de 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.h @@ -13,6 +13,8 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/Support/raw_ostream.h" #include @@ -71,6 +73,9 @@ class CSTDFILEIOTypeStateDescription 
getNextState(llvm::StringRef Tok, TypeStateDescription::State S) const override; [[nodiscard]] std::string getTypeNameOfInterest() const override; + [[nodiscard]] llvm::dwarf::Tag getTypeTagOfInterest() const override; + [[nodiscard]] + llvm::Metadata::MetadataKind getTypeOfInterest() const override; [[nodiscard]] std::set getConsumerParamIdx(llvm::StringRef F) const override; [[nodiscard]] std::set diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.h index 324333c623..35781d2e7b 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.h @@ -12,6 +12,9 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/IR/DebugInfoMetadata.h" + #include #include #include @@ -70,6 +73,11 @@ class OpenSSLEVPKDFDescription [[nodiscard]] std::string getTypeNameOfInterest() const override; + [[nodiscard]] llvm::dwarf::Tag getTypeTagOfInterest() const override; + + [[nodiscard]] + llvm::Metadata::MetadataKind getTypeOfInterest() const override; + [[nodiscard]] std::set getConsumerParamIdx(llvm::StringRef F) const override; diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h index 0e6865686b..01051e24fc 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h @@ -12,6 +12,8 @@ #include "phasar/PhasarLLVM/Utils/DataFlowAnalysisType.h" +#include 
"llvm/BinaryFormat/Dwarf.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/InstrTypes.h" #include @@ -26,6 +28,9 @@ struct TypeStateDescriptionBase { [[nodiscard]] virtual bool isConsumingFunction(llvm::StringRef F) const = 0; [[nodiscard]] virtual bool isAPIFunction(llvm::StringRef F) const = 0; [[nodiscard]] virtual std::string getTypeNameOfInterest() const = 0; + [[nodiscard]] virtual llvm::dwarf::Tag getTypeTagOfInterest() const = 0; + [[nodiscard]] virtual llvm::Metadata::MetadataKind + getTypeOfInterest() const = 0; [[nodiscard]] virtual std::set getConsumerParamIdx(llvm::StringRef F) const = 0; [[nodiscard]] virtual std::set diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp index d678ed484a..95e4691fed 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" @@ -307,9 +308,44 @@ bool IDETypeStateAnalysisBase::hasMatchingType(d_t V) { // - Can I use stripPointerTypes() for all cases below (Alloca, etc)? // - How does AllocaInst, LoadInst, etc work under the hood? // + // + // - Dwarf Tags seem to be ill fit for what I am trying to do here. What other + // info can I use? + // + // // Run // ./unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETSAnalysisFileIOTest // for tests, make sure they're compiled with debug info! 
+ + if (V->getType()->isPointerTy()) { + if (const auto *DITy = getVarTypeFromIR(V)) { + // llvm::outs() << "DITy is: " << llvm::dwarf::TagString(DITy->getTag()) + // << "\n"; + if (const auto *BaseDITy = stripPointerTypes(DITy)) { + // llvm::outs() << "BaseDITy is: " + // << llvm::dwarf::TagString(BaseDITy->getTag()) << "\n"; + + if (const auto &Operand = BaseDITy->getOperand(0)) { + if (const auto *OpType = Operand.get()) { + return isTypeOfInterest(OpType); + } + // llvm::outs() << *(BaseDITy->getOperand(0)) << "\n"; + // return isTypeOfInterest(Operand); + // if (llvm::isa(BaseDITy->getOperand(0))) { + // llvm::outs() << "Is a DIFile!!!\n"; + // } + } + // return isTypeOfInterest(BaseDITy->getTag()); + } + + return false; + } + + return false; + } + + return false; + #if false // General case if (V->getType()->isPointerTy() && !V->getType()->isOpaquePointerTy()) { @@ -318,7 +354,6 @@ bool IDETypeStateAnalysisBase::hasMatchingType(d_t V) { } // fallthrough } -#endif if (const auto *Alloca = llvm::dyn_cast(V)) { if (Alloca->getAllocatedType()->isPointerTy()) { @@ -352,6 +387,7 @@ bool IDETypeStateAnalysisBase::hasMatchingType(d_t V) { return false; } return false; +#endif #if false if (const auto *DITy = getVarTypeFromIR(V)) { diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.cpp index 018ac4d93b..a9a2cc0171 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.cpp @@ -12,6 +12,9 @@ #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "llvm/ADT/StringMap.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/ErrorHandling.h" #include @@ -130,6 +133,16 @@ std::string 
CSTDFILEIOTypeStateDescription::getTypeNameOfInterest() const { return "struct._IO_FILE"; } +llvm::dwarf::Tag CSTDFILEIOTypeStateDescription::getTypeTagOfInterest() const { + // TODO: ask fabian if this tag is the best fit. Afaik there is no IOFILE tag + return llvm::dwarf::Tag::DW_TAG_file_type; +} + +llvm::Metadata::MetadataKind +CSTDFILEIOTypeStateDescription::getTypeOfInterest() const { + return llvm::Metadata::DIFileKind; +} + std::set CSTDFILEIOTypeStateDescription::getConsumerParamIdx(llvm::StringRef F) const { if (isConsumingFunction(F)) { diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.cpp index d881af8ad6..670c3d50ef 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.cpp @@ -11,6 +11,7 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/ErrorHandling.h" #include @@ -77,6 +78,19 @@ std::string OpenSSLEVPKDFDescription::getTypeNameOfInterest() const { return "struct.evp_kdf_st"; } +llvm::dwarf::Tag OpenSSLEVPKDFDescription::getTypeTagOfInterest() const { + // TODO: ask Fabian what a good tag would be. The current one is just a + // placeholder + return llvm::dwarf::Tag::DW_TAG_structure_type; +} + +llvm::Metadata::MetadataKind +OpenSSLEVPKDFDescription::getTypeOfInterest() const { + // TODO: ask Fabian what MetadataKind could work here, if any. + // Return type here is a placeholder. 
+ return llvm::Metadata::GenericDINodeKind; +} + std::set OpenSSLEVPKDFDescription::getConsumerParamIdx(llvm::StringRef F) const { if (isConsumingFunction(F)) { From c7e6f98d20d07719f237f99b5de671a9a073fe30 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Sat, 30 Aug 2025 21:24:12 +0200 Subject: [PATCH 3/9] I think we don't get around using getName() --- .../IfdsIde/Problems/IDETypeStateAnalysis.h | 8 ++- .../IfdsIde/Problems/IDETypeStateAnalysis.cpp | 60 +++++++++++++------ .../CSTDFILEIOTypeStateDescription.cpp | 2 +- 3 files changed, 50 insertions(+), 20 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h index 7f520391d4..74c4ed3315 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h @@ -131,6 +131,7 @@ class IDETypeStateAnalysisBase return generateFlow(FactToGenerate, LLVMZeroValue::getInstance()); } + bool checkType(const llvm::Value *Value); bool hasMatchingTypeName(const llvm::Type *Ty); std::map AliasCache; @@ -514,19 +515,22 @@ class IDETypeStateAnalysis [[nodiscard]] bool isTypeNameOfInterest(llvm::StringRef Name) const noexcept override { + llvm::outs() << "TSD->getTypeNameOfInterest(): " + << TSD->getTypeNameOfInterest() << "\n"; return Name.contains(TSD->getTypeNameOfInterest()); } [[nodiscard]] bool isTypeTagOfInterest(llvm::dwarf::Tag CompareTag) const noexcept override { - llvm::outs() << "TSD Tag: " + llvm::outs() << "TSD->getTypeTagOfInterest(): " << llvm::dwarf::TagString(TSD->getTypeTagOfInterest()) << "\n"; return TSD->getTypeTagOfInterest() == CompareTag; } [[nodiscard]] bool isTypeOfInterest(const llvm::Metadata *MDOp) const noexcept override { - return llvm::isagetTypeOfInterest()>(MDOp); + // return llvm::isagetTypeOfInterest()>(MDOp); + return false; }; void emitTextReport(GenericSolverResults SR, diff 
--git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp index 95e4691fed..6d8e1ba571 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp @@ -301,6 +301,15 @@ bool IDETypeStateAnalysisBase::hasMatchingTypeName(const llvm::Type *Ty) { return isTypeNameOfInterest(Str); } +bool IDETypeStateAnalysisBase::checkType(const llvm::Value *Value) { + if (const auto *DITy = getVarTypeFromIR(Value)) { + return isTypeTagOfInterest(DITy->getTag()) && + isTypeNameOfInterest(DITy->getName()); + } + + return false; +} + bool IDETypeStateAnalysisBase::hasMatchingType(d_t V) { // TODO: // - determine if general case is even needed anymore, or if we only need @@ -317,30 +326,47 @@ bool IDETypeStateAnalysisBase::hasMatchingType(d_t V) { // ./unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETSAnalysisFileIOTest // for tests, make sure they're compiled with debug info! 
+ // General case if (V->getType()->isPointerTy()) { if (const auto *DITy = getVarTypeFromIR(V)) { - // llvm::outs() << "DITy is: " << llvm::dwarf::TagString(DITy->getTag()) - // << "\n"; if (const auto *BaseDITy = stripPointerTypes(DITy)) { - // llvm::outs() << "BaseDITy is: " - // << llvm::dwarf::TagString(BaseDITy->getTag()) << "\n"; - - if (const auto &Operand = BaseDITy->getOperand(0)) { - if (const auto *OpType = Operand.get()) { - return isTypeOfInterest(OpType); - } - // llvm::outs() << *(BaseDITy->getOperand(0)) << "\n"; - // return isTypeOfInterest(Operand); - // if (llvm::isa(BaseDITy->getOperand(0))) { - // llvm::outs() << "Is a DIFile!!!\n"; - // } - } - // return isTypeOfInterest(BaseDITy->getTag()); + llvm::outs() << "-------------------------------\n"; + llvm::outs() << "BaseDITy->getTag(): " + << llvm::dwarf::TagString(BaseDITy->getTag()) << "\n"; + llvm::outs() << "BaseDITy->getName(): " << BaseDITy->getName() << "\n"; + return isTypeTagOfInterest(BaseDITy->getTag()) && + isTypeNameOfInterest(BaseDITy->getName()); } + } + return false; + } - return false; + if (const auto *Alloca = llvm::dyn_cast(V)) { + if (Alloca->getAllocatedType()->isPointerTy()) { + checkType(Alloca); + } + return false; + } + + if (const auto *Load = llvm::dyn_cast(V)) { + if (Load->getType()->isPointerTy()) { + checkType(Load); } + return false; + } + if (const auto *Store = llvm::dyn_cast(V)) { + if (Store->getValueOperand()->getType()->isPointerTy()) { +#if false + if (Store->getValueOperand()->getType()->isOpaquePointerTy() || + hasMatchingTypeName(Store->getValueOperand() + ->getType() + ->getNonOpaquePointerElementType())) { + return true; + } +#endif + checkType(Store->getValueOperand()); + } return false; } diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.cpp index a9a2cc0171..64fa28eff2 100644 --- 
a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.cpp @@ -135,7 +135,7 @@ std::string CSTDFILEIOTypeStateDescription::getTypeNameOfInterest() const { llvm::dwarf::Tag CSTDFILEIOTypeStateDescription::getTypeTagOfInterest() const { // TODO: ask fabian if this tag is the best fit. Afaik there is no IOFILE tag - return llvm::dwarf::Tag::DW_TAG_file_type; + return llvm::dwarf::Tag::DW_TAG_structure_type; } llvm::Metadata::MetadataKind From 51ad2ea1013c1b29fe81caf096f36f8a5232fe64 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Tue, 2 Sep 2025 21:43:56 +0200 Subject: [PATCH 4/9] unittests still fail --- .../IfdsIde/Problems/IDETypeStateAnalysis.h | 23 +- .../CSTDFILEIOTypeStateDescription.h | 1 - .../OpenSSLEVPKDFCTXDescription.h | 2 + .../OpenSSLEVPKDFDescription.h | 2 - .../TypeStateDescription.h | 1 - .../IfdsIde/Problems/IDETypeStateAnalysis.cpp | 234 +----- .../CSTDFILEIOTypeStateDescription.cpp | 9 +- .../OpenSSLEVPKDFCTXDescription.cpp | 7 +- .../OpenSSLEVPKDFDescription.cpp | 6 - .../Problems/IDETSAnalysisFileIOTest.cpp | 739 +++++++++--------- unittests/TestUtils/SrcCodeLocationEntry.h | 450 +++++++++++ 11 files changed, 868 insertions(+), 606 deletions(-) create mode 100644 unittests/TestUtils/SrcCodeLocationEntry.h diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h index 74c4ed3315..23c2258482 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h @@ -84,10 +84,6 @@ class IDETypeStateAnalysisBase isFactoryFunction(llvm::StringRef Name) const noexcept = 0; [[nodiscard]] virtual bool isTypeNameOfInterest(llvm::StringRef Name) const noexcept = 0; - [[nodiscard]] virtual bool - 
isTypeTagOfInterest(llvm::dwarf::Tag CompareTag) const noexcept = 0; - [[nodiscard]] virtual bool - isTypeOfInterest(const llvm::Metadata *MDOp) const noexcept = 0; /** * @brief Returns all alloca's that are (indirect) aliases of V. @@ -131,8 +127,9 @@ class IDETypeStateAnalysisBase return generateFlow(FactToGenerate, LLVMZeroValue::getInstance()); } - bool checkType(const llvm::Value *Value); bool hasMatchingTypeName(const llvm::Type *Ty); + bool hasMatchingTypeName(const llvm::Value *Value); + bool hasMatchingTypeName(const llvm::DIType *DITy); std::map AliasCache; LLVMAliasInfoRef PT{}; @@ -517,22 +514,12 @@ class IDETypeStateAnalysis isTypeNameOfInterest(llvm::StringRef Name) const noexcept override { llvm::outs() << "TSD->getTypeNameOfInterest(): " << TSD->getTypeNameOfInterest() << "\n"; + llvm::outs() << "Compare Name: " << Name << "\n"; + llvm::outs() << "Name.contains(TSD->getTypeNameOfInterest()): " + << Name.contains(TSD->getTypeNameOfInterest()) << "\n"; return Name.contains(TSD->getTypeNameOfInterest()); } - [[nodiscard]] bool - isTypeTagOfInterest(llvm::dwarf::Tag CompareTag) const noexcept override { - llvm::outs() << "TSD->getTypeTagOfInterest(): " - << llvm::dwarf::TagString(TSD->getTypeTagOfInterest()) << "\n"; - return TSD->getTypeTagOfInterest() == CompareTag; - } - - [[nodiscard]] bool - isTypeOfInterest(const llvm::Metadata *MDOp) const noexcept override { - // return llvm::isagetTypeOfInterest()>(MDOp); - return false; - }; - void emitTextReport(GenericSolverResults SR, llvm::raw_ostream &OS = llvm::outs()) override { LLVMBasedCFG CFG; diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.h index e9fb84d6de..ff7942350b 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.h +++ 
b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.h @@ -73,7 +73,6 @@ class CSTDFILEIOTypeStateDescription getNextState(llvm::StringRef Tok, TypeStateDescription::State S) const override; [[nodiscard]] std::string getTypeNameOfInterest() const override; - [[nodiscard]] llvm::dwarf::Tag getTypeTagOfInterest() const override; [[nodiscard]] llvm::Metadata::MetadataKind getTypeOfInterest() const override; [[nodiscard]] std::set diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.h index 144d3571c4..f0f1c69265 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.h @@ -93,6 +93,8 @@ class OpenSSLEVPKDFCTXDescription getNextState(llvm::StringRef Tok, State S, const llvm::CallBase *CallSite) const override; [[nodiscard]] std::string getTypeNameOfInterest() const override; + [[nodiscard]] + llvm::Metadata::MetadataKind getTypeOfInterest() const override; [[nodiscard]] std::set getConsumerParamIdx(llvm::StringRef F) const override; [[nodiscard]] std::set diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.h index 35781d2e7b..864180f780 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.h @@ -73,8 +73,6 @@ class OpenSSLEVPKDFDescription [[nodiscard]] std::string getTypeNameOfInterest() const override; - [[nodiscard]] llvm::dwarf::Tag getTypeTagOfInterest() 
const override; - [[nodiscard]] llvm::Metadata::MetadataKind getTypeOfInterest() const override; diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h index 01051e24fc..e197f97ae7 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h @@ -28,7 +28,6 @@ struct TypeStateDescriptionBase { [[nodiscard]] virtual bool isConsumingFunction(llvm::StringRef F) const = 0; [[nodiscard]] virtual bool isAPIFunction(llvm::StringRef F) const = 0; [[nodiscard]] virtual std::string getTypeNameOfInterest() const = 0; - [[nodiscard]] virtual llvm::dwarf::Tag getTypeTagOfInterest() const = 0; [[nodiscard]] virtual llvm::Metadata::MetadataKind getTypeOfInterest() const = 0; [[nodiscard]] virtual std::set diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp index 6d8e1ba571..c74a0b4c94 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp @@ -301,243 +301,47 @@ bool IDETypeStateAnalysisBase::hasMatchingTypeName(const llvm::Type *Ty) { return isTypeNameOfInterest(Str); } -bool IDETypeStateAnalysisBase::checkType(const llvm::Value *Value) { - if (const auto *DITy = getVarTypeFromIR(Value)) { - return isTypeTagOfInterest(DITy->getTag()) && - isTypeNameOfInterest(DITy->getName()); +bool IDETypeStateAnalysisBase::hasMatchingTypeName(const llvm::Value *Value) { + if (const auto *VarTy = getVarTypeFromIR(Value)) { + if (const auto *BaseTy = stripPointerTypes(VarTy)) { + return hasMatchingTypeName(BaseTy); + } } return false; } -bool IDETypeStateAnalysisBase::hasMatchingType(d_t V) { - // TODO: - // - 
determine if general case is even needed anymore, or if we only need - // the general case - // - Can I use stripPointerTypes() for all cases below (Alloca, etc)? - // - How does AllocaInst, LoadInst, etc work under the hood? - // - // - // - Dwarf Tags seem to be ill fit for what I am trying to do here. What other - // info can I use? - // - // - // Run - // ./unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETSAnalysisFileIOTest - // for tests, make sure they're compiled with debug info! - - // General case - if (V->getType()->isPointerTy()) { - if (const auto *DITy = getVarTypeFromIR(V)) { - if (const auto *BaseDITy = stripPointerTypes(DITy)) { - llvm::outs() << "-------------------------------\n"; - llvm::outs() << "BaseDITy->getTag(): " - << llvm::dwarf::TagString(BaseDITy->getTag()) << "\n"; - llvm::outs() << "BaseDITy->getName(): " << BaseDITy->getName() << "\n"; - return isTypeTagOfInterest(BaseDITy->getTag()) && - isTypeNameOfInterest(BaseDITy->getName()); - } - } - return false; - } - - if (const auto *Alloca = llvm::dyn_cast(V)) { - if (Alloca->getAllocatedType()->isPointerTy()) { - checkType(Alloca); - } - return false; - } - - if (const auto *Load = llvm::dyn_cast(V)) { - if (Load->getType()->isPointerTy()) { - checkType(Load); - } - return false; - } - - if (const auto *Store = llvm::dyn_cast(V)) { - if (Store->getValueOperand()->getType()->isPointerTy()) { -#if false - if (Store->getValueOperand()->getType()->isOpaquePointerTy() || - hasMatchingTypeName(Store->getValueOperand() - ->getType() - ->getNonOpaquePointerElementType())) { - return true; - } -#endif - checkType(Store->getValueOperand()); - } - return false; +bool IDETypeStateAnalysisBase::hasMatchingTypeName(const llvm::DIType *DITy) { + if (llvm::isa(DITy) && !DITy->getName().empty()) { + return isTypeNameOfInterest(DITy->getName()); } return false; +} -#if false +bool IDETypeStateAnalysisBase::hasMatchingType(d_t V) { // General case - if (V->getType()->isPointerTy() && 
!V->getType()->isOpaquePointerTy()) { - if (hasMatchingTypeName(V->getType()->getNonOpaquePointerElementType())) { + if (V->getType()->isPointerTy()) { + if (hasMatchingTypeName(V)) { return true; } // fallthrough } if (const auto *Alloca = llvm::dyn_cast(V)) { - if (Alloca->getAllocatedType()->isPointerTy()) { - if (Alloca->getAllocatedType()->isOpaquePointerTy() || - hasMatchingTypeName( - Alloca->getAllocatedType()->getNonOpaquePointerElementType())) { - return true; - } - } - return false; - } - if (const auto *Load = llvm::dyn_cast(V)) { - if (Load->getType()->isPointerTy()) { - if (Load->getType()->isOpaquePointerTy() || - hasMatchingTypeName( - Load->getType()->getNonOpaquePointerElementType())) { - return true; - } - } - return false; - } - if (const auto *Store = llvm::dyn_cast(V)) { - if (Store->getValueOperand()->getType()->isPointerTy()) { - if (Store->getValueOperand()->getType()->isOpaquePointerTy() || - hasMatchingTypeName(Store->getValueOperand() - ->getType() - ->getNonOpaquePointerElementType())) { - return true; - } - } - return false; - } - return false; -#endif -#if false - if (const auto *DITy = getVarTypeFromIR(V)) { - - llvm::outs() << "------------------------------------------------\n"; - - if (DITy) { - llvm::outs() << "DITy: exists" << "\n"; - if (DITy->getTag()) { - llvm::outs() << "DITy: had tag" << "\n"; - llvm::outs() << "tag was: " << DITy->getTag() << "\n"; - llvm::outs() << "TagString: " << llvm::dwarf::TagString(DITy->getTag()) - << "\n"; - } else { - llvm::outs() << "DITy: not tag, sadly" << "\n"; - } - } else { - llvm::outs() << "DITy: was nullptr" << "\n"; - } - - const auto *BaseOfDITy = psr::stripPointerTypes(DITy); - - if (BaseOfDITy) { - llvm::outs() << "BaseOfDITy: exists" << "\n"; - - if (BaseOfDITy->getTag()) { - llvm::outs() << "BaseOfDITy: had tag" << "\n"; - llvm::outs() << "tag was: " << BaseOfDITy->getTag() << "\n"; - llvm::outs() << "TagString: " - << llvm::dwarf::TagString(BaseOfDITy->getTag()) << "\n"; - } else 
{ - llvm::outs() << "BaseOfDITy: not tag, sadly" << "\n"; - } - } else { - llvm::outs() << "BaseOfDITy: was nullptr" << "\n"; - } - - // General case - if (DITy->getTag() == llvm::dwarf::DW_TAG_structure_type) { - // if (hasMatchingType(DITy)) { - // return true; - // } - } - - if (const auto *Alloca = llvm::dyn_cast(V)) { - llvm::outs() << "Was AllocaInst\n"; - - if (Alloca->getAllocatedType()->isPointerTy()) { - if (Alloca->getAllocatedType()->isOpaquePointerTy() || - hasMatchingTypeName( - Alloca->getAllocatedType()->getNonOpaquePointerElementType())) { - return true; - } - } - return false; - } - - if (const auto *Load = llvm::dyn_cast(V)) { - llvm::outs() << "Was LoadInst\n"; - if (Load->getType()->isPointerTy()) { - if (Load->getType()->isOpaquePointerTy() || - hasMatchingTypeName( - Load->getType()->getNonOpaquePointerElementType())) { - return true; - } - } - return false; - } - if (const auto *Store = llvm::dyn_cast(V)) { - llvm::outs() << "Was StoreInst\n"; - if (Store->getValueOperand()->getType()->isPointerTy()) { - if (Store->getValueOperand()->getType()->isOpaquePointerTy() || - hasMatchingTypeName(Store->getValueOperand() - ->getType() - ->getNonOpaquePointerElementType())) { - return true; - } - } - return false; - } - - return false; - -#if false - // General case - if (V->getType()->isPointerTy() && !V->getType()->isOpaquePointerTy()) { - if (hasMatchingTypeName(V->getType()->getNonOpaquePointerElementType())) { - return true; - } - // fallthrough + return Alloca->getAllocatedType()->isPointerTy() && + hasMatchingTypeName(Alloca); } - if (const auto *Alloca = llvm::dyn_cast(V)) { - if (Alloca->getAllocatedType()->isPointerTy()) { - if (Alloca->getAllocatedType()->isOpaquePointerTy() || - hasMatchingTypeName( - Alloca->getAllocatedType()->getNonOpaquePointerElementType())) { - return true; - } - } - return false; - } if (const auto *Load = llvm::dyn_cast(V)) { - if (Load->getType()->isPointerTy()) { - if (Load->getType()->isOpaquePointerTy() || - 
hasMatchingTypeName( - Load->getType()->getNonOpaquePointerElementType())) { - return true; - } - } - return false; + return Load->getType()->isPointerTy() && hasMatchingTypeName(Load); } + if (const auto *Store = llvm::dyn_cast(V)) { - if (Store->getValueOperand()->getType()->isPointerTy()) { - if (Store->getValueOperand()->getType()->isOpaquePointerTy() || - hasMatchingTypeName(Store->getValueOperand() - ->getType() - ->getNonOpaquePointerElementType())) { - return true; - } - } - return false; + return Store->getValueOperand()->getType()->isPointerTy() && + hasMatchingTypeName(Store->getValueOperand()); } + return false; -#endif -#endif } } // namespace psr::detail diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.cpp index 64fa28eff2..1492f0aadf 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.cpp @@ -130,17 +130,12 @@ CSTDFILEIOTypeStateDescription::getNextState(llvm::StringRef Tok, } std::string CSTDFILEIOTypeStateDescription::getTypeNameOfInterest() const { - return "struct._IO_FILE"; -} - -llvm::dwarf::Tag CSTDFILEIOTypeStateDescription::getTypeTagOfInterest() const { - // TODO: ask fabian if this tag is the best fit. 
Afaik there is no IOFILE tag - return llvm::dwarf::Tag::DW_TAG_structure_type; + return "_IO_FILE"; } llvm::Metadata::MetadataKind CSTDFILEIOTypeStateDescription::getTypeOfInterest() const { - return llvm::Metadata::DIFileKind; + return llvm::Metadata::DICompositeTypeKind; } std::set diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.cpp index 38a4e22af1..faef984a5b 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.cpp @@ -125,7 +125,12 @@ OpenSSLEVPKDFCTXState OpenSSLEVPKDFCTXDescription::getNextState( } std::string OpenSSLEVPKDFCTXDescription::getTypeNameOfInterest() const { - return "struct.evp_kdf_ctx_st"; + return "evp_kdf_ctx_st"; +} + +llvm::Metadata::MetadataKind +OpenSSLEVPKDFCTXDescription::getTypeOfInterest() const { + return llvm::Metadata::DICompositeTypeKind; } std::set diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.cpp index 670c3d50ef..a34963793c 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.cpp @@ -78,12 +78,6 @@ std::string OpenSSLEVPKDFDescription::getTypeNameOfInterest() const { return "struct.evp_kdf_st"; } -llvm::dwarf::Tag OpenSSLEVPKDFDescription::getTypeTagOfInterest() const { - // TODO: ask Fabian what a good tag would be. 
The current one is just a - // placeholder - return llvm::dwarf::Tag::DW_TAG_structure_type; -} - llvm::Metadata::MetadataKind OpenSSLEVPKDFDescription::getTypeOfInterest() const { // TODO: ask Fabian what MetadataKind could work here, if any. diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETSAnalysisFileIOTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETSAnalysisFileIOTest.cpp index e5c80d4d3f..79f09ab423 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETSAnalysisFileIOTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETSAnalysisFileIOTest.cpp @@ -13,20 +13,24 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.h" #include "phasar/PhasarLLVM/HelperAnalyses.h" -#include "phasar/PhasarLLVM/Passes/ValueAnnotationPass.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" #include "phasar/PhasarLLVM/SimpleAnalysisConstructor.h" #include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/DebugOutput.h" +#include "llvm/IR/Instruction.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" + +#include "SrcCodeLocationEntry.h" #include "TestConfig.h" #include "gtest/gtest.h" -#include #include -using namespace std; using namespace psr; +using namespace psr::unittest; /* ============== TEST FIXTURE ============== */ class IDETSAnalysisFileIOTest : public ::testing::Test { @@ -58,9 +62,28 @@ class IDETSAnalysisFileIOTest : public ::testing::Test { *HA, &CSTDFILEIODesc, EntryPoints); } - void SetUp() override { ValueAnnotationPass::resetValueID(); } - - void TearDown() override {} + using GroundTruthMapTy = + std::map>; + + [[nodiscard]] static inline auto convertTestingLocationMapMapInIR( + const GroundTruthMapTy &Locs, + const ProjectIRDBBase &IRDB) { + std::map> Ret; + 
llvm::transform( + Locs, std::inserter(Ret, Ret.end()), [&](const auto &LocAndSet) { + const auto &[InstLoc, InnerMap] = LocAndSet; + const auto *LocVal = llvm::dyn_cast_if_present( + testingLocInIR(InstLoc, IRDB)); + std::map ConvMap; + for (const auto &[FactLoc, Val] : InnerMap) { + if (const auto *Fact = testingLocInIR(FactLoc, IRDB)) { + ConvMap.try_emplace(Fact, Val); + } + } + return std::make_pair(LocVal, std::move(ConvMap)); + }); + return Ret; + } /** * We map instruction id to value for the ground truth. ID has to be @@ -69,23 +92,24 @@ class IDETSAnalysisFileIOTest : public ::testing::Test { * @param solver provides the results */ void compareResults( - const std::map> &GroundTruth, + const GroundTruthMapTy &GroundTruth, IDESolver_P> &Solver) { - for (const auto &InstToGroundTruth : GroundTruth) { - const auto *Inst = - HA->getProjectIRDB().getInstruction(InstToGroundTruth.first); - // std::cout << "Handle results at " << InstToGroundTruth.first << - // std::endl; - auto GT = InstToGroundTruth.second; - std::map Results; - for (auto Result : Solver.resultsAt(Inst, true)) { - if (GT.find(getMetaDataID(Result.first)) != GT.end()) { - Results.insert(std::pair( - getMetaDataID(Result.first), int(Result.second))); + auto GroundTruthEntries = + convertTestingLocationMapMapInIR(GroundTruth, HA->getProjectIRDB()); + + for (const auto &[CurrInst, GT] : GroundTruthEntries) { + std::map Results; + + for (const auto &[ResFact, ResState] : Solver.resultsAt(CurrInst, true)) { + if (GT.count(ResFact)) { + Results.try_emplace(ResFact, int(ResState)); } } - EXPECT_EQ(Results, GT) << "At " << llvmIRToShortString(Inst); + + EXPECT_EQ(Results, GT) + << "At " << llvmIRToShortString(CurrInst) << ": Expected " + << PrettyPrinter{GT} << "; got: " << PrettyPrinter{Results}; } } }; // Test Fixture @@ -94,352 +118,378 @@ TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_01) { initialize({PathToLlFiles + "typestate_01_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); 
Llvmtssolver.solve(); - const std::map> Gt = { - {5, {{"3", IOSTATE::UNINIT}}}, - {9, {{"3", IOSTATE::CLOSED}}}, - {7, {{"3", IOSTATE::OPENED}}}}; - compareResults(Gt, Llvmtssolver); + + GroundTruthMapTy GroundTruth; + const auto File = LineColFun{4, 9, "main"}; + const auto Entry = LineColFun{5, 7, "main"}; + const auto EntryTwo = LineColFun{6, 3, "main"}; + const auto EntryThree = LineColFun{7, 3, "main"}; + GroundTruth.insert({Entry, {{File, IOSTATE::UNINIT}}}); + GroundTruth.insert({EntryTwo, {{File, IOSTATE::OPENED}}}); + GroundTruth.insert({EntryThree, {{File, IOSTATE::CLOSED}}}); + compareResults(GroundTruth, Llvmtssolver); } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_02) { initialize({PathToLlFiles + "typestate_02_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); - Llvmtssolver.solve(); - const std::map> Gt = { - {7, {{"3", IOSTATE::OPENED}, {"5", IOSTATE::OPENED}}}}; - compareResults(Gt, Llvmtssolver); + + GroundTruthMapTy GroundTruth; + const auto File = LineColFun{4, 9, "main"}; + const auto Entry = LineColFun{6, 3, "main"}; + GroundTruth.insert({Entry, {{File, IOSTATE::OPENED}}}); + compareResults(GroundTruth, Llvmtssolver); } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_03) { initialize({PathToLlFiles + "typestate_03_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); - Llvmtssolver.solve(); - // llvmtssolver.printReport(); - const std::map> Gt = { - // Entry in foo() - {2, {{"foo.0", IOSTATE::OPENED}}}, - // Exit in foo() - {6, - { - {"foo.0", IOSTATE::CLOSED}, - {"2", IOSTATE::CLOSED}, - {"4", IOSTATE::CLOSED}, - //{"8", IOSTATE::CLOSED} // 6 is before 8; so no info avaliable - // before ret FF - }}, - // Exit in main() - {14, - {{"2", IOSTATE::CLOSED}, - {"8", IOSTATE::CLOSED}, - {"12", IOSTATE::CLOSED}}}}; - compareResults(Gt, Llvmtssolver); + + GroundTruthMapTy GroundTruth; + // %f = alloca ptr, align 8 + const auto MainFile = LineColFun{6, 9, "main"}; + // %f.addr = alloca ptr, align 8 + // const auto FooFile = 
LineColFun{3, 16, "foo"}; + // const auto FooFClose = + // LineColFun{3, 21, "foo"}; + // %0 = load ptr, ptr %f + const auto PassFToFClose = LineColFun{3, 28, "foo"}; + // ret void + const auto FooRet = LineColFun{3, 32, "foo"}; + // %0 = load ptr, ptr %f, align 8 + const auto PassFToFoo = LineColFun{9, 7, "main"}; + // ret i32 0 + const auto Return = LineColFun{11, 3, "main"}; + // Entry in foo() + // GroundTruth.insert({FooFClose, {{FooFile, IOSTATE::OPENED}}}); + // Exit in foo() + GroundTruth.insert({FooRet, + {// {FooFile, IOSTATE::CLOSED}, + {PassFToFClose, IOSTATE::CLOSED}}}); + // Exit in main() + GroundTruth.insert({Return, + {// {FooFClose, IOSTATE::CLOSED}, + {MainFile, IOSTATE::CLOSED}, + {PassFToFoo, IOSTATE::CLOSED}}}); + compareResults(GroundTruth, Llvmtssolver); } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_04) { initialize({PathToLlFiles + "typestate_04_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); - Llvmtssolver.solve(); - const std::map> Gt = { - // At exit in foo() - {6, - { - {"2", IOSTATE::OPENED}, - //{"8", IOSTATE::OPENED} // 6 is before 8, so no info available - // before retFF - }}, - // Before closing in main() - {12, {{"2", IOSTATE::UNINIT}, {"8", IOSTATE::UNINIT}}}, - // At exit in main() - {14, {{"2", IOSTATE::ERROR}, {"8", IOSTATE::ERROR}}}}; - - compareResults(Gt, Llvmtssolver); + GroundTruthMapTy GroundTruth; + const auto FooArg = LineColFun{4, 16, "foo"}; + const auto FooRet = LineColFun{4, 49, "foo"}; + const auto File = LineColFun{7, 9, "main"}; + const auto FClose = LineColFun{9, 3, "main"}; + const auto Return = LineColFun{10, 3, "main"}; + GroundTruth.insert({FooRet, {{FooArg, IOSTATE::OPENED}}}); + GroundTruth.insert({FClose, {{File, IOSTATE::UNINIT}}}); + GroundTruth.insert({Return, {{File, IOSTATE::ERROR}}}); + compareResults(GroundTruth, Llvmtssolver); } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_05) { initialize({PathToLlFiles + "typestate_05_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, 
&HA->getICFG()); - Llvmtssolver.solve(); - const std::map> Gt = { - // Before if statement - {10, {{"4", IOSTATE::OPENED}, {"6", IOSTATE::OPENED}}}, - // Inside if statement at last instruction - {13, - {{"4", IOSTATE::CLOSED}, - {"6", IOSTATE::CLOSED}, - {"11", IOSTATE::CLOSED}}}, - // After if statement - {14, {{"4", IOSTATE::BOT}, {"6", IOSTATE::BOT}}}}; - compareResults(Gt, Llvmtssolver); + + GroundTruthMapTy GroundTruth; + const auto File = LineColFun{6, 9, "main"}; + const auto CallFOpen = LineColFun{7, 7, "main"}; + const auto AfterFOpen = LineColFun{8, 7, "main"}; + const auto LoadFile = LineColFun{9, 12, "main"}; + const auto AfterFClose = LineColFun{10, 3, "main"}; + const auto Return = LineColFun{11, 3, "main"}; + GroundTruth.insert( + {AfterFOpen, {{File, IOSTATE::OPENED}, {CallFOpen, IOSTATE::OPENED}}}); + GroundTruth.insert({AfterFClose, + {{File, IOSTATE::CLOSED}, + {CallFOpen, IOSTATE::CLOSED}, + {LoadFile, IOSTATE::CLOSED}}}); + GroundTruth.insert( + {Return, {{File, IOSTATE::BOT}, {CallFOpen, IOSTATE::BOT}}}); + compareResults(GroundTruth, Llvmtssolver); } TEST_F(IDETSAnalysisFileIOTest, DISABLED_HandleTypeState_06) { // This test fails due to imprecise points-to information initialize({PathToLlFiles + "typestate_06_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); - Llvmtssolver.solve(); - const std::map> Gt = { - // Before first fopen() - {8, {{"5", IOSTATE::UNINIT}, {"6", IOSTATE::UNINIT}}}, - // Before storing the result of the first fopen() - {9, - {{"5", IOSTATE::UNINIT}, - {"6", IOSTATE::UNINIT}, - // Return value of first fopen() - {"8", IOSTATE::OPENED}}}, - // Before second fopen() - {10, - {{"5", IOSTATE::OPENED}, - {"6", IOSTATE::UNINIT}, - {"8", IOSTATE::OPENED}}}, - // Before storing the result of the second fopen() - {11, - {{"5", IOSTATE::OPENED}, - {"6", IOSTATE::UNINIT}, - // Return value of second fopen() - {"10", IOSTATE::OPENED}}}, - // Before fclose() - {13, - {{"5", IOSTATE::OPENED}, - {"6", IOSTATE::OPENED}, 
- {"12", IOSTATE::OPENED}}}, - // After if statement - {14, {{"5", IOSTATE::CLOSED}, {"6", IOSTATE::OPENED}}}}; - compareResults(Gt, Llvmtssolver); + + GroundTruthMapTy GroundTruth; + + // %f = alloca ptr, align 8 + const auto FileF = LineColFun{5, 9, "main"}; + // %d = alloca ptr, align 8 + const auto FileD = LineColFun{6, 9, "main"}; + // %call = call noalias ptr @fopen(ptr noundef @.str, ptr noundef @.str.1) + const auto FirstFOpenCall = LineColFun{7, 7, "main"}; + // store ptr %call, ptr %f, align 8 + const auto StoreFirstFOpenRetVal = LineColFun{7, 5, "main"}; + // %call1 = call noalias ptr @fopen(ptr noundef @.str.2, ptr noundef @.str.3) + const auto SecondFOpenCall = LineColFun{8, 7, "main"}; + // store ptr %call1, ptr %d, align 8 + const auto StoreSecondFOpenRetVal = LineColFun{8, 5, "main"}; + // %0 = load ptr, ptr %f, align 8 + const auto LoadFileF = LineColFun{10, 10, "main"}; + // %call2 = call i32 @fclose(ptr noundef %0) + const auto CallFClose = LineColFun{10, 3, "main"}; + // ret i32 0 + const auto Return = LineColFun{12, 3, "main"}; + + GroundTruth.insert({FirstFOpenCall, {{FileF, IOSTATE::UNINIT}}}); + GroundTruth.insert({FirstFOpenCall, {{FileD, IOSTATE::UNINIT}}}); + + GroundTruth.insert({StoreFirstFOpenRetVal, {{FileF, IOSTATE::UNINIT}}}); + GroundTruth.insert({StoreFirstFOpenRetVal, {{FileD, IOSTATE::UNINIT}}}); + GroundTruth.insert( + {StoreFirstFOpenRetVal, {{FirstFOpenCall, IOSTATE::OPENED}}}); + + GroundTruth.insert({SecondFOpenCall, {{FileF, IOSTATE::OPENED}}}); + GroundTruth.insert({SecondFOpenCall, {{FileD, IOSTATE::UNINIT}}}); + GroundTruth.insert({SecondFOpenCall, {{FirstFOpenCall, IOSTATE::OPENED}}}); + + GroundTruth.insert({StoreSecondFOpenRetVal, {{FileF, IOSTATE::OPENED}}}); + GroundTruth.insert({StoreSecondFOpenRetVal, {{FileD, IOSTATE::UNINIT}}}); + GroundTruth.insert( + {StoreSecondFOpenRetVal, {{SecondFOpenCall, IOSTATE::OPENED}}}); + + GroundTruth.insert({CallFClose, {{FileF, IOSTATE::OPENED}}}); + 
GroundTruth.insert({CallFClose, {{FileD, IOSTATE::UNINIT}}}); + GroundTruth.insert({CallFClose, {{LoadFileF, IOSTATE::OPENED}}}); + + GroundTruth.insert({Return, {{FileF, IOSTATE::OPENED}}}); + GroundTruth.insert({Return, {{FileD, IOSTATE::UNINIT}}}); + compareResults(GroundTruth, Llvmtssolver); } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_07) { initialize({PathToLlFiles + "typestate_07_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); - Llvmtssolver.solve(); - const std::map> Gt = { - // In foo() - {6, - { - {"foo.0", IOSTATE::CLOSED}, {"2", IOSTATE::CLOSED}, - //{"8", IOSTATE::CLOSED}// 6 is before 8, so no info available - // before retFF - }}, - // At fclose() - {11, {{"8", IOSTATE::UNINIT}, {"10", IOSTATE::UNINIT}}}, - // After fclose() - {12, {{"8", IOSTATE::ERROR}, {"10", IOSTATE::ERROR}}}, - // After fopen() - {13, - {{"8", IOSTATE::ERROR}, - {"10", IOSTATE::ERROR}, - {"12", IOSTATE::OPENED}}}, - // After store - {14, - {{"8", IOSTATE::OPENED}, - {"10", IOSTATE::ERROR}, - {"12", IOSTATE::OPENED}}}, - // At exit in main() - {16, {{"2", IOSTATE::CLOSED}, {"8", IOSTATE::CLOSED}}}}; - compareResults(Gt, Llvmtssolver); + + GroundTruthMapTy GroundTruth; + // %f.addr = alloca ptr, align 8 + const auto FooFile = LineColFun{3, 16, "foo"}; + // ret void + const auto FooRet = LineColFun{3, 32, "foo"}; + // %f = alloca ptr, align 8 + const auto MainFile = LineColFun{6, 9, "main"}; + // %0 = load ptr, ptr %f, align 8 + const auto MainFileLoad = LineColFun{7, 10, "main"}; + // %call = call i32 @fclose(ptr noundef %0) + const auto CallFClose = LineColFun{7, 3, "main"}; + // %call1 = call noalias ptr @fopen(ptr noundef @.str, ptr noundef @.str.1) + const auto Call1FOpen = LineColFun{8, 7, "main"}; + // store ptr %call1, ptr %f, align 8 + const auto StoreOfCall1 = LineColFun{8, 5, "main"}; + // %1 = load ptr, ptr %f, align 8 + const auto LoadMainFile = LineColFun{10, 7, "main"}; + // ret i32 0 + const auto MainReturn = LineColFun{12, 3, "main"}; + + 
GroundTruth.insert({FooRet, {{FooFile, IOSTATE::CLOSED}}}); + GroundTruth.insert( + {CallFClose, + {{MainFile, IOSTATE::UNINIT}, {MainFileLoad, IOSTATE::UNINIT}}}); + GroundTruth.insert( + {Call1FOpen, + {{MainFile, IOSTATE::ERROR}, {MainFileLoad, IOSTATE::ERROR}}}); + GroundTruth.insert({StoreOfCall1, + {{MainFile, IOSTATE::ERROR}, + {MainFileLoad, IOSTATE::ERROR}, + {Call1FOpen, IOSTATE::OPENED}}}); + GroundTruth.insert({LoadMainFile, + {{MainFile, IOSTATE::OPENED}, + {MainFileLoad, IOSTATE::ERROR}, + {Call1FOpen, IOSTATE::OPENED}}}); + GroundTruth.insert( + {MainReturn, {{MainFile, IOSTATE::CLOSED}, {FooFile, IOSTATE::CLOSED}}}); } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_08) { initialize({PathToLlFiles + "typestate_08_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); - Llvmtssolver.solve(); - const std::map> Gt = { - // At exit in foo() - {6, {{"2", IOSTATE::OPENED}}}, - // At exit in main() - {11, {{"2", IOSTATE::OPENED}, {"8", IOSTATE::UNINIT}}}}; - compareResults(Gt, Llvmtssolver); + + GroundTruthMapTy GroundTruth; + const auto FooFile = LineColFun{5, 9, "foo"}; + const auto FooRet = LineColFun{7, 3, "foo"}; + const auto MainFile = LineColFun{11, 9, "main"}; + const auto MainReturn = LineColFun{13, 3, "main"}; + GroundTruth.insert({FooRet, {{FooFile, IOSTATE::OPENED}}}); + GroundTruth.insert( + {MainReturn, {{FooFile, IOSTATE::OPENED}, {MainFile, IOSTATE::UNINIT}}}); + compareResults(GroundTruth, Llvmtssolver); } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_09) { initialize({PathToLlFiles + "typestate_09_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); - Llvmtssolver.solve(); - const std::map> Gt = { - // At exit in foo() - {8, - { - {"4", IOSTATE::OPENED}, - //{"10", IOSTATE::OPENED}// 8 is before 10, so no info available - // before retFF - }}, - // At exit in main() - {18, {{"4", IOSTATE::CLOSED}, {"10", IOSTATE::CLOSED}}}}; - compareResults(Gt, Llvmtssolver); + + GroundTruthMapTy GroundTruth; + const auto FooFile 
= LineColFun{5, 9, "foo"}; + const auto FooRet = LineColFun{7, 3, "foo"}; + const auto MainFile = LineColFun{11, 9, "main"}; + const auto MainReturn = LineColFun{15, 3, "main"}; + GroundTruth.insert({FooRet, {{FooFile, IOSTATE::OPENED}}}); + GroundTruth.insert( + {MainReturn, {{FooFile, IOSTATE::CLOSED}, {MainFile, IOSTATE::CLOSED}}}); + compareResults(GroundTruth, Llvmtssolver); } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_10) { initialize({PathToLlFiles + "typestate_10_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); - Llvmtssolver.solve(); - const std::map> Gt = { - // At exit in bar() - {4, {{"2", IOSTATE::UNINIT}}}, - // At exit in foo() - {11, - {//{"2", IOSTATE::OPENED}, - //{"13", IOSTATE::OPENED}, // 2 and 13 are in different functions, so - // results are not available before retFF - {"5", IOSTATE::OPENED}}}, - // At exit in main() - {19, - {{"2", IOSTATE::CLOSED}, - {"5", IOSTATE::CLOSED}, - {"13", IOSTATE::CLOSED}}}}; - compareResults(Gt, Llvmtssolver); + + GroundTruthMapTy GroundTruth; + const auto BarFile = LineColFun{5, 9, "bar"}; + const auto BarRet = LineColFun{6, 3, "bar"}; + const auto FooFile = LineColFun{10, 9, "foo"}; + const auto FooRet = LineColFun{12, 3, "foo"}; + const auto MainFile = LineColFun{16, 9, "main"}; + const auto MainReturn = LineColFun{20, 3, "main"}; + GroundTruth.insert({BarRet, {{BarFile, IOSTATE::UNINIT}}}); + GroundTruth.insert({FooRet, {{FooFile, IOSTATE::OPENED}}}); + GroundTruth.insert({MainReturn, + {{BarFile, IOSTATE::CLOSED}, + {FooFile, IOSTATE::CLOSED}, + {MainFile, IOSTATE::CLOSED}}}); + compareResults(GroundTruth, Llvmtssolver); } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_11) { initialize({PathToLlFiles + "typestate_11_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); - Llvmtssolver.solve(); - const std::map> Gt = { - // At exit in bar(): closing uninitialized file-handle gives error-state - {6, - { - {"2", IOSTATE::ERROR}, - //{"7", IOSTATE::ERROR}, - //{"13", 
IOSTATE::ERROR} // 7 and 13 not yet reached - }}, - // At exit in foo() - {11, - { - //{"2", IOSTATE::OPENED}, // 2 is in different function - {"7", IOSTATE::OPENED}, - //{"13", IOSTATE::OPENED} // 13 is after 11 - }}, - // At exit in main(): due to aliasing the error-state from bar is - // propagated back to main - {19, - {{"2", IOSTATE::ERROR}, {"7", IOSTATE::ERROR}, {"13", IOSTATE::ERROR}}}}; - compareResults(Gt, Llvmtssolver); + + GroundTruthMapTy GroundTruth; + const auto BarFile = LineColFun{4, 16, "bar"}; + const auto BarRet = LineColFun{4, 32, "bar"}; + const auto FooFile = LineColFun{6, 16, "foo"}; + const auto FooRet = LineColFun{6, 49, "foo"}; + const auto MainFile = LineColFun{9, 9, "main"}; + const auto MainReturn = LineColFun{13, 3, "main"}; + GroundTruth.insert({BarRet, {{BarFile, IOSTATE::ERROR}}}); + GroundTruth.insert({FooRet, {{FooFile, IOSTATE::OPENED}}}); + GroundTruth.insert({MainReturn, + {{BarFile, IOSTATE::ERROR}, + {FooFile, IOSTATE::ERROR}, + {MainFile, IOSTATE::ERROR}}}); + compareResults(GroundTruth, Llvmtssolver); } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_12) { initialize({PathToLlFiles + "typestate_12_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); - Llvmtssolver.solve(); - const std::map> Gt = { - // At exit in bar() - {6, - { - {"2", IOSTATE::OPENED}, - //{"10", IOSTATE::OPENED} // 6 has no information about 10, as it - // always completes before - }}, - // At exit in foo() - {8, {{"2", IOSTATE::OPENED}, {"10", IOSTATE::OPENED}}}, - // At exit in main() - {16, {{"2", IOSTATE::CLOSED}, {"10", IOSTATE::CLOSED}}}}; - compareResults(Gt, Llvmtssolver); + + GroundTruthMapTy GroundTruth; + const auto BarFile = LineColFun{5, 9, "bar"}; + const auto BarRet = LineColFun{7, 3, "bar"}; + const auto AfterFoo = LineColFun{15, 3, "main"}; + const auto MainFile = LineColFun{13, 9, "main"}; + const auto MainReturn = LineColFun{17, 3, "main"}; + GroundTruth.insert({BarRet, {{BarFile, IOSTATE::OPENED}}}); + GroundTruth.insert( 
+ {AfterFoo, {{MainFile, IOSTATE::OPENED}, {BarFile, IOSTATE::OPENED}}}); + GroundTruth.insert( + {MainReturn, {{MainFile, IOSTATE::CLOSED}, {BarFile, IOSTATE::CLOSED}}}); + compareResults(GroundTruth, Llvmtssolver); } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_13) { initialize({PathToLlFiles + "typestate_13_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); - Llvmtssolver.solve(); - const std::map> Gt = { - // Before first fclose() - {8, {{"3", IOSTATE::OPENED}}}, - // Before second fclose() - {10, {{"3", IOSTATE::CLOSED}}}, - // At exit in main() - {11, {{"3", IOSTATE::ERROR}}}}; - compareResults(Gt, Llvmtssolver); + + GroundTruthMapTy GroundTruth; + const auto File = LineColFun{4, 9, "main"}; + const auto BeforeFirstFClose = LineColFun{7, 3, "main"}; + const auto BeforeSecondFClose = LineColFun{8, 3, "main"}; + const auto Return = LineColFun{10, 3, "main"}; + GroundTruth.insert({BeforeFirstFClose, {{File, IOSTATE::OPENED}}}); + GroundTruth.insert({BeforeSecondFClose, {{File, IOSTATE::CLOSED}}}); + GroundTruth.insert({Return, {{File, IOSTATE::ERROR}}}); + compareResults(GroundTruth, Llvmtssolver); } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_14) { initialize({PathToLlFiles + "typestate_14_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); - Llvmtssolver.solve(); - const std::map> Gt = { - // Before first fopen() - {7, {{"5", IOSTATE::UNINIT}}}, - // Before second fopen() - {9, {{"5", IOSTATE::OPENED}}}, - // After second store - {11, - {{"5", IOSTATE::OPENED}, - {"7", IOSTATE::OPENED}, - {"9", IOSTATE::OPENED}}}, - // At exit in main() - {11, - {{"5", IOSTATE::CLOSED}, - {"7", IOSTATE::CLOSED}, - {"9", IOSTATE::CLOSED}}}}; - compareResults(Gt, Llvmtssolver); + + GroundTruthMapTy GroundTruth; + const auto File = LineColFun{4, 9, "main"}; + const auto BeforeFirstFOpen = LineColFun{5, 5, "main"}; + const auto BeforeSecondFOpen = LineColFun{6, 5, "main"}; + const auto BeforeFClose = LineColFun{7, 3, "main"}; + const auto Return = 
LineColFun{9, 3, "main"}; + GroundTruth.insert({BeforeFirstFOpen, {{File, IOSTATE::UNINIT}}}); + GroundTruth.insert({BeforeSecondFOpen, {{File, IOSTATE::OPENED}}}); + GroundTruth.insert({BeforeFClose, + {{File, IOSTATE::OPENED}, + {LineColFun{5, 7, "main"}, IOSTATE::OPENED}, + {LineColFun{6, 7, "main"}, IOSTATE::OPENED}}}); + GroundTruth.insert({Return, + {{File, IOSTATE::CLOSED}, + {LineColFun{5, 7, "main"}, IOSTATE::CLOSED}, + {LineColFun{6, 7, "main"}, IOSTATE::CLOSED}}}); + compareResults(GroundTruth, Llvmtssolver); } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_15) { initialize({PathToLlFiles + "typestate_15_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); - Llvmtssolver.solve(); - const std::map> Gt = { - // After store of ret val of first fopen() - {9, - { - {"5", IOSTATE::OPENED}, {"7", IOSTATE::OPENED}, - // for 9, 11, 13 state is top - // {"9", IOSTATE::OPENED}, - // {"11", IOSTATE::OPENED}, - // {"13", IOSTATE::OPENED} - }}, - // After first fclose() - {11, - { - {"5", IOSTATE::CLOSED}, - {"7", IOSTATE::CLOSED}, - {"9", IOSTATE::CLOSED}, - // for 11 and 13 state is top - // {"11", IOSTATE::CLOSED}, - // {"13", IOSTATE::CLOSED} - }}, - // After second fopen() but before storing ret val - {12, - { - {"5", IOSTATE::CLOSED}, - {"7", IOSTATE::CLOSED}, - {"9", IOSTATE::CLOSED}, - {"11", IOSTATE::OPENED}, - // for 13 state is top - //{"13", IOSTATE::CLOSED} - }}, - // After storing ret val of second fopen() - {13, - { - {"5", IOSTATE::OPENED}, - {"7", IOSTATE::CLOSED}, // 7 and 9 do not alias 11 - {"9", IOSTATE::CLOSED}, - {"11", IOSTATE::OPENED}, - // for 13 state is top - //{"13", IOSTATE::OPENED} - }}, - // At exit in main() - {15, - {{"5", IOSTATE::CLOSED}, - // Due to flow-insensitive alias information, the - // closed file-handle (which has ID 13) may alias - // the closed file handles 7 and 9. 
Hence closed - // + closed gives error for 7 and 9 => false positive - {"7", IOSTATE::ERROR}, - {"9", IOSTATE::ERROR}, - {"11", IOSTATE::CLOSED}, - {"13", IOSTATE::CLOSED}}}}; - compareResults(Gt, Llvmtssolver); + + GroundTruthMapTy GroundTruth; + // 5: %f = alloca ptr, align 8 + const auto File = LineColFun{4, 9, "main"}; + // %call = call noalias ptr @fopen + const auto FOpen = LineColFun{5, 7, "main"}; + // %0 = load ptr, ptr %f, align 8 + const auto LoadFile = LineColFun{6, 10, "main"}; + // %call2 = call noalias ptr @fopen + const auto SecondFOpen = LineColFun{7, 7, "main"}; + // store ptr %call2, ptr %f, align 8 + const auto StoreSecondFOpen = LineColFun{7, 5, "main"}; + // %1 = load ptr, ptr %f, align 8 + const auto SecondLoadFile = LineColFun{8, 10, "main"}; + // ret i32 0 + const auto Return = LineColFun{10, 3, "main"}; + + GroundTruth.insert( + {LoadFile, {{File, IOSTATE::OPENED}, {FOpen, IOSTATE::OPENED}}}); + GroundTruth.insert({SecondFOpen, + {{File, IOSTATE::CLOSED}, + {FOpen, IOSTATE::CLOSED}, + {LoadFile, IOSTATE::CLOSED}}}); + GroundTruth.insert({StoreSecondFOpen, + {{File, IOSTATE::CLOSED}, + {FOpen, IOSTATE::CLOSED}, + {LoadFile, IOSTATE::CLOSED}, + {SecondFOpen, IOSTATE::OPENED}}}); + GroundTruth.insert({SecondLoadFile, + {{File, IOSTATE::OPENED}, + {FOpen, IOSTATE::CLOSED}, + {LoadFile, IOSTATE::CLOSED}, + {SecondFOpen, IOSTATE::OPENED}}}); + GroundTruth.insert({Return, + {{File, IOSTATE::CLOSED}, + {FOpen, IOSTATE::ERROR}, + {LoadFile, IOSTATE::ERROR}, + {SecondFOpen, IOSTATE::CLOSED}, + {SecondLoadFile, IOSTATE::CLOSED}}}); + compareResults(GroundTruth, Llvmtssolver); } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_16) { @@ -448,67 +498,46 @@ TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_16) { initialize({PathToLlFiles + "typestate_16_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); - Llvmtssolver.solve(); - // Llvmtssolver.dumpResults(); - - // auto Pts = PT->getAliasSet(IRDB->getInstruction(2)); - // std::cout << 
"Alias(2) = {"; - // bool Frst = true; - // for (const auto *P : *Pts) { - // if (Frst) { - // Frst = false; - // } else { - // std::cout << ", "; - // } - // std::cout << llvmIRToShortString(P); - // } - // std::cout << "}" << std::endl; - - const std::map> Gt = { - // At exit in foo() - {16, - { - //{"2", IOSTATE::CLOSED}, - {"2", IOSTATE::BOT} // Overapproximation due to too flat lattice! - // {"18", IOSTATE::CLOSED} // pointsTo information is not sufficient - }}, - // At exit in main() - {24, - {{"2", IOSTATE::BOT}, - {"18", IOSTATE::BOT}}}}; // Overapproximation due to too flat lattice - // (would expect CLOSED for both)! - compareResults(Gt, Llvmtssolver); + + GroundTruthMapTy GroundTruth; + const auto FooFile = LineColFun{4, 16, "foo"}; + const auto FooExit = LineColFun{11, 1, "foo"}; + const auto MainFile = LineColFun{14, 9, "main"}; + const auto MainReturn = LineColFun{19, 3, "main"}; + // At exit in foo() + GroundTruth.insert({FooExit, {{FooFile, IOSTATE::BOT}}}); + // At exit in main() + GroundTruth.insert( + {MainReturn, {{FooFile, IOSTATE::BOT}, {MainFile, IOSTATE::BOT}}}); + compareResults(GroundTruth, Llvmtssolver); } -// TODO: Check this case again! 
TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_17) { initialize({PathToLlFiles + "typestate_17_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); - Llvmtssolver.solve(); - const std::map> Gt = { - // Before loop - {15, - {{"2", IOSTATE::CLOSED}, - {"9", IOSTATE::CLOSED}, - {"13", IOSTATE::CLOSED}}}, - // Before fgetc() // fgetc(CLOSED)=ERROR join CLOSED = BOT - {17, - { - {"2", IOSTATE::BOT}, {"9", IOSTATE::BOT}, {"13", IOSTATE::BOT}, - // {"16", IOSTATE::BOT} // at 16 we now have ERROR (actually, this is - // correct as well as BOT) - }}, - // At exit in main() - {22, - { - {"2", IOSTATE::BOT}, {"9", IOSTATE::BOT}, {"13", IOSTATE::BOT}, - //{"16", IOSTATE::BOT} // at 16 we now have ERROR (actually, this is - // correct as well as BOT) - }}}; - compareResults(Gt, Llvmtssolver); + GroundTruthMapTy GroundTruth; + const auto FooFile = LineColFun{4, 16, "foo"}; + const auto File = LineColFun{8, 9, "main"}; + const auto FOpenFile = LineColFun{8, 9, "main"}; + const auto BeforeLoop = LineColFun{14, 3, "main"}; + const auto BeforeFGetC = LineColFun{14, 13, "main"}; + const auto MainReturn = LineColFun{17, 3, "main"}; + GroundTruth.insert({BeforeLoop, + {{FooFile, IOSTATE::CLOSED}, + {File, IOSTATE::CLOSED}, + {FOpenFile, IOSTATE::CLOSED}}}); + GroundTruth.insert({BeforeFGetC, + {{FooFile, IOSTATE::BOT}, + {File, IOSTATE::BOT}, + {FOpenFile, IOSTATE::BOT}}}); + GroundTruth.insert({MainReturn, + {{FooFile, IOSTATE::BOT}, + {File, IOSTATE::BOT}, + {FOpenFile, IOSTATE::BOT}}}); + compareResults(GroundTruth, Llvmtssolver); } TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_18) { @@ -516,36 +545,36 @@ TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_18) { initialize({PathToLlFiles + "typestate_18_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); - Llvmtssolver.solve(); - const std::map> Gt = { - // At exit in foo() - {17, - { - //{"2", IOSTATE::CLOSED}, - {"2", IOSTATE::BOT}, // Overapproximation due to too flat lattice! 
- // {"19", IOSTATE::CLOSED} // pointsTo information not sufficient - }}, - // At exit in main() - {25, - {{"2", IOSTATE::BOT}, - {"19", IOSTATE::BOT}}}}; // Overapproximation due to too flat lattice - // (would expect CLOSED for both)! - compareResults(Gt, Llvmtssolver); + + GroundTruthMapTy GroundTruth; + const auto FooReturn = LineColFun{11, 1, "foo"}; + const auto FooFile = LineColFun{4, 16, "foo"}; + const auto MainFile = LineColFun{14, 9, "main"}; + const auto MainReturn = LineColFun{19, 3, "main"}; + GroundTruth.insert({FooReturn, {{FooFile, IOSTATE::BOT}}}); + GroundTruth.insert( + {MainReturn, {{MainFile, IOSTATE::BOT}, {FooFile, IOSTATE::BOT}}}); + compareResults(GroundTruth, Llvmtssolver); } -// TODO: Check this case again! TEST_F(IDETSAnalysisFileIOTest, HandleTypeState_19) { initialize({PathToLlFiles + "typestate_19_c_dbg.ll"}); IDESolver Llvmtssolver(*TSProblem, &HA->getICFG()); - Llvmtssolver.solve(); - const std::map> Gt = { - {11, {{"8", IOSTATE::UNINIT}}}, - {14, {{"8", IOSTATE::BOT}}}, - // At exit in main() - {25, {{"2", IOSTATE::CLOSED}, {"8", IOSTATE::CLOSED}}}}; - compareResults(Gt, Llvmtssolver); + + GroundTruthMapTy GroundTruth; + const auto FooFile = LineColFun{4, 16, "foo"}; + const auto MainFile = LineColFun{7, 9, "main"}; + const auto WhileCond = LineColFun{11, 3, "main"}; + const auto StoreCall = LineColFun{11, 13, "main"}; + const auto MainReturn = LineColFun{18, 3, "main"}; + + GroundTruth.insert({WhileCond, {{MainFile, IOSTATE::UNINIT}}}); + GroundTruth.insert({StoreCall, {{MainFile, IOSTATE::BOT}}}); + GroundTruth.insert( + {MainReturn, {{FooFile, IOSTATE::CLOSED}, {MainFile, IOSTATE::CLOSED}}}); + compareResults(GroundTruth, Llvmtssolver); } // main function for the test case diff --git a/unittests/TestUtils/SrcCodeLocationEntry.h b/unittests/TestUtils/SrcCodeLocationEntry.h new file mode 100644 index 0000000000..08d14dc463 --- /dev/null +++ b/unittests/TestUtils/SrcCodeLocationEntry.h @@ -0,0 +1,450 @@ +#ifndef 
PHASAR_UTILS_SRCCODELOCATIONENTRY_H +#define PHASAR_UTILS_SRCCODELOCATIONENTRY_H + +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/Utils/LLVMIRToSrc.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/Utilities.h" + +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace psr::unittest { + +struct GlobalVar { + llvm::StringRef Name; + + friend bool operator<(GlobalVar G1, GlobalVar G2) noexcept { + return G1.Name < G2.Name; + } + friend bool operator==(GlobalVar G1, GlobalVar G2) noexcept { + return G1.Name == G2.Name; + } + + [[nodiscard]] std::string str() const { + return std::string("GlobalVar { Name: ") + Name.str() + " }"; + } +}; +struct LineCol { + uint32_t Line{}; + uint32_t Col{}; + + friend bool operator<(LineCol LC1, LineCol LC2) noexcept { + return std::tie(LC1.Line, LC1.Col) < std::tie(LC2.Line, LC2.Col); + } + friend bool operator==(LineCol LC1, LineCol LC2) noexcept { + return std::tie(LC1.Line, LC1.Col) == std::tie(LC2.Line, LC2.Col); + } + [[nodiscard]] std::string str() const { + return std::string("LineCol { Line: ") + std::to_string(Line) + + "; Col: " + std::to_string(Col) + " }"; + } +}; + +struct LineColFunOp { + uint32_t Line{}; + uint32_t Col{}; + llvm::StringRef InFunction{}; + uint32_t OpCode{}; + + friend bool operator<(LineColFunOp LC1, LineColFunOp LC2) noexcept { + return std::tie(LC1.InFunction, LC1.Line, LC1.Col, LC1.OpCode) < + std::tie(LC2.InFunction, LC2.Line, LC2.Col, LC2.OpCode); + } + friend bool 
operator==(LineColFunOp LC1, LineColFunOp LC2) noexcept { + return std::tie(LC1.Line, LC1.Col, LC1.InFunction, LC1.OpCode) == + std::tie(LC2.Line, LC2.Col, LC2.InFunction, LC2.OpCode); + } + [[nodiscard]] std::string str() const { + return std::string("LineColFunOp { Line: ") + std::to_string(Line) + + "; Col: " + std::to_string(Col) + + "; InFunction: " + InFunction.str() + + "; OpCode: " + llvm::Instruction::getOpcodeName(OpCode) + " }"; + } +}; + +struct LineColFun { + uint32_t Line{}; + uint32_t Col{}; + llvm::StringRef InFunction{}; + + friend bool operator<(LineColFun LC1, LineColFun LC2) noexcept { + return std::tie(LC1.InFunction, LC1.Line, LC1.Col) < + std::tie(LC2.InFunction, LC2.Line, LC2.Col); + } + friend bool operator==(LineColFun LC1, LineColFun LC2) noexcept { + return std::tie(LC1.Line, LC1.Col, LC1.InFunction) == + std::tie(LC2.Line, LC2.Col, LC2.InFunction); + } + [[nodiscard]] std::string str() const { + return std::string("LineColFun { Line: ") + std::to_string(Line) + + "; Col: " + std::to_string(Col) + + "; InFunction: " + InFunction.str() + " }"; + } + + constexpr operator LineColFunOp() const noexcept { + // 0 is the wildcard opcode + return {Line, Col, InFunction, 0}; + } +}; + +struct ArgNo { + uint32_t Idx{}; + + friend bool operator<(ArgNo A1, ArgNo A2) noexcept { return A1.Idx < A2.Idx; } + friend bool operator==(ArgNo A1, ArgNo A2) noexcept { + return A1.Idx == A2.Idx; + } + [[nodiscard]] std::string str() const { + return std::string("ArgNo { Idx: ") + std::to_string(Idx) + " }"; + } +}; +struct ArgInFun { + uint32_t Idx{}; + llvm::StringRef InFunction{}; + + friend bool operator<(ArgInFun A1, ArgInFun A2) noexcept { + return std::tie(A1.InFunction, A1.Idx) < std::tie(A2.InFunction, A2.Idx); + } + friend bool operator==(ArgInFun A1, ArgInFun A2) noexcept { + return std::tie(A1.Idx, A1.InFunction) == std::tie(A2.Idx, A2.InFunction); + } + [[nodiscard]] std::string str() const { + return std::string("ArgInFun { Idx: ") + 
std::to_string(Idx) + + "; InFunction: " + InFunction.str() + " }"; + } +}; + +struct RetVal { + llvm::StringRef InFunction; + + friend bool operator<(RetVal R1, RetVal R2) noexcept { + return R1.InFunction < R2.InFunction; + } + friend bool operator==(RetVal R1, RetVal R2) noexcept { + return R1.InFunction == R2.InFunction; + } + [[nodiscard]] std::string str() const { + return std::string("RetVal { InFunction: ") + InFunction.str() + " }"; + } +}; +struct RetStmt { + llvm::StringRef InFunction; + + friend bool operator<(RetStmt R1, RetStmt R2) noexcept { + return R1.InFunction < R2.InFunction; + } + friend bool operator==(RetStmt R1, RetStmt R2) noexcept { + return R1.InFunction == R2.InFunction; + } + [[nodiscard]] std::string str() const { + return std::string("RetStmt { InFunction: ") + InFunction.str() + " }"; + } +}; + +struct OperandOf { + uint32_t OperandIndex{}; + LineColFunOp Inst{}; + + friend bool operator<(OperandOf R1, OperandOf R2) noexcept { + return std::tie(R1.OperandIndex, R1.Inst) < + std::tie(R2.OperandIndex, R2.Inst); /* lexicographic over (OperandIndex, Inst); each side must tie its own Inst so the ordering agrees with operator== */ + } + friend bool operator==(OperandOf R1, OperandOf R2) noexcept { + return R1.OperandIndex == R2.OperandIndex && R1.Inst == R2.Inst; + } + [[nodiscard]] std::string str() const { + return std::string("OperandOf { OperandIndex: ") + + std::to_string(OperandIndex) + "; Inst: " + Inst.str() + " }"; + } +}; + +struct TestingSrcLocation + : public std::variant { + using VarT = std::variant; + using VarT::variant; + + template [[nodiscard]] constexpr bool isa() const noexcept { + return std::holds_alternative(*this); + } + template + [[nodiscard]] constexpr const T *dyn_cast() const noexcept { + return std::get_if(this); + } + template [[nodiscard]] constexpr T *dyn_cast() noexcept { + return std::get_if(this); + } + [[nodiscard]] std::string str() const { + return std::visit([](const auto &Val) { return Val.str(); }, *this); + } + + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const TestingSrcLocation &Loc) { +
return OS << Loc.str(); + } + friend std::ostream &operator<<(std::ostream &OS, + const TestingSrcLocation &Loc) { + return OS << Loc.str(); + } +}; + +} // namespace psr::unittest + +namespace std { +template <> struct hash { + size_t operator()(psr::unittest::LineCol LC) const noexcept { + return llvm::hash_value(std::make_pair(LC.Line, LC.Col)); + } +}; +template <> struct hash { + size_t operator()(psr::unittest::LineColFun LCF) const noexcept { + return llvm::hash_combine( + llvm::hash_value(std::make_pair(LCF.Line, LCF.Col)), LCF.InFunction); + } +}; + +template <> struct hash { + size_t operator()(psr::unittest::LineColFunOp LCF) const noexcept { + return llvm::hash_combine( + llvm::hash_value(std::make_pair(LCF.Line, LCF.Col)), LCF.InFunction, + LCF.OpCode); + } +}; +template <> struct hash { + size_t operator()(psr::unittest::GlobalVar GV) const noexcept { + return llvm::hash_value(GV.Name); + } +}; +template <> struct hash { + size_t operator()(psr::unittest::ArgNo Arg) const noexcept { + return llvm::hash_value(Arg.Idx); + } +}; +template <> struct hash { + size_t operator()(psr::unittest::ArgInFun Arg) const noexcept { + return llvm::hash_combine(Arg.Idx, Arg.InFunction); + } +}; + +template <> struct hash { + size_t operator()(psr::unittest::RetVal Ret) const noexcept { + return llvm::hash_value(Ret.InFunction); + } +}; + +template <> struct hash { + size_t operator()(psr::unittest::RetStmt Ret) const noexcept { + return llvm::hash_value(Ret.InFunction); + } +}; + +template <> struct hash { + size_t operator()(psr::unittest::OperandOf Op) const noexcept { + return llvm::hash_combine(Op.OperandIndex, + hash{}(Op.Inst)); + } +}; + +template <> struct hash { + size_t + operator()(const psr::unittest::TestingSrcLocation &Loc) const noexcept { + return std::hash{}(Loc); + } +}; +} // namespace std + +namespace psr::unittest { + +template +[[nodiscard]] inline const llvm::Instruction * +getInstAtOrNull(const llvm::Function *F, uint32_t ReqLine, + uint32_t 
ReqColumn = 0, PredFn Pred = {}) { + assert(F != nullptr); + for (const auto &I : llvm::instructions(F)) { + if (I.isDebugOrPseudoInst()) { + continue; + } + + auto [Line, Column] = psr::getLineAndColFromIR(&I); + if (Line == ReqLine && (ReqColumn == 0 || ReqColumn == Column) && + std::invoke(Pred, &I)) { + return &I; + } + } + return nullptr; +} + +[[nodiscard]] inline const llvm::Value * +testingLocInIR(TestingSrcLocation Loc, + const ProjectIRDBBase &IRDB, + const llvm::Function *InterestingFunction = nullptr) { + const auto GetFunction = [&IRDB](llvm::StringRef Name) { + const auto *InFun = IRDB.getFunctionDefinition(Name); + if (!InFun) { + llvm::report_fatal_error("Required function '" + Name + + "' does not exist in the IR!"); + } + return InFun; + }; + const auto *Ret = std::visit( + psr::Overloaded{ + [=](LineCol LC) -> llvm ::Value const * { + if (!InterestingFunction) { + llvm::report_fatal_error( + "You must provide an InterestingFunction as last parameter " + "to testingLocInIR(), if trying to resolve a LineCol; " + "alternatively use LineColFun instead."); + } + + return getInstAtOrNull(InterestingFunction, LC.Line, LC.Col); + }, + [&](LineColFun LC) -> llvm ::Value const * { + const auto *InFun = GetFunction(LC.InFunction); + return getInstAtOrNull(InFun, LC.Line, LC.Col); + }, + [&](LineColFunOp LC) -> llvm ::Value const * { + const auto *InFun = GetFunction(LC.InFunction); + return getInstAtOrNull( + InFun, LC.Line, LC.Col, + [Op = LC.OpCode](const llvm::Instruction *Inst) { + // According to LLVM's doc on llvm::Value::getValueID(), there + // cannot be any opcode==0, so we use it as wildcard here + return Op == 0 || Inst->getOpcode() == Op; + }); + }, + [&IRDB](GlobalVar GV) -> llvm ::Value const * { + return IRDB.getModule()->getGlobalVariable(GV.Name, true); + }, + [=](ArgNo A) -> llvm ::Value const * { + if (!InterestingFunction) { + llvm::report_fatal_error( + "You must provide an InterestingFunction as last parameter " + "to testingLocInIR(), 
if trying to resolve an ArgNo; " + "alternatively use ArgInFun instead."); + } + if (InterestingFunction->arg_size() <= A.Idx) { + llvm::report_fatal_error( + "Argument index " + llvm::Twine(A.Idx) + + " is out of range (" + + llvm::Twine(InterestingFunction->arg_size()) + ")!"); + } + return InterestingFunction->getArg(A.Idx); + }, + [&](ArgInFun A) -> llvm ::Value const * { + const auto *InFun = GetFunction(A.InFunction); + if (InFun->arg_size() <= A.Idx) { + llvm::report_fatal_error("Argument index " + llvm::Twine(A.Idx) + + " is out of range (" + + llvm::Twine(InFun->arg_size()) + ")!"); + } + return InFun->getArg(A.Idx); + }, + [&](RetVal R) -> llvm::Value const * { + const auto *InFun = GetFunction(R.InFunction); + for (const auto &BB : llvm::reverse(InFun->getBasicBlockList())) { + if (const auto *Ret = + llvm::dyn_cast(BB.getTerminator())) { + return Ret->getReturnValue(); + } + } + llvm::report_fatal_error("No return stmt in function " + + R.InFunction); + }, + [&](RetStmt R) -> llvm::Value const * { + const auto *InFun = GetFunction(R.InFunction); + for (const auto &BB : llvm::reverse(InFun->getBasicBlockList())) { + if (const auto *Ret = + llvm::dyn_cast(BB.getTerminator())) { + return Ret; + } + } + llvm::report_fatal_error("No return stmt in function " + + R.InFunction); + }, + [&](OperandOf Op) -> llvm::Value const * { + const auto *Inst = llvm::dyn_cast_if_present( + testingLocInIR(Op.Inst, IRDB)); + if (!Inst) { + return nullptr; + } + + if (Inst->getNumOperands() <= Op.OperandIndex) { + llvm::report_fatal_error("Requested operand index " + + llvm::Twine(Op.OperandIndex) + + " is out of bounds for instruction " + + llvm::Twine(llvmIRToString(Inst))); + } + + return Inst->getOperand(Op.OperandIndex); + }, + }, + Loc); + if (!Ret) { + llvm::report_fatal_error("Cannot convert " + llvm::Twine(Loc.str()) + + " to LLVM"); + } + return Ret; +} + +template +[[nodiscard]] inline std::set +convertTestingLocationSetInIR( + const SetTy &Locs, const 
ProjectIRDBBase &IRDB, + const llvm::Function *InterestingFunction = nullptr) { + std::set Ret; + llvm::transform(Locs, std::inserter(Ret, Ret.end()), + [&](TestingSrcLocation Loc) { + return testingLocInIR(Loc, IRDB, InterestingFunction); + }); + return Ret; +} + +template +[[nodiscard]] inline auto convertTestingLocationSetMapInIR( + const MapTy &Locs, const ProjectIRDBBase &IRDB, + const llvm::Function *InterestingFunction = nullptr) { + std::map> Ret; + llvm::transform( + Locs, std::inserter(Ret, Ret.end()), [&](const auto &LocAndSet) { + const auto &[InstLoc, Set] = LocAndSet; + const auto *LocVal = testingLocInIR(InstLoc, IRDB, InterestingFunction); + const auto *LocInst = + llvm::dyn_cast_if_present(LocVal); + if (!LocInst) { + llvm::report_fatal_error( + "Cannot convert " + llvm::Twine(InstLoc.str()) + + (LocVal ? " aka. " + llvmIRToString(LocVal) : "") + + " to an LLVM instruction"); + } + auto ConvSet = + convertTestingLocationSetInIR(Set, IRDB, InterestingFunction); + return std::make_pair(LocInst, std::move(ConvSet)); + }); + return Ret; +} + +} // namespace psr::unittest + +#endif From 91cf7b9ab920f23b1d7d1593aba35c26883f8e32 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Thu, 4 Sep 2025 12:05:46 +0200 Subject: [PATCH 5/9] bugfix + cleanup --- .../DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h | 5 ----- .../TypeStateDescriptions/CSTDFILEIOTypeStateDescription.h | 2 -- .../TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.h | 2 -- .../TypeStateDescriptions/OpenSSLEVPKDFDescription.h | 3 --- .../Problems/TypeStateDescriptions/TypeStateDescription.h | 2 -- .../DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp | 4 +++- .../CSTDFILEIOTypeStateDescription.cpp | 5 ----- .../TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.cpp | 5 ----- .../TypeStateDescriptions/OpenSSLEVPKDFDescription.cpp | 7 ------- 9 files changed, 3 insertions(+), 32 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h 
b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h index 23c2258482..daf3435792 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h @@ -512,11 +512,6 @@ class IDETypeStateAnalysis [[nodiscard]] bool isTypeNameOfInterest(llvm::StringRef Name) const noexcept override { - llvm::outs() << "TSD->getTypeNameOfInterest(): " - << TSD->getTypeNameOfInterest() << "\n"; - llvm::outs() << "Compare Name: " << Name << "\n"; - llvm::outs() << "Name.contains(TSD->getTypeNameOfInterest()): " - << Name.contains(TSD->getTypeNameOfInterest()) << "\n"; return Name.contains(TSD->getTypeNameOfInterest()); } diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.h index ff7942350b..f9eadb0376 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.h @@ -73,8 +73,6 @@ class CSTDFILEIOTypeStateDescription getNextState(llvm::StringRef Tok, TypeStateDescription::State S) const override; [[nodiscard]] std::string getTypeNameOfInterest() const override; - [[nodiscard]] - llvm::Metadata::MetadataKind getTypeOfInterest() const override; [[nodiscard]] std::set getConsumerParamIdx(llvm::StringRef F) const override; [[nodiscard]] std::set diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.h index f0f1c69265..144d3571c4 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.h +++ 
b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.h @@ -93,8 +93,6 @@ class OpenSSLEVPKDFCTXDescription getNextState(llvm::StringRef Tok, State S, const llvm::CallBase *CallSite) const override; [[nodiscard]] std::string getTypeNameOfInterest() const override; - [[nodiscard]] - llvm::Metadata::MetadataKind getTypeOfInterest() const override; [[nodiscard]] std::set getConsumerParamIdx(llvm::StringRef F) const override; [[nodiscard]] std::set diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.h index 864180f780..3a87497cef 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.h @@ -73,9 +73,6 @@ class OpenSSLEVPKDFDescription [[nodiscard]] std::string getTypeNameOfInterest() const override; - [[nodiscard]] - llvm::Metadata::MetadataKind getTypeOfInterest() const override; - [[nodiscard]] std::set getConsumerParamIdx(llvm::StringRef F) const override; diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h index e197f97ae7..3ad3eeb279 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h @@ -28,8 +28,6 @@ struct TypeStateDescriptionBase { [[nodiscard]] virtual bool isConsumingFunction(llvm::StringRef F) const = 0; [[nodiscard]] virtual bool isAPIFunction(llvm::StringRef F) const = 0; [[nodiscard]] virtual std::string getTypeNameOfInterest() const = 0; - [[nodiscard]] virtual 
llvm::Metadata::MetadataKind - getTypeOfInterest() const = 0; [[nodiscard]] virtual std::set getConsumerParamIdx(llvm::StringRef F) const = 0; [[nodiscard]] virtual std::set diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp index c74a0b4c94..b96fa1b166 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp @@ -306,9 +306,11 @@ bool IDETypeStateAnalysisBase::hasMatchingTypeName(const llvm::Value *Value) { if (const auto *BaseTy = stripPointerTypes(VarTy)) { return hasMatchingTypeName(BaseTy); } + + return isTypeNameOfInterest(VarTy->getName()); } - return false; + return true; } bool IDETypeStateAnalysisBase::hasMatchingTypeName(const llvm::DIType *DITy) { diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.cpp index 1492f0aadf..df68fe1063 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.cpp @@ -133,11 +133,6 @@ std::string CSTDFILEIOTypeStateDescription::getTypeNameOfInterest() const { return "_IO_FILE"; } -llvm::Metadata::MetadataKind -CSTDFILEIOTypeStateDescription::getTypeOfInterest() const { - return llvm::Metadata::DICompositeTypeKind; -} - std::set CSTDFILEIOTypeStateDescription::getConsumerParamIdx(llvm::StringRef F) const { if (isConsumingFunction(F)) { diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.cpp index faef984a5b..405d0f62b7 100644 --- 
a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.cpp @@ -128,11 +128,6 @@ std::string OpenSSLEVPKDFCTXDescription::getTypeNameOfInterest() const { return "evp_kdf_ctx_st"; } -llvm::Metadata::MetadataKind -OpenSSLEVPKDFCTXDescription::getTypeOfInterest() const { - return llvm::Metadata::DICompositeTypeKind; -} - std::set OpenSSLEVPKDFCTXDescription::getConsumerParamIdx(llvm::StringRef F) const { if (isConsumingFunction(F)) { diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.cpp index a34963793c..2730647c20 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.cpp @@ -78,13 +78,6 @@ std::string OpenSSLEVPKDFDescription::getTypeNameOfInterest() const { return "struct.evp_kdf_st"; } -llvm::Metadata::MetadataKind -OpenSSLEVPKDFDescription::getTypeOfInterest() const { - // TODO: ask Fabian what MetadataKind could work here, if any. - // Return type here is a placeholder. 
- return llvm::Metadata::GenericDINodeKind; -} - std::set OpenSSLEVPKDFDescription::getConsumerParamIdx(llvm::StringRef F) const { if (isConsumingFunction(F)) { From 1feaf4585542eb9ac74b0ae0c53e6b9ae10dbcb8 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 7 Sep 2025 13:53:47 +0200 Subject: [PATCH 6/9] Some cleanup --- .../IfdsIde/Problems/IDETypeStateAnalysis.h | 12 +---- .../CSTDFILEIOTypeStateDescription.h | 3 -- .../OpenSSLEVPKDFDescription.h | 4 -- .../TypeStateDescription.h | 8 +-- .../IfdsIde/Problems/IDETypeStateAnalysis.cpp | 51 +++---------------- .../CSTDFILEIOTypeStateDescription.cpp | 5 -- .../OpenSSLEVPKDFCTXDescription.cpp | 2 - .../OpenSSLEVPKDFDescription.cpp | 5 +- .../OpenSSLSecureHeapDescription.cpp | 2 +- .../OpenSSLSecureMemoryDescription.cpp | 2 +- 10 files changed, 15 insertions(+), 79 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h index ea81b838c5..07c98e416c 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h @@ -15,6 +15,7 @@ #include "phasar/DataFlow/IfdsIde/FlowFunctions.h" #include "phasar/DataFlow/IfdsIde/IDETabulationProblem.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h" #include "phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" @@ -38,9 +39,6 @@ namespace psr { -class LLVMBasedICFG; -class LLVMTypeHierarchy; - namespace detail { class IDETypeStateAnalysisBaseCommon : public LLVMAnalysisDomainDefault { @@ -135,8 +133,6 @@ class IDETypeStateAnalysisBase return generateFlow(FactToGenerate, LLVMZeroValue::getInstance()); } - bool hasMatchingTypeName(const llvm::Type *Ty); - bool hasMatchingTypeName(const 
llvm::Value *Value); bool hasMatchingTypeName(const llvm::DIType *DITy); std::map AliasCache; @@ -288,11 +284,7 @@ class IDETypeStateAnalysis template >> TSConstant(l_t Value, EmptyType /*unused*/ = {}) noexcept - : ConstantEdgeFunction{Value} { - if constexpr (!HasJoinLatticeTraits) { - this->TSD = TSD; - } - } + : ConstantEdgeFunction{Value} {} /// XXX: Cannot default compose() and join(), because l_t does not implement /// JoinLatticeTraits (because bottom value is not constant) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.h index f9eadb0376..d5a9b4f869 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.h @@ -13,11 +13,8 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h" -#include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/Support/raw_ostream.h" -#include #include #include diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.h index 3a87497cef..27eed099ea 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.h @@ -12,10 +12,6 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h" -#include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/IR/DebugInfoMetadata.h" - -#include 
#include #include diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h index 3ad3eeb279..169b2b8f1e 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h @@ -12,13 +12,13 @@ #include "phasar/PhasarLLVM/Utils/DataFlowAnalysisType.h" -#include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/InstrTypes.h" - #include #include +namespace llvm { +class CallBase; +} // namespace llvm + namespace psr { struct TypeStateDescriptionBase { diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp index b96fa1b166..f329c3445d 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp @@ -288,62 +288,23 @@ auto IDETypeStateAnalysisBase::getLocalAliasesAndAllocas( return AliasAndAllocas; } -bool IDETypeStateAnalysisBase::hasMatchingTypeName(const llvm::Type *Ty) { - if (const auto *StructTy = llvm::dyn_cast(Ty); - StructTy && StructTy->hasName()) { - return isTypeNameOfInterest(StructTy->getName()); - } - // primitive type - std::string Str; - llvm::raw_string_ostream S(Str); - S << *Ty; - S.flush(); - return isTypeNameOfInterest(Str); -} - -bool IDETypeStateAnalysisBase::hasMatchingTypeName(const llvm::Value *Value) { - if (const auto *VarTy = getVarTypeFromIR(Value)) { - if (const auto *BaseTy = stripPointerTypes(VarTy)) { - return hasMatchingTypeName(BaseTy); - } - - return isTypeNameOfInterest(VarTy->getName()); - } - - return true; -} - bool IDETypeStateAnalysisBase::hasMatchingTypeName(const llvm::DIType *DITy) { if (llvm::isa(DITy) && 
!DITy->getName().empty()) { return isTypeNameOfInterest(DITy->getName()); } - return false; + return true; // Conservatively return true } bool IDETypeStateAnalysisBase::hasMatchingType(d_t V) { - // General case - if (V->getType()->isPointerTy()) { - if (hasMatchingTypeName(V)) { - return true; + if (const auto *VarTy = getVarTypeFromIR(V)) { + if (const auto *BaseTy = stripPointerTypes(VarTy)) { + return hasMatchingTypeName(BaseTy); } - // fallthrough - } - if (const auto *Alloca = llvm::dyn_cast(V)) { - return Alloca->getAllocatedType()->isPointerTy() && - hasMatchingTypeName(Alloca); - } - - if (const auto *Load = llvm::dyn_cast(V)) { - return Load->getType()->isPointerTy() && hasMatchingTypeName(Load); - } - - if (const auto *Store = llvm::dyn_cast(V)) { - return Store->getValueOperand()->getType()->isPointerTy() && - hasMatchingTypeName(Store->getValueOperand()); + return isTypeNameOfInterest(VarTy->getName()); } - return false; + return true; } } // namespace psr::detail diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.cpp index df68fe1063..c00c1f2cbe 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.cpp @@ -9,12 +9,7 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/CSTDFILEIOTypeStateDescription.h" -#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" - #include "llvm/ADT/StringMap.h" -#include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Metadata.h" #include "llvm/Support/ErrorHandling.h" #include diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.cpp 
b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.cpp index 405d0f62b7..c46ec6d775 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFCTXDescription.cpp @@ -11,8 +11,6 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Value.h" #include "llvm/Support/ErrorHandling.h" #include diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.cpp index 2730647c20..beeebd9b81 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.cpp @@ -9,9 +9,6 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLEVPKDFDescription.h" -#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h" - -#include "llvm/IR/Metadata.h" #include "llvm/Support/ErrorHandling.h" #include @@ -75,7 +72,7 @@ OpenSSLEVPKDFDescription::getNextState(llvm::StringRef Tok, } std::string OpenSSLEVPKDFDescription::getTypeNameOfInterest() const { - return "struct.evp_kdf_st"; + return "evp_kdf_st"; } std::set diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLSecureHeapDescription.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLSecureHeapDescription.cpp index 716f682a4f..fcdcb70324 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLSecureHeapDescription.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLSecureHeapDescription.cpp @@ -107,7 +107,7 @@ 
OpenSSLSecureHeapState OpenSSLSecureHeapDescription::getNextState( } std::string OpenSSLSecureHeapDescription::getTypeNameOfInterest() const { - return "i8"; + return {}; } set diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLSecureMemoryDescription.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLSecureMemoryDescription.cpp index a67f17e42b..f836b5d0e2 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLSecureMemoryDescription.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/OpenSSLSecureMemoryDescription.cpp @@ -110,7 +110,7 @@ OpenSSLSecureMemoryState OpenSSLSecureMemoryDescription::getNextState( } std::string OpenSSLSecureMemoryDescription::getTypeNameOfInterest() const { - return "i8"; // NOT SURE WHAT TO DO WITH THIS + return {}; } set From abeb431be152b064a783d6768553e1cefce1b9b1 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 7 Sep 2025 14:14:09 +0200 Subject: [PATCH 7/9] Implement allocated-types collection in terms of debug info --- .../Passes/GeneralStatisticsAnalysis.h | 4 +- .../phasar/PhasarLLVM/Utils/AllocatedTypes.h | 27 ++++++++ .../phasar/PhasarLLVM/Utils/LLVMShorthands.h | 8 +++ .../ControlFlow/Resolver/RTAResolver.cpp | 51 +------------- .../Passes/GeneralStatisticsAnalysis.cpp | 35 ++-------- lib/PhasarLLVM/Utils/AllocatedTypes.cpp | 67 +++++++++++++++++++ lib/PhasarLLVM/Utils/LLVMShorthands.cpp | 28 ++++++++ 7 files changed, 140 insertions(+), 80 deletions(-) create mode 100644 include/phasar/PhasarLLVM/Utils/AllocatedTypes.h create mode 100644 lib/PhasarLLVM/Utils/AllocatedTypes.cpp diff --git a/include/phasar/PhasarLLVM/Passes/GeneralStatisticsAnalysis.h b/include/phasar/PhasarLLVM/Passes/GeneralStatisticsAnalysis.h index fddf1e98d5..fce1a4103b 100644 --- a/include/phasar/PhasarLLVM/Passes/GeneralStatisticsAnalysis.h +++ b/include/phasar/PhasarLLVM/Passes/GeneralStatisticsAnalysis.h @@ -20,6 +20,7 @@ #include 
"llvm/IR/PassManager.h" #include +#include namespace llvm { class Type; @@ -27,6 +28,7 @@ class Value; class Instruction; class AnalysisUsage; class Module; +class DICompositeType; } // namespace llvm namespace psr { @@ -67,7 +69,7 @@ struct GeneralStatistics { size_t NumInstWithMultipleUses = 0; size_t NumInstsUsedOutsideBB = 0; size_t NonVoidInsts = 0; - std::set AllocatedTypes; + std::vector AllocatedTypes; std::set AllocaInstructions; std::set RetResInstructions; std::string ModuleName{}; diff --git a/include/phasar/PhasarLLVM/Utils/AllocatedTypes.h b/include/phasar/PhasarLLVM/Utils/AllocatedTypes.h new file mode 100644 index 0000000000..08f8f70315 --- /dev/null +++ b/include/phasar/PhasarLLVM/Utils/AllocatedTypes.h @@ -0,0 +1,27 @@ +/****************************************************************************** + * Copyright (c) 2025 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_UTILS_ALLOCATEDTYPES_H +#define PHASAR_PHASARLLVM_UTILS_ALLOCATEDTYPES_H + +#include "llvm/IR/DebugInfoMetadata.h" + +#include + +namespace psr { +class LLVMProjectIRDB; + +[[nodiscard]] std::vector +collectAllocatedTypes(const LLVMProjectIRDB &IRDB); + +[[nodiscard]] std::vector +collectAllocatedTypes(const llvm::Module &Mod); +} // namespace psr + +#endif // PHASAR_PHASARLLVM_UTILS_ALLOCATEDTYPES_H diff --git a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h index d381f7ee69..582fef9500 100644 --- a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h +++ b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h @@ -104,6 +104,14 @@ std::string llvmIRToShortString(const llvm::Value *V); [[nodiscard]] std::string llvmTypeToString(const llvm::Type *Ty, bool Shorten = false); +/** + * @brief 
Returns a string-representation of a LLVM Debug-Info type. + * + * @param Shorten Tries to shorten the output + */ +[[nodiscard]] std::string llvmTypeToString(const llvm::DIType *Ty, + bool Shorten = false); + LLVM_DUMP_METHOD void dumpIRValue(const llvm::Value *V); LLVM_DUMP_METHOD void dumpIRValue(const llvm::Instruction *V); LLVM_DUMP_METHOD void dumpIRValue(const llvm::Function *V); diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp index 33afea40c2..d0eaa8a822 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp @@ -18,17 +18,12 @@ #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" -#include "phasar/PhasarLLVM/Utils/LLVMIRToSrc.h" +#include "phasar/PhasarLLVM/Utils/AllocatedTypes.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" -#include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" using namespace psr; @@ -85,52 +80,10 @@ void RTAResolver::resolveVirtualCall(FunctionSetTy &PossibleTargets, std::string RTAResolver::str() const { return "RTA"; } -static const llvm::DICompositeType * -isCompositeStructType(const llvm::DIType *Ty) { - if (const auto *CompTy = llvm::dyn_cast_if_present(Ty); - CompTy && (CompTy->getTag() == llvm::dwarf::DW_TAG_structure_type || - CompTy->getTag() == llvm::dwarf::DW_TAG_class_type)) { - - return CompTy; - } - - return nullptr; -} - void RTAResolver::resolveAllocatedCompositeTypes() { if (!AllocatedCompositeTypes.empty()) { return; } - llvm::DenseSet AllocatedTypes; - - for (const auto *Inst : IRDB->getAllInstructions()) { - if (const auto *Alloca = llvm::dyn_cast(Inst)) { 
- if (const auto *Ty = isCompositeStructType(getVarTypeFromIR(Alloca))) { - AllocatedTypes.insert(Ty); - } - } else if (const auto *Call = llvm::dyn_cast(Inst)) { - if (const auto *Callee = llvm::dyn_cast( - Call->getCalledOperand()->stripPointerCastsAndAliases())) { - if (psr::isHeapAllocatingFunction(Callee)) { - const auto *MDNode = Call->getMetadata("heapallocsite"); - if (const auto *CompTy = llvm:: -#if LLVM_VERSION_MAJOR >= 15 - dyn_cast_if_present -#else - dyn_cast_or_null -#endif - (MDNode); - isCompositeStructType(CompTy)) { - - AllocatedTypes.insert(CompTy); - } - } - } - } - } - - AllocatedCompositeTypes.reserve(AllocatedTypes.size()); - AllocatedCompositeTypes.insert(AllocatedCompositeTypes.end(), - AllocatedTypes.begin(), AllocatedTypes.end()); + AllocatedCompositeTypes = collectAllocatedTypes(*IRDB); } diff --git a/lib/PhasarLLVM/Passes/GeneralStatisticsAnalysis.cpp b/lib/PhasarLLVM/Passes/GeneralStatisticsAnalysis.cpp index 4575fa59ea..9b5befda55 100644 --- a/lib/PhasarLLVM/Passes/GeneralStatisticsAnalysis.cpp +++ b/lib/PhasarLLVM/Passes/GeneralStatisticsAnalysis.cpp @@ -9,6 +9,7 @@ #include "phasar/PhasarLLVM/Passes/GeneralStatisticsAnalysis.h" +#include "phasar/PhasarLLVM/Utils/AllocatedTypes.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" #include "phasar/Utils/NlohmannLogging.h" @@ -41,31 +42,6 @@ static bool isAddressTaken(const llvm::Function &Fun) noexcept { return false; } -template -static void collectAllocatedTypes(const llvm::CallBase *CallSite, Set &Into) { - for (const auto *User : CallSite->users()) { - if (const auto *Cast = llvm::dyn_cast(User); - Cast && Cast->getDestTy()->isPointerTy() && - !Cast->getDestTy()->isOpaquePointerTy()) { - const auto *ElemTy = Cast->getDestTy()->getNonOpaquePointerElementType(); - if (ElemTy->isStructTy()) { - // finally check for ctor call - for (const auto *User : Cast->users()) { - if (llvm::isa(User)) { - // potential call to the structures ctor - const auto 
*CTor = llvm::cast(User); - if (CTor->getCalledFunction() && - CTor->getCalledFunction()->getArg(0)->getType() == - Cast->getDestTy()) { - Into.insert(ElemTy); - } - } - } - } - } - } -} - llvm::AnalysisKey GeneralStatisticsAnalysis::Key; // NOLINT GeneralStatistics GeneralStatisticsAnalysis::runOnModule(llvm::Module &M) { PHASAR_LOG_LEVEL(INFO, "Running GeneralStatisticsAnalysis"); @@ -131,7 +107,6 @@ GeneralStatistics GeneralStatisticsAnalysis::runOnModule(llvm::Module &M) { // check for alloca instruction for possible types if (const llvm::AllocaInst *Alloc = llvm::dyn_cast(&I)) { - Stats.AllocatedTypes.insert(Alloc->getAllocatedType()); // do not add allocas from llvm internal functions Stats.AllocaInstructions.insert(&I); ++Stats.AllocationSites; @@ -186,9 +161,6 @@ GeneralStatistics GeneralStatisticsAnalysis::runOnModule(llvm::Module &M) { // do not add allocas from llvm internal functions Stats.AllocaInstructions.insert(&I); ++Stats.AllocationSites; - // check if an instance of a user-defined type is allocated on the - // heap - collectAllocatedTypes(CallSite, Stats.AllocatedTypes); } } else { ++Stats.IndCalls; @@ -197,6 +169,9 @@ GeneralStatistics GeneralStatisticsAnalysis::runOnModule(llvm::Module &M) { } } } + + Stats.AllocatedTypes = collectAllocatedTypes(M); + // check for global pointers for (const auto &Global : M.globals()) { ++Stats.Globals; @@ -323,7 +298,7 @@ template struct AlignNum { } }; template AlignNum(llvm::StringRef, T) -> AlignNum; -AlignNum(llvm::StringRef, size_t, size_t)->AlignNum; +AlignNum(llvm::StringRef, size_t, size_t) -> AlignNum; } // namespace llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, diff --git a/lib/PhasarLLVM/Utils/AllocatedTypes.cpp b/lib/PhasarLLVM/Utils/AllocatedTypes.cpp new file mode 100644 index 0000000000..1a5cf033f7 --- /dev/null +++ b/lib/PhasarLLVM/Utils/AllocatedTypes.cpp @@ -0,0 +1,67 @@ +#include "phasar/PhasarLLVM/Utils/AllocatedTypes.h" + +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" 
+#include "phasar/PhasarLLVM/Utils/LLVMIRToSrc.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" + +#include "llvm/ADT/DenseSet.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" + +static const llvm::DICompositeType * +isCompositeStructType(const llvm::DIType *Ty) { + if (const auto *CompTy = llvm::dyn_cast_if_present(Ty); + CompTy && (CompTy->getTag() == llvm::dwarf::DW_TAG_structure_type || + CompTy->getTag() == llvm::dwarf::DW_TAG_class_type)) { + + return CompTy; + } + + return nullptr; +} + +std::vector +psr::collectAllocatedTypes(const llvm::Module &Mod) { + llvm::DenseSet AllocatedTypes; + + for (const auto &Fun : Mod) { + for (const auto &Inst : llvm::instructions(Fun)) { + if (const auto *Alloca = llvm::dyn_cast(&Inst)) { + if (const auto *Ty = isCompositeStructType(getVarTypeFromIR(Alloca))) { + AllocatedTypes.insert(Ty); + } + } else if (const auto *Call = llvm::dyn_cast(&Inst)) { + if (const auto *Callee = llvm::dyn_cast( + Call->getCalledOperand()->stripPointerCastsAndAliases())) { + if (psr::isHeapAllocatingFunction(Callee)) { + const auto *MDNode = Call->getMetadata("heapallocsite"); + if (const auto *CompTy = llvm:: +#if LLVM_VERSION_MAJOR >= 15 + dyn_cast_if_present +#else + dyn_cast_or_null +#endif + (MDNode); + isCompositeStructType(CompTy)) { + + AllocatedTypes.insert(CompTy); + } + } + } + } + } + } + + std::vector AllocatedCompositeTypes; + AllocatedCompositeTypes.reserve(AllocatedTypes.size()); + AllocatedCompositeTypes.insert(AllocatedCompositeTypes.end(), + AllocatedTypes.begin(), AllocatedTypes.end()); + return AllocatedCompositeTypes; +} + +std::vector +psr::collectAllocatedTypes(const LLVMProjectIRDB &IRDB) { + return collectAllocatedTypes(*IRDB.getModule()); +} diff --git a/lib/PhasarLLVM/Utils/LLVMShorthands.cpp b/lib/PhasarLLVM/Utils/LLVMShorthands.cpp index 2e1f25994a..eb991a8ffa 100644 --- a/lib/PhasarLLVM/Utils/LLVMShorthands.cpp +++ 
b/lib/PhasarLLVM/Utils/LLVMShorthands.cpp @@ -235,6 +235,34 @@ std::string psr::llvmTypeToString(const llvm::Type *Ty, bool Shorten) { return IRBuffer; } +std::string psr::llvmTypeToString(const llvm::DIType *Ty, bool Shorten) { + if (!Ty) { + return ""; + } + + std::string Ret; + + if (Shorten) { + Ret = Ty->getName().str(); + if (!Ret.empty()) { + // Try to get a fully-qualified name + + const auto *Scope = Ty->getScope(); + while (llvm::isa_and_nonnull(Scope)) { + // XXX: Optimize this + Ret = Scope->getName().str().append("::").append(Ret); + Scope = Scope->getScope(); + } + return Ret; + } + } + + llvm::raw_string_ostream RSO(Ret); + Ty->print(RSO); + return Ret; +} + void psr::dumpIRValue(const llvm::Value *V) { llvm::outs() << llvmIRToString(V) << '\n'; } From 963a87ecece9598417c310820d52bed918c5d614 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 7 Sep 2025 14:21:52 +0200 Subject: [PATCH 8/9] Fix libdeps --- include/phasar/PhasarLLVM/Utils/AllocatedTypes.h | 5 ----- lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp | 2 +- lib/PhasarLLVM/Utils/AllocatedTypes.cpp | 5 ----- 3 files changed, 1 insertion(+), 11 deletions(-) diff --git a/include/phasar/PhasarLLVM/Utils/AllocatedTypes.h b/include/phasar/PhasarLLVM/Utils/AllocatedTypes.h index 08f8f70315..98bdf91dc5 100644 --- a/include/phasar/PhasarLLVM/Utils/AllocatedTypes.h +++ b/include/phasar/PhasarLLVM/Utils/AllocatedTypes.h @@ -15,11 +15,6 @@ #include namespace psr { -class LLVMProjectIRDB; - -[[nodiscard]] std::vector -collectAllocatedTypes(const LLVMProjectIRDB &IRDB); - [[nodiscard]] std::vector collectAllocatedTypes(const llvm::Module &Mod); } // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp index d0eaa8a822..8924999e22 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp @@ -85,5 +85,5 @@ void 
RTAResolver::resolveAllocatedCompositeTypes() { return; } - AllocatedCompositeTypes = collectAllocatedTypes(*IRDB); + AllocatedCompositeTypes = collectAllocatedTypes(*IRDB->getModule()); } diff --git a/lib/PhasarLLVM/Utils/AllocatedTypes.cpp b/lib/PhasarLLVM/Utils/AllocatedTypes.cpp index 1a5cf033f7..f8e4f03ff3 100644 --- a/lib/PhasarLLVM/Utils/AllocatedTypes.cpp +++ b/lib/PhasarLLVM/Utils/AllocatedTypes.cpp @@ -60,8 +60,3 @@ psr::collectAllocatedTypes(const llvm::Module &Mod) { AllocatedTypes.begin(), AllocatedTypes.end()); return AllocatedCompositeTypes; } - -std::vector -psr::collectAllocatedTypes(const LLVMProjectIRDB &IRDB) { - return collectAllocatedTypes(*IRDB.getModule()); -} From 7b5d45063d2f547097cf3c9101ca5272536800fb Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 8 Sep 2025 14:05:00 +0200 Subject: [PATCH 9/9] Remove some unneeded includes --- .../DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h | 3 --- include/phasar/PhasarLLVM/Utils/AllocatedTypes.h | 1 + .../DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp | 7 ------- 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h index 07c98e416c..a09c1a63b1 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h @@ -24,12 +24,9 @@ #include "phasar/Utils/Printer.h" #include "llvm/ADT/StringRef.h" -#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Demangle/Demangle.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Value.h" #include diff --git a/include/phasar/PhasarLLVM/Utils/AllocatedTypes.h b/include/phasar/PhasarLLVM/Utils/AllocatedTypes.h index 98bdf91dc5..eeba5632ee 100644 --- 
a/include/phasar/PhasarLLVM/Utils/AllocatedTypes.h +++ b/include/phasar/PhasarLLVM/Utils/AllocatedTypes.h @@ -11,6 +11,7 @@ #define PHASAR_PHASARLLVM_UTILS_ALLOCATEDTYPES_H #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Module.h" #include diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp index f329c3445d..03e287e869 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.cpp @@ -9,10 +9,7 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDETypeStateAnalysis.h" -#include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" -#include "phasar/DataFlow/IfdsIde/FlowFunctions.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" -#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFlowFunctions.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/TypeStateDescriptions/TypeStateDescription.h" @@ -21,10 +18,7 @@ #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Demangle/Demangle.h" -#include "llvm/IR/AbstractCallSite.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" @@ -35,7 +29,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include #include namespace psr::detail {