From e41b4b2f3d9e601cf2e5b3ed39a3fe560d9daf64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D9=85=D8=B5=D8=B7=D9=81=D9=8A=20=D9=85=D8=AD=D9=85=D9=88?= =?UTF-8?q?=D8=AF=20=D9=83=D9=85=D8=A7=D9=84=20=D8=A7=D9=84=D8=AF=D9=8A?= =?UTF-8?q?=D9=86?= <48567303+moste00@users.noreply.github.com> Date: Fri, 18 Jul 2025 01:31:01 +0300 Subject: [PATCH 1/6] Added RISCV to supported architectures and added code to recognize its immediate operands --- llvm/utils/TableGen/Printer.h | 2 ++ llvm/utils/TableGen/PrinterCapstone.cpp | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/llvm/utils/TableGen/Printer.h b/llvm/utils/TableGen/Printer.h index 551fee2ba9e3..9380ec6b404f 100644 --- a/llvm/utils/TableGen/Printer.h +++ b/llvm/utils/TableGen/Printer.h @@ -25,6 +25,8 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" +#include + typedef enum { ST_NONE, ST_DECL_OS, diff --git a/llvm/utils/TableGen/PrinterCapstone.cpp b/llvm/utils/TableGen/PrinterCapstone.cpp index 413b1b791e2a..95a13114a024 100644 --- a/llvm/utils/TableGen/PrinterCapstone.cpp +++ b/llvm/utils/TableGen/PrinterCapstone.cpp @@ -1116,7 +1116,7 @@ void PrinterCapstone::decoderEmitterEmitDecodeInstruction( std::set InsnBytesAsUint24 = {"Xtensa"}; std::set InsnBytesAsUint32 = {"ARM", "AArch64", "LoongArch", "Alpha", "Mips", "TriCore", - "ARC", "Sparc"}; + "ARC", "Sparc", "RISCV"}; std::set InsnBytesAsUint64 = {"SystemZ", "ARC"}; bool MacroDefined = false; if (InsnBytesAsUint16.find(TargetName) != InsnBytesAsUint16.end()) { @@ -2983,6 +2983,15 @@ Record *argInitOpToRecord(Init *ArgInit) { return Rec; } +// diagram +// https://regexper.com/#OPERAND_%5BUS%5DIMM%5B0-9%5D%7B1%2C2%7D%28_%5BA-Z0-9%5D%2B%29*%7COPERAND_ZERO%7COPERAND_RVKRNUM%7COPERAND_VTYPEI%5B0-9%5D%7B1%2C2%7D%7COPERAND_UIMMLOG2XLEN%28_NONZERO%29%3F%7COPERAND_CLUI_IMM +static const std::regex RiscvImmOperandsPattern( + "OPERAND_[US]IMM[0-9]{1,2}(_[A-Z0-9]+)*" // e.g. 
OPERAND_UIMM12_NONZERO_BLAHBLAH42 + "|" "OPERAND_ZERO" "|" "OPERAND_RVKRNUM" + "|" "OPERAND_VTYPEI[0-9]{1,2}" // e.g. OPERAND_VTYPEI10 + "|" "OPERAND_UIMMLOG2XLEN(_NONZERO)?" + "|" "OPERAND_CLUI_IMM"); + std::string getPrimaryCSOperandType(Record const *OpRec) { std::string OperandType; if (OpRec->isSubClassOf("PredicateOperand")) @@ -3026,6 +3035,10 @@ std::string getPrimaryCSOperandType(Record const *OpRec) { return "CS_OP_REG"; else if (OperandType == "OPERAND_NM_GPREL21") return "CS_OP_REG"; + // RISCV (keep this as the last check because it matches a lot of strings, + // so it might shadow another architecture's operand names if it's moved up) + else if (std::regex_match(OperandType, RiscvImmOperandsPattern)) + return "CS_OP_IMM"; else if (OperandType == "OPERAND_NM_SAVE_REGLIST") return "CS_OP_INVALID"; else From ca565fc7e00b019d58d0193a5712ea75f97fbf80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D9=85=D8=B5=D8=B7=D9=81=D9=8A=20=D9=85=D8=AD=D9=85=D9=88?= =?UTF-8?q?=D8=AF=20=D9=83=D9=85=D8=A7=D9=84=20=D8=A7=D9=84=D8=AF=D9=8A?= =?UTF-8?q?=D9=86?= <48567303+moste00@users.noreply.github.com> Date: Sat, 26 Jul 2025 18:29:47 +0300 Subject: [PATCH 2/6] added support for primary key index generation and other fixes --- llvm/utils/TableGen/PrinterCapstone.cpp | 88 ++++++++++++++++--- .../utils/TableGen/SearchableTableEmitter.cpp | 7 ++ 2 files changed, 84 insertions(+), 11 deletions(-) diff --git a/llvm/utils/TableGen/PrinterCapstone.cpp b/llvm/utils/TableGen/PrinterCapstone.cpp index 95a13114a024..ac51be956f18 100644 --- a/llvm/utils/TableGen/PrinterCapstone.cpp +++ b/llvm/utils/TableGen/PrinterCapstone.cpp @@ -1112,7 +1112,7 @@ void PrinterCapstone::decoderEmitterEmitDecodeInstruction( << " /* Bogisity detected in disassembler state machine! 
*/ \\\n" << "}\n\n"; - std::set InsnBytesAsUint16 = {"ARM", "TriCore", "ARC"}; + std::set InsnBytesAsUint16 = {"ARM", "TriCore", "ARC", "RISCV"}; std::set InsnBytesAsUint24 = {"Xtensa"}; std::set InsnBytesAsUint32 = {"ARM", "AArch64", "LoongArch", "Alpha", "Mips", "TriCore", @@ -4094,6 +4094,8 @@ std::string getTableNamespacePrefix(const GenericTable &Table, NSTable = &ARMNSTypePairs; else if (StringRef(TargetName).upper() == "SPARC") NSTable = &SparcNSTypePairs; + else if (StringRef(TargetName).upper() == "RISCV") + return "RISCV_"; // don't bother with a table else PrintFatalNote("No Namespace Type table defined for target."); @@ -4179,9 +4181,70 @@ void PrinterCapstone::searchableTablesEmitIfFieldCase( void PrinterCapstone::searchableTablesEmitKeyTypeStruct( const GenericTable &Table, const SearchIndex &Index) const {} +uint64_t BitsInitToUInt(const BitsInit *BI) { + uint64_t Value = 0; + for (unsigned I = 0, Ie = BI->getNumBits(); I != Ie; ++I) { + if (BitInit *B = dyn_cast(BI->getBit(I))) + Value |= (uint64_t)B->getValue() << I; + } + return Value; +} + void PrinterCapstone::searchableTablesEmitKeyArray(const GenericTable &Table, const SearchIndex &Index, - bool IsPrimary) const {} + bool IsPrimary) const { + if (!IsPrimary) + return; + if (Index.Fields.size() != 1) + return; + + GenericField IndexField = Index.Fields[0]; + if (IndexField.RecType == nullptr) + return; + + RecTy::RecTyKind Kind = IndexField.RecType->getRecTyKind(); + // only numerical or string fields are searchable + if (Kind != RecTy::BitRecTyKind && Kind != RecTy::BitsRecTyKind + && Kind != RecTy::IntRecTyKind && Kind != RecTy::StringRecTyKind) + return; + + raw_string_ostream &OS = searchableTablesGetOS(ST_IMPL_OS); + OS << "static const struct "; + + bool IsNumericIndex = Kind != RecTy::StringRecTyKind; + // which struct to emit to represent the index type ? 
+ if (IsNumericIndex) + OS << "IndexType"; + else + OS << "IndexTypeStr"; + + ListSeparator LS; + OS << " Index[] = {\n" << LS; + + int64_t idx = 0; + for (auto & entry : Table.Entries) { + OS << "{"; + switch (Kind) { + case RecTy::BitRecTyKind: + OS << ((entry->getValueAsBit(IndexField.Name))? "true" : "false"); + break; + case RecTy::BitsRecTyKind: + OS << BitsInitToUInt(entry->getValueAsBitsInit(IndexField.Name)); + break; + case RecTy::IntRecTyKind: + OS << entry->getValueAsInt(IndexField.Name); + break; + case RecTy::StringRecTyKind: + OS << entry->getValueAsString(IndexField.Name); + break; + default: + llvm_unreachable("Kind of Index MUST be Bit, Bits, Int, or String"); + } + OS << "," << idx << "}" << LS << "\n"; + idx++; + } + OS << "};\n"; +} void PrinterCapstone::searchableTablesEmitIndexLamda( const SearchIndex &Index, StringRef const &IndexName, @@ -4224,14 +4287,6 @@ void PrinterCapstone::searchableTablesEmitMapII() const { OutS << " { "; } -uint64_t BitsInitToUInt(const BitsInit *BI) { - uint64_t Value = 0; - for (unsigned I = 0, Ie = BI->getNumBits(); I != Ie; ++I) { - if (BitInit *B = dyn_cast(BI->getBit(I))) - Value |= (uint64_t)B->getValue() << I; - } - return Value; -} unsigned getEnumValue(Record *Entry) { if (!Entry->getValue("EnumValueField") || @@ -4241,8 +4296,19 @@ unsigned getEnumValue(Record *Entry) { BitsInit *BI = Entry->getValueAsBitsInit("Encoding"); return BitsInitToUInt(BI); } + // RISCV fields + if (Entry->getValue("Inst")) { + BitsInit *BI = Entry->getValueAsBitsInit("Inst"); + return BitsInitToUInt(BI); + } + if (Entry->getValue("Value")) { + BitsInit *BI = Entry->getValueAsBitsInit("Value"); + return BitsInitToUInt(BI); + } + PrintWarning("Couldn't find an enum value for the following entry, returning a dummy 0 value"); Entry->dump(); - PrintFatalNote("Which of those fields above are the encoding/enum value?"); + PrintWarning("Which of those fields above are the encoding/enum value?"); + return 0; } StringRef EnumValField = 
Entry->getValueAsString("EnumValueField"); return BitsInitToUInt(Entry->getValueAsBitsInit(EnumValField)); diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp index 5b722b49af6b..336507a2edf7 100644 --- a/llvm/utils/TableGen/SearchableTableEmitter.cpp +++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp @@ -466,6 +466,9 @@ void SearchableTableEmitter::run() { } for (auto *TableRec : Records.getAllDerivedDefinitions("GenericTable")) { + if (TableRec->getName().str() == "RISCVVIntrinsicsTable") + continue; // special case because this table has invalid characters that always fails compilation + auto Table = std::make_unique(); Table->Name = std::string(TableRec->getName()); Table->Locs = TableRec->getLoc(); @@ -637,6 +640,10 @@ void EmitSearchableTables(RecordKeeper &RK, raw_ostream &OS) { // Sparc's lowest class is InstSP not I IDef = RK.getClass("InstSP"); } + if (!IDef) { + // try RISCV's root class which is RVInstCommon not I + IDef = RK.getClass("RVInstCommon"); + } if (!IDef) { // If this is reached we need to implement the search for other classes which have Namespace set. 
llvm_unreachable("Root instruction class \"I\" does not exist for this target."); From 974f448ae98c22a4989217997a5264ab312623d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D9=85=D8=B5=D8=B7=D9=81=D9=8A=20=D9=85=D8=AD=D9=85=D9=88?= =?UTF-8?q?=D8=AF=20=D9=83=D9=85=D8=A7=D9=84=20=D8=A7=D9=84=D8=AF=D9=8A?= =?UTF-8?q?=D9=86?= <48567303+moste00@users.noreply.github.com> Date: Sat, 23 Aug 2025 21:42:58 +0300 Subject: [PATCH 3/6] added compressed instruction info generation and the necessary fixes --- llvm/utils/TableGen/CompressInstEmitter.cpp | 453 +------------------- llvm/utils/TableGen/Printer.h | 65 +++ llvm/utils/TableGen/PrinterCapstone.cpp | 167 ++++++++ llvm/utils/TableGen/PrinterLLVM.cpp | 412 ++++++++++++++++++ 4 files changed, 661 insertions(+), 436 deletions(-) diff --git a/llvm/utils/TableGen/CompressInstEmitter.cpp b/llvm/utils/TableGen/CompressInstEmitter.cpp index f703fff0ef3e..c7fad2a782a3 100644 --- a/llvm/utils/TableGen/CompressInstEmitter.cpp +++ b/llvm/utils/TableGen/CompressInstEmitter.cpp @@ -75,6 +75,7 @@ #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" +#include "Printer.h" #include #include using namespace llvm; @@ -83,45 +84,11 @@ using namespace llvm; namespace { class CompressInstEmitter { - struct OpData { - enum MapKind { Operand, Imm, Reg }; - MapKind Kind; - union { - // Operand number mapped to. - unsigned Operand; - // Integer immediate value. - int64_t Imm; - // Physical register. - Record *Reg; - } Data; - // Tied operand index within the instruction. - int TiedOpIdx = -1; - }; - struct CompressPat { - // The source instruction definition. - CodeGenInstruction Source; - // The destination instruction to transform to. - CodeGenInstruction Dest; - // Required target features to enable pattern. - std::vector PatReqFeatures; - // Maps operands in the Source Instruction to - // the corresponding Dest instruction operand. 
- IndexedMap SourceOperandMap; - // Maps operands in the Dest Instruction - // to the corresponding Source instruction operand. - IndexedMap DestOperandMap; - - bool IsCompressOnly; - CompressPat(CodeGenInstruction &S, CodeGenInstruction &D, - std::vector RF, IndexedMap &SourceMap, - IndexedMap &DestMap, bool IsCompressOnly) - : Source(S), Dest(D), PatReqFeatures(RF), SourceOperandMap(SourceMap), - DestOperandMap(DestMap), IsCompressOnly(IsCompressOnly) {} - }; - enum EmitterType { Compress, Uncompress, CheckCompress }; + RecordKeeper &Records; CodeGenTarget Target; SmallVector CompressPatterns; + PrinterLLVM *PI; void addDagOperandMapping(Record *Rec, DagInit *Dag, CodeGenInstruction &Inst, IndexedMap &OperandMap, bool IsSourceInst); @@ -483,413 +450,27 @@ void CompressInstEmitter::evaluateCompressPat(Record *Rec) { Rec->getValueAsBit("isCompressOnly"))); } -static void -getReqFeatures(std::set> &FeaturesSet, - std::set>> &AnyOfFeatureSets, - const std::vector &ReqFeatures) { - for (auto &R : ReqFeatures) { - const DagInit *D = R->getValueAsDag("AssemblerCondDag"); - std::string CombineType = D->getOperator()->getAsString(); - if (CombineType != "any_of" && CombineType != "all_of") - PrintFatalError(R->getLoc(), "Invalid AssemblerCondDag!"); - if (D->getNumArgs() == 0) - PrintFatalError(R->getLoc(), "Invalid AssemblerCondDag!"); - bool IsOr = CombineType == "any_of"; - std::set> AnyOfSet; - - for (auto *Arg : D->getArgs()) { - bool IsNot = false; - if (auto *NotArg = dyn_cast(Arg)) { - if (NotArg->getOperator()->getAsString() != "not" || - NotArg->getNumArgs() != 1) - PrintFatalError(R->getLoc(), "Invalid AssemblerCondDag!"); - Arg = NotArg->getArg(0); - IsNot = true; - } - if (!isa(Arg) || - !cast(Arg)->getDef()->isSubClassOf("SubtargetFeature")) - PrintFatalError(R->getLoc(), "Invalid AssemblerCondDag!"); - if (IsOr) - AnyOfSet.insert({IsNot, cast(Arg)->getDef()->getName()}); - else - FeaturesSet.insert({IsNot, cast(Arg)->getDef()->getName()}); - } - - if (IsOr) 
- AnyOfFeatureSets.insert(AnyOfSet); - } -} - -static unsigned getPredicates(DenseMap &PredicateMap, - std::vector &Predicates, - Record *Rec, StringRef Name) { - unsigned &Entry = PredicateMap[Rec]; - if (Entry) - return Entry; - - if (!Rec->isValueUnset(Name)) { - Predicates.push_back(Rec); - Entry = Predicates.size(); - return Entry; - } - - PrintFatalError(Rec->getLoc(), "No " + Name + - " predicate on this operand at all: '" + - Rec->getName() + "'"); - return 0; -} - -static void printPredicates(const std::vector &Predicates, - StringRef Name, raw_ostream &OS) { - for (unsigned I = 0; I < Predicates.size(); ++I) { - StringRef Pred = Predicates[I]->getValueAsString(Name); - OS << " case " << I + 1 << ": {\n" - << " // " << Predicates[I]->getName() << "\n" - << " " << Pred << "\n" - << " }\n"; - } -} - -static void mergeCondAndCode(raw_ostream &CombinedStream, StringRef CondStr, - StringRef CodeStr) { - // Remove first indentation and last '&&'. - CondStr = CondStr.drop_front(6).drop_back(4); - CombinedStream.indent(4) << "if (" << CondStr << ") {\n"; - CombinedStream << CodeStr; - CombinedStream.indent(4) << " return true;\n"; - CombinedStream.indent(4) << "} // if\n"; -} - void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &OS, EmitterType EType) { - Record *AsmWriter = Target.getAsmWriter(); - if (!AsmWriter->getValueAsInt("PassSubtarget")) - PrintFatalError(AsmWriter->getLoc(), - "'PassSubtarget' is false. SubTargetInfo object is needed " - "for target features.\n"); - - StringRef TargetName = Target.getName(); - - // Sort entries in CompressPatterns to handle instructions that can have more - // than one candidate for compression\uncompression, e.g ADD can be - // transformed to a C_ADD or a C_MV. When emitting 'uncompress()' function the - // source and destination are flipped and the sort key needs to change - // accordingly. 
- llvm::stable_sort(CompressPatterns, [EType](const CompressPat &LHS, - const CompressPat &RHS) { - if (EType == EmitterType::Compress || EType == EmitterType::CheckCompress) - return (LHS.Source.TheDef->getName() < RHS.Source.TheDef->getName()); - return (LHS.Dest.TheDef->getName() < RHS.Dest.TheDef->getName()); - }); - - // A list of MCOperandPredicates for all operands in use, and the reverse map. - std::vector MCOpPredicates; - DenseMap MCOpPredicateMap; - // A list of ImmLeaf Predicates for all operands in use, and the reverse map. - std::vector ImmLeafPredicates; - DenseMap ImmLeafPredicateMap; - - std::string F; - std::string FH; - raw_string_ostream Func(F); - raw_string_ostream FuncH(FH); - - if (EType == EmitterType::Compress) - OS << "\n#ifdef GEN_COMPRESS_INSTR\n" - << "#undef GEN_COMPRESS_INSTR\n\n"; - else if (EType == EmitterType::Uncompress) - OS << "\n#ifdef GEN_UNCOMPRESS_INSTR\n" - << "#undef GEN_UNCOMPRESS_INSTR\n\n"; - else if (EType == EmitterType::CheckCompress) - OS << "\n#ifdef GEN_CHECK_COMPRESS_INSTR\n" - << "#undef GEN_CHECK_COMPRESS_INSTR\n\n"; - - if (EType == EmitterType::Compress) { - FuncH << "static bool compressInst(MCInst &OutInst,\n"; - FuncH.indent(25) << "const MCInst &MI,\n"; - FuncH.indent(25) << "const MCSubtargetInfo &STI) {\n"; - } else if (EType == EmitterType::Uncompress) { - FuncH << "static bool uncompressInst(MCInst &OutInst,\n"; - FuncH.indent(27) << "const MCInst &MI,\n"; - FuncH.indent(27) << "const MCSubtargetInfo &STI) {\n"; - } else if (EType == EmitterType::CheckCompress) { - FuncH << "static bool isCompressibleInst(const MachineInstr &MI,\n"; - FuncH.indent(31) << "const " << TargetName << "Subtarget &STI) {\n"; - } - - if (CompressPatterns.empty()) { - OS << FuncH.str(); - OS.indent(2) << "return false;\n}\n"; - if (EType == EmitterType::Compress) - OS << "\n#endif //GEN_COMPRESS_INSTR\n"; - else if (EType == EmitterType::Uncompress) - OS << "\n#endif //GEN_UNCOMPRESS_INSTR\n\n"; - else if (EType == 
EmitterType::CheckCompress) - OS << "\n#endif //GEN_CHECK_COMPRESS_INSTR\n\n"; - return; - } - - std::string CaseString; - raw_string_ostream CaseStream(CaseString); - StringRef PrevOp; - StringRef CurOp; - CaseStream << " switch (MI.getOpcode()) {\n"; - CaseStream << " default: return false;\n"; - - bool CompressOrCheck = - EType == EmitterType::Compress || EType == EmitterType::CheckCompress; - bool CompressOrUncompress = - EType == EmitterType::Compress || EType == EmitterType::Uncompress; - std::string ValidatorName = - CompressOrUncompress - ? (TargetName + "ValidateMCOperandFor" + - (EType == EmitterType::Compress ? "Compress" : "Uncompress")) - .str() - : ""; - - for (auto &CompressPat : CompressPatterns) { - if (EType == EmitterType::Uncompress && CompressPat.IsCompressOnly) - continue; - - std::string CondString; - std::string CodeString; - raw_string_ostream CondStream(CondString); - raw_string_ostream CodeStream(CodeString); - CodeGenInstruction &Source = - CompressOrCheck ? CompressPat.Source : CompressPat.Dest; - CodeGenInstruction &Dest = - CompressOrCheck ? CompressPat.Dest : CompressPat.Source; - IndexedMap SourceOperandMap = CompressOrCheck - ? CompressPat.SourceOperandMap - : CompressPat.DestOperandMap; - IndexedMap &DestOperandMap = CompressOrCheck - ? CompressPat.DestOperandMap - : CompressPat.SourceOperandMap; - - CurOp = Source.TheDef->getName(); - // Check current and previous opcode to decide to continue or end a case. - if (CurOp != PrevOp) { - if (!PrevOp.empty()) - CaseStream.indent(6) << "break;\n } // case " + PrevOp + "\n"; - CaseStream.indent(4) << "case " + TargetName + "::" + CurOp + ": {\n"; - } - - std::set> FeaturesSet; - std::set>> AnyOfFeatureSets; - // Add CompressPat required features. - getReqFeatures(FeaturesSet, AnyOfFeatureSets, CompressPat.PatReqFeatures); - - // Add Dest instruction required features. 
- std::vector ReqFeatures; - std::vector RF = Dest.TheDef->getValueAsListOfDefs("Predicates"); - copy_if(RF, std::back_inserter(ReqFeatures), [](Record *R) { - return R->getValueAsBit("AssemblerMatcherPredicate"); - }); - getReqFeatures(FeaturesSet, AnyOfFeatureSets, ReqFeatures); - - // Emit checks for all required features. - for (auto &Op : FeaturesSet) { - StringRef Not = Op.first ? "!" : ""; - CondStream.indent(6) << Not << "STI.getFeatureBits()[" << TargetName - << "::" << Op.second << "]" - << " &&\n"; - } - - // Emit checks for all required feature groups. - for (auto &Set : AnyOfFeatureSets) { - CondStream.indent(6) << "("; - for (auto &Op : Set) { - bool IsLast = &Op == &*Set.rbegin(); - StringRef Not = Op.first ? "!" : ""; - CondStream << Not << "STI.getFeatureBits()[" << TargetName - << "::" << Op.second << "]"; - if (!IsLast) - CondStream << " || "; - } - CondStream << ") &&\n"; - } - - // Start Source Inst operands validation. - unsigned OpNo = 0; - for (OpNo = 0; OpNo < Source.Operands.size(); ++OpNo) { - if (SourceOperandMap[OpNo].TiedOpIdx != -1) { - if (Source.Operands[OpNo].Rec->isSubClassOf("RegisterClass")) - CondStream.indent(6) - << "(MI.getOperand(" << OpNo << ").isReg()) && (MI.getOperand(" - << SourceOperandMap[OpNo].TiedOpIdx << ").isReg()) &&\n" - << " (MI.getOperand(" << OpNo - << ").getReg() == MI.getOperand(" - << SourceOperandMap[OpNo].TiedOpIdx << ").getReg()) &&\n"; - else - PrintFatalError("Unexpected tied operand types!\n"); - } - // Check for fixed immediates\registers in the source instruction. - switch (SourceOperandMap[OpNo].Kind) { - case OpData::Operand: - // We don't need to do anything for source instruction operand checks. 
- break; - case OpData::Imm: - CondStream.indent(6) - << "(MI.getOperand(" << OpNo << ").isImm()) &&\n" - << " (MI.getOperand(" << OpNo - << ").getImm() == " << SourceOperandMap[OpNo].Data.Imm << ") &&\n"; - break; - case OpData::Reg: { - Record *Reg = SourceOperandMap[OpNo].Data.Reg; - CondStream.indent(6) - << "(MI.getOperand(" << OpNo << ").isReg()) &&\n" - << " (MI.getOperand(" << OpNo << ").getReg() == " << TargetName - << "::" << Reg->getName() << ") &&\n"; - break; - } - } - } - CodeStream.indent(6) << "// " << Dest.AsmString << "\n"; - if (CompressOrUncompress) - CodeStream.indent(6) << "OutInst.setOpcode(" << TargetName - << "::" << Dest.TheDef->getName() << ");\n"; - OpNo = 0; - for (const auto &DestOperand : Dest.Operands) { - CodeStream.indent(6) << "// Operand: " << DestOperand.Name << "\n"; - switch (DestOperandMap[OpNo].Kind) { - case OpData::Operand: { - unsigned OpIdx = DestOperandMap[OpNo].Data.Operand; - // Check that the operand in the Source instruction fits - // the type for the Dest instruction. - if (DestOperand.Rec->isSubClassOf("RegisterClass") || - DestOperand.Rec->isSubClassOf("RegisterOperand")) { - auto *ClassRec = DestOperand.Rec->isSubClassOf("RegisterClass") - ? DestOperand.Rec - : DestOperand.Rec->getValueAsDef("RegClass"); - // This is a register operand. Check the register class. - // Don't check register class if this is a tied operand, it was done - // for the operand its tied to. - if (DestOperand.getTiedRegister() == -1) - CondStream.indent(6) - << "(MI.getOperand(" << OpIdx << ").isReg()) &&\n" - << " (" << TargetName << "MCRegisterClasses[" << TargetName - << "::" << ClassRec->getName() - << "RegClassID].contains(MI.getOperand(" << OpIdx - << ").getReg())) &&\n"; - - if (CompressOrUncompress) - CodeStream.indent(6) - << "OutInst.addOperand(MI.getOperand(" << OpIdx << "));\n"; - } else { - // Handling immediate operands. 
- if (CompressOrUncompress) { - unsigned Entry = - getPredicates(MCOpPredicateMap, MCOpPredicates, DestOperand.Rec, - "MCOperandPredicate"); - CondStream.indent(6) - << ValidatorName << "(" - << "MI.getOperand(" << OpIdx << "), STI, " << Entry << ") &&\n"; - } else { - unsigned Entry = - getPredicates(ImmLeafPredicateMap, ImmLeafPredicates, - DestOperand.Rec, "ImmediateCode"); - CondStream.indent(6) - << "MI.getOperand(" << OpIdx << ").isImm() &&\n"; - CondStream.indent(6) << TargetName << "ValidateMachineOperand(" - << "MI.getOperand(" << OpIdx << "), &STI, " - << Entry << ") &&\n"; - } - if (CompressOrUncompress) - CodeStream.indent(6) - << "OutInst.addOperand(MI.getOperand(" << OpIdx << "));\n"; - } - break; - } - case OpData::Imm: { - if (CompressOrUncompress) { - unsigned Entry = getPredicates(MCOpPredicateMap, MCOpPredicates, - DestOperand.Rec, "MCOperandPredicate"); - CondStream.indent(6) - << ValidatorName << "(" - << "MCOperand::createImm(" << DestOperandMap[OpNo].Data.Imm - << "), STI, " << Entry << ") &&\n"; - } else { - unsigned Entry = getPredicates(ImmLeafPredicateMap, ImmLeafPredicates, - DestOperand.Rec, "ImmediateCode"); - CondStream.indent(6) - << TargetName - << "ValidateMachineOperand(MachineOperand::CreateImm(" - << DestOperandMap[OpNo].Data.Imm << "), &STI, " << Entry - << ") &&\n"; - } - if (CompressOrUncompress) - CodeStream.indent(6) << "OutInst.addOperand(MCOperand::createImm(" - << DestOperandMap[OpNo].Data.Imm << "));\n"; - } break; - case OpData::Reg: { - if (CompressOrUncompress) { - // Fixed register has been validated at pattern validation time. 
- Record *Reg = DestOperandMap[OpNo].Data.Reg; - CodeStream.indent(6) - << "OutInst.addOperand(MCOperand::createReg(" << TargetName - << "::" << Reg->getName() << "));\n"; - } - } break; - } - ++OpNo; - } - if (CompressOrUncompress) - CodeStream.indent(6) << "OutInst.setLoc(MI.getLoc());\n"; - mergeCondAndCode(CaseStream, CondStream.str(), CodeStream.str()); - PrevOp = CurOp; - } - Func << CaseStream.str() << "\n"; - // Close brace for the last case. - Func.indent(4) << "} // case " << CurOp << "\n"; - Func.indent(2) << "} // switch\n"; - Func.indent(2) << "return false;\n}\n"; - - if (!MCOpPredicates.empty()) { - OS << "static bool " << ValidatorName << "(const MCOperand &MCOp,\n" - << " const MCSubtargetInfo &STI,\n" - << " unsigned PredicateIndex) {\n" - << " switch (PredicateIndex) {\n" - << " default:\n" - << " llvm_unreachable(\"Unknown MCOperandPredicate kind\");\n" - << " break;\n"; - - printPredicates(MCOpPredicates, "MCOperandPredicate", OS); - - OS << " }\n" - << "}\n\n"; - } - - if (!ImmLeafPredicates.empty()) { - OS << "static bool " << TargetName - << "ValidateMachineOperand(const MachineOperand &MO,\n" - << " const " << TargetName << "Subtarget *Subtarget,\n" - << " unsigned PredicateIndex) {\n" - << " int64_t Imm = MO.getImm();\n" - << " switch (PredicateIndex) {\n" - << " default:\n" - << " llvm_unreachable(\"Unknown ImmLeaf Predicate kind\");\n" - << " break;\n"; - - printPredicates(ImmLeafPredicates, "ImmediateCode", OS); - - OS << " }\n" - << "}\n\n"; - } - - OS << FuncH.str(); - OS << Func.str(); - - if (EType == EmitterType::Compress) - OS << "\n#endif //GEN_COMPRESS_INSTR\n"; - else if (EType == EmitterType::Uncompress) - OS << "\n#endif //GEN_UNCOMPRESS_INSTR\n\n"; - else if (EType == EmitterType::CheckCompress) - OS << "\n#endif //GEN_CHECK_COMPRESS_INSTR\n\n"; + PI->compressInstEmitterEmitCompressInstEmitter(OS, EType, Target, CompressPatterns); } void CompressInstEmitter::run(raw_ostream &OS) { std::vector Insts = 
Records.getAllDerivedDefinitions("CompressPat"); + formatted_raw_ostream FOS(OS); + switch (PrinterLLVM::getLanguage()) { + case PRINTER_LANG_CPP: + PI = new PrinterLLVM(FOS); + break; + case PRINTER_LANG_CAPSTONE_C: + PI = new PrinterCapstone(FOS); + break; + + default: llvm_unreachable("No such language, only valid languages are " + "PRINTER_LANG_CPP and PRINTER_LANG_CAPSTONE_C"); + } + // Process the CompressPat definitions, validating them as we do so. for (unsigned I = 0, E = Insts.size(); I != E; ++I) evaluateCompressPat(Insts[I]); diff --git a/llvm/utils/TableGen/Printer.h b/llvm/utils/TableGen/Printer.h index 9380ec6b404f..7c30ed2de84f 100644 --- a/llvm/utils/TableGen/Printer.h +++ b/llvm/utils/TableGen/Printer.h @@ -27,6 +27,7 @@ #include +#include "llvm/ADT/IndexedMap.h" typedef enum { ST_NONE, ST_DECL_OS, @@ -34,6 +35,46 @@ typedef enum { ST_ENUM_SYSOPS_OS, } StreamType; +// standalone type definitions +// Backend: CompressInstEmitter +struct OpData { + enum MapKind { Operand, Imm, Reg }; + MapKind Kind; + union { + // Operand number mapped to. + unsigned Operand; + // Integer immediate value. + int64_t Imm; + // Physical register. + Record *Reg; + } Data; + // Tied operand index within the instruction. + int TiedOpIdx = -1; +}; +struct CompressPat { + // The source instruction definition. + CodeGenInstruction Source; + // The destination instruction to transform to. + CodeGenInstruction Dest; + // Required target features to enable pattern. + std::vector PatReqFeatures; + // Maps operands in the Source Instruction to + // the corresponding Dest instruction operand. + IndexedMap SourceOperandMap; + // Maps operands in the Dest Instruction + // to the corresponding Source instruction operand. 
+ IndexedMap DestOperandMap; + + bool IsCompressOnly; + CompressPat(CodeGenInstruction &S, CodeGenInstruction &D, + std::vector RF, IndexedMap &SourceMap, + IndexedMap &DestMap, bool IsCompressOnly) + : Source(S), Dest(D), PatReqFeatures(RF), SourceOperandMap(SourceMap), + DestOperandMap(DestMap), IsCompressOnly(IsCompressOnly) {} +}; + +enum EmitterType { Compress, Uncompress, CheckCompress }; + namespace llvm { class PrinterBitVectorEmitter { @@ -994,6 +1035,25 @@ class PrinterLLVM { Record *Entry) const; virtual void searchableTablesEmitMapIV(unsigned i) const; virtual void searchableTablesEmitMapV(); + + //--------------------------- + // Backend: CompressInstEmitter + //--------------------------- + virtual void compressInstEmitterEmitCompressInstEmitter(raw_ostream &OS, EmitterType EType, + // CompressInstEmitter members (not necessarily all of them, just the needed ones) + CodeGenTarget &Target, SmallVector &CompressPatterns); + + // static helpers + static void compressInstEmitterPrintPredicates(const std::vector &Predicates, + StringRef Name, raw_ostream &OS); + static void compressInstEmitterMergeCondAndCode(raw_ostream &CombinedStream, StringRef CondStr, + StringRef CodeStr); + static unsigned compressInstEmitterGetPredicates(DenseMap &PredicateMap, + std::vector &Predicates, + Record *Rec, StringRef Name); + static void compressInstEmitterGetReqFeatures(std::set> &FeaturesSet, + std::set>> &AnyOfFeatureSets, + const std::vector &ReqFeatures); }; //============================== @@ -1858,6 +1918,11 @@ class PrinterCapstone : public PrinterLLVM { Record *Entry) const override; void searchableTablesEmitMapIV(unsigned i) const override; void searchableTablesEmitMapV() override; + + // Backend: CompressInstEmitter + virtual void compressInstEmitterEmitCompressInstEmitter(raw_ostream &OS, EmitterType EType, + // CompressInstEmitter members (not necessarily all of them, just the needed ones) + CodeGenTarget &Target, SmallVector &CompressPatterns) override; }; 
} // end namespace llvm diff --git a/llvm/utils/TableGen/PrinterCapstone.cpp b/llvm/utils/TableGen/PrinterCapstone.cpp index ac51be956f18..3419240efd1c 100644 --- a/llvm/utils/TableGen/PrinterCapstone.cpp +++ b/llvm/utils/TableGen/PrinterCapstone.cpp @@ -772,6 +772,10 @@ static void patchTemplateArgs(const std::string &TargetName, while (B != std::string::npos && E != std::string::npos) { std::string const &DecName = Code.substr(0, B); std::string Args = Code.substr(B + 1, E - B - 1); + // HACK! Remove on proper fix + if (!(B < E)) { + return; + } std::string Rest = Code.substr(E + 1); if (Args.empty()) { return; @@ -815,6 +819,140 @@ static void patchPrintOperandAddr(std::string &Decoder) { Decoder = Regex(Find).sub(Replace, Decoder); } +static void patchSTIObject(std::string Target, std::string &Code) { + // searching for a subtarget arg in a function def or use + std::string FindSubtargetArg = ",?\\s*(const\\s+)?(MCSubtargetInfo|" + Target + "Subtarget)" // subtarget object type + + "\\s*[&*]\\s*" // as a reference or pointer + + "[a-zA-Z_][a-zA-Z0-9_]*"; // named anthing, optionally followed by comma (but not if it's the last arg) + Code = std::regex_replace(Code, std::regex(FindSubtargetArg), ""); // replace with nothing + + std::string FindGetFeatureBitsCall = "([a-zA-Z_][a-zA-Z0-9_]*\\.getFeatureBits\\(\\))" // a call of the form .getFeatureBits() + "\\[([a-zA-Z_][a-zA-Z0-9_]*)\\]"; + std::string ReplaceGetFeatureBitsCall = Target + "_getFeatureBits(MI->csh->mode, $2)"; + Code = std::regex_replace(Code, std::regex(FindGetFeatureBitsCall), ReplaceGetFeatureBitsCall); + + std::string FindLeftOverUsesOfSTI = ",?\\s*[&*]?\\s*STI"; + Code = std::regex_replace(Code, std::regex(FindLeftOverUsesOfSTI), ""); + + // target-specific replacements + if (Target == "RISCV") { + std::string Find64BitChecks = "([a-zA-Z_][a-zA-Z0-9_]*)->is64Bit\\(\\)" "|" + "([a-zA-Z_][a-zA-Z0-9_]*)?\\.getTargetTriple\\(\\)\\.isArch64Bit\\(\\)"; + Code = std::regex_replace(Code, 
std::regex(Find64BitChecks), Target + "_getFeatureBits(MI->csh->mode," + Target + "_Feature64Bit)");
+  }
+}
+
+static void patchIsGetOperand(std::string Target, std::string& Code) { // Rewrites C++ getOperand() chains in Code into Capstone C calls; Target is unused.
+  // will patch MI.getOperand(1).isReg() into MCOperand_isReg(MCInst_getOperand(MI, 1))
+  std::string FindGetOperand = "([a-zA-Z_][a-zA-Z0-9_]*)" // any object/pointer (as $1)
+                               "(\\.|->)getOperand\\(" // in method call expression getOperand ($2, ignored)
+                               "([^)]*)" // having any arguments (as $3)
+                               "\\)";
+
+  std::string FindIsGetImmOrReg = FindGetOperand +
+                                  "\\." // then one of the following chained method calls:
+                                  "(isReg|getReg" // isReg or getReg
+                                  "|isImm|getImm)\\(\\)"; // or isImm or getImm (as $4)
+  std::string ReplaceIsGetImmOrReg = "MCOperand_$4(MCInst_getOperand($1, $3))";
+  Code = std::regex_replace(Code, std::regex(FindIsGetImmOrReg), ReplaceIsGetImmOrReg); // chained form first, so the bare rewrite below cannot break it
+
+  // patch individual getOperand calls
+  std::string ReplaceGetOperand = "MCInst_getOperand($1, $3)";
+  Code = std::regex_replace(Code, std::regex(FindGetOperand), ReplaceGetOperand);
+}
+
+static void patchGetOpcode(std::string Target, std::string& Code) { // Rewrites X.getOpcode() into MCInst_getOpcode(X); Target is unused.
+  std::string FindGetOpcode = "([a-zA-Z_][a-zA-Z0-9_]*)\\.getOpcode\\(\\)";
+  std::string ReplaceGetOpcode = "MCInst_getOpcode($1)";
+  Code = std::regex_replace(Code, std::regex(FindGetOpcode), ReplaceGetOpcode);
+}
+
+static void patchAddOperandGetOperand(std::string Target, std::string& Code) { // Rewrites A.addOperand(B.getOperand(i)) into MCInst_addOperand2(A, MCInst_getOperand(B, i)); Target is unused.
+  std::string FindAddGetOperand = "([a-zA-Z_][a-zA-Z0-9_]*)" // any variable
+                                  "\\.addOperand\\(" // in method call addOperand
+                                  "([a-zA-Z_][a-zA-Z0-9_]*)\\." // whose argument is another method call on another variable
+                                  "getOperand\\(" "([^)]*)" "\\)" // of method getOperand and its argument
+                                  "\\)";
+  std::string ReplaceAddGetOperand = "MCInst_addOperand2($1, MCInst_getOperand($2, $3))"; // "MCInst_" (capital C), matching the other emitted calls
+  Code = std::regex_replace(Code, std::regex(FindAddGetOperand), ReplaceAddGetOperand);
+}
+
+static void patchAddOperandCreateOperand(std::string Target, std::string &Code) { // Rewrites A.addOperand(MCOperand_createReg/Imm(x)) into Capstone's append-operand helpers; Target is unused.
+  std::string FindAddCreateReg = "([a-zA-Z_][a-zA-Z0-9_]*)" // any variable
+                                 "\\.addOperand\\(" // in method call addOperand
+                                 "MCOperand_createReg\\(([a-zA-Z_][a-zA-Z0-9_]*)\\)" // on a createReg call
+                                 "\\)";
+  std::string ReplaceAddCreateReg = "MCOperand_CreateReg0($1, $2)"; // Capstone declares MCOperand_CreateReg0(MCInst *, unsigned)
+  Code = std::regex_replace(Code, std::regex(FindAddCreateReg), ReplaceAddCreateReg);
+
+  std::string FindAddCreateImm = "([a-zA-Z_][a-zA-Z0-9_]*)" // any variable
+                                 "\\.addOperand\\(" // in method call addOperand
+                                 "MCOperand_createImm\\((-?[0-9]+)\\)" // on a createImm call (the literal may be negative)
+                                 "\\)";
+  std::string ReplaceAddCreateImm = "MCOperand_CreateImm0($1, $2)"; // Capstone declares MCOperand_CreateImm0(MCInst *, int64_t)
+  Code = std::regex_replace(Code, std::regex(FindAddCreateImm), ReplaceAddCreateImm);
+}
+
+static void patchRegClassContains(std::string Target, std::string& Code) { // Rewrites Table[XRegClassID].contains( into the MCRegisterClass_contains( call prefix; Target is unused.
+  std::string FindRegClassContainsExpression = "([a-zA-Z_][a-zA-Z0-9_]*)" // any variable
+                                               "\\[" "([a-zA-Z_][a-zA-Z0-9_]*RegClassID)" "\\]" // indexed with a regclass ID enum
+                                               "\\.contains\\("; // followed by the start of a .contains check
+
+  std::string ReplaceRegClassContainsExpression = "MCRegisterClass_contains(MCRegisterInfo_getRegClass(MI->MRI, $2),"; // the original argument and ")" follow unchanged
+  Code = std::regex_replace(Code, std::regex(FindRegClassContainsExpression), ReplaceRegClassContainsExpression);
+}
+
+static void patchSetLoc(std::string Target, std::string& Code) { // Blanks out every statement that calls .setLoc; Target is unused.
+  // there is no loc property in MCInst struct in capstone
+  std::string FindSetLocCalls = ".*\\.setLoc.*;"; // '.' does not cross line terminators, so only the setLoc line itself is removed
+  Code = std::regex_replace(Code, std::regex(FindSetLocCalls), "");
+}
+
+static void patchRISCVValidateCompressedInst(std::string &Code)
{ + // replace the reference types with pointer types, remove const qualifiers for simplicity + // any identifier followed by '&' followed by any identifier, followed by comma or closed paren + // technically this could match a usage of the bitwise and operator in a comma expression, but this is unlikely + std::string FindReferenceTypes = "(const)?\\s*([a-zA-Z_][a-zA-Z0-9_]*)\\s*&\\s*([a-zA-Z_][a-zA-Z0-9_]*)(,|\\))"; + std::string ReplaceReferenceTypes = "$2 *$3$4"; + Code = std::regex_replace(Code, std::regex(FindReferenceTypes), ReplaceReferenceTypes); + + // also add a McInst argument to any function that doesn't contain it + Code = std::regex_replace(Code, std::regex("bool (RISCVValidate.*)\\("), "bool $1(MCInst *MI,"); + // modify calls to pass the argument + std::string start = "(^|\\r?\\n)"; + Code = std::regex_replace(Code, std::regex(start + "(\\s*RISCVValidate[^(]*)\\(" "(.*)" "\\)"),"$1$2(MI,$3)"); +} + +static void patchEvaluateAsConstantImm(std::string Target, std::string& Code) { + std::string FindEvaluateAsConstantImm = "([a-zA-Z_][a-zA-Z0-9_]*)\\." 
+ "evaluateAsConstantImm\\(" "([a-zA-Z_][a-zA-Z0-9_]*)" "\\)"; + // use comma operator to chain an assignment and a boolean value + std::string ReplaceEvaluateAsConstantImm = "($2 = MCOperand_getImm($1), MCOperand_isImm($1))"; + Code = std::regex_replace(Code, std::regex(FindEvaluateAsConstantImm), ReplaceEvaluateAsConstantImm); +} + +static void patchSetOpcode(std::string TargetName, std::string& Code) { + std::string FindSetOpcode = "([a-zA-Z_][a-zA-Z0-9_]*)\\.setOpcode\\(([a-zA-Z_][a-zA-Z0-9_]*)\\)"; + std::string ReplaceSetOpcode = "MCInst_setOpcode($1, $2)"; + Code = std::regex_replace(Code, std::regex(FindSetOpcode), ReplaceSetOpcode); +} + +static void patchIsBareSymbolRef(std::string TargetName, std::string& Code) { + std::string FindIsBareSymbolRef = "([a-zA-Z_][a-zA-Z0-9_]*)\\.isBareSymbolRef\\(\\)"; + std::string ReplaceIsBareSymbolRef = "MCOperand_isExpr($1)"; + Code = std::regex_replace(Code, std::regex(FindIsBareSymbolRef), ReplaceIsBareSymbolRef); +} + +static void patchIsUintIsShiftedUInt(std::string TargetName, std::string& Code) { + std::string FindIsUint = "is(U)?Int_([0-9]+)\\(" "([^)]+)" "\\)"; + std::string ReplaceIsUint = "is$1IntN($2, $3)"; + Code = std::regex_replace(Code, std::regex(FindIsUint), ReplaceIsUint); + + std::string FindIsShiftedUint = "isShifted(U)?Int_([0-9]+)_([0-9]+)\\(" "([^)]+)" "\\)"; + std::string ReplaceIsShiftedUint = "isShifted$1IntN($2, $3, $4)"; + Code = std::regex_replace(Code, std::regex(FindIsShiftedUint), ReplaceIsShiftedUint); +} + std::string PrinterCapstone::translateToC(std::string const &TargetName, std::string const &Code) { std::string PatchedCode(Code); @@ -822,8 +960,25 @@ std::string PrinterCapstone::translateToC(std::string const &TargetName, patchNullptr(PatchedCode); patchIsGetImmReg(PatchedCode); patchTemplateArgs(TargetName, PatchedCode); + patchSTIObject(TargetName, PatchedCode); + + patchAddOperandGetOperand(TargetName, PatchedCode); + patchIsGetOperand(TargetName, PatchedCode); // must be after 
patchAddOperandGetOperand + patchAddOperandCreateOperand(TargetName, PatchedCode); + + patchRegClassContains(TargetName, PatchedCode); + + patchSetLoc(TargetName, PatchedCode); + patchEvaluateAsConstantImm(TargetName, PatchedCode); + + patchSetOpcode(TargetName, PatchedCode); + patchGetOpcode(TargetName, PatchedCode); + patchIsBareSymbolRef(TargetName, PatchedCode); + patchIsUintIsShiftedUInt(TargetName, PatchedCode); if (TargetName == "ARM" || TargetName == "Alpha") { patchPrintOperandAddr(PatchedCode); + } else if (TargetName == "RISCV") { + patchRISCVValidateCompressedInst(PatchedCode); } return PatchedCode; } @@ -4365,4 +4520,16 @@ void PrinterCapstone::searchableTablesEmitMapV() { EnumOS << "#endif\n\n"; } +void PrinterCapstone::compressInstEmitterEmitCompressInstEmitter(raw_ostream &OS, EmitterType EType, + CodeGenTarget &Target, SmallVector &CompressPatterns) { + std::string CppOutput; + raw_string_ostream CppOutputStream(CppOutput); + // call the PrinterLLVM implementation to avoid duplicating the massive piece of logic there + PrinterLLVM::compressInstEmitterEmitCompressInstEmitter(CppOutputStream, EType, Target, CompressPatterns); + + // try to patch C++-isms in the output till it's valid C + std::string COutput = translateToC(Target.getName().str(), CppOutput); + OS << COutput; +} + } // end namespace llvm diff --git a/llvm/utils/TableGen/PrinterLLVM.cpp b/llvm/utils/TableGen/PrinterLLVM.cpp index c8f84aea3ccf..2a26594290b5 100644 --- a/llvm/utils/TableGen/PrinterLLVM.cpp +++ b/llvm/utils/TableGen/PrinterLLVM.cpp @@ -6426,4 +6426,416 @@ void PrinterLLVM::searchableTablesEmitMapIV(unsigned i) const { } void PrinterLLVM::searchableTablesEmitMapV() { OS << " };\n\n"; } + + //--------------------------- + // Backend: CompressInstEmitter + //--------------------------- + +void PrinterLLVM::compressInstEmitterEmitCompressInstEmitter(raw_ostream &OS, EmitterType EType, + CodeGenTarget &Target, SmallVector &CompressPatterns) { + // CodeGenTarget &Target = 
*CITarget; + // SmallVector &CompressPatterns = *CICompressPatterns; + + + Record *AsmWriter = Target.getAsmWriter(); + if (!AsmWriter->getValueAsInt("PassSubtarget")) + PrintFatalError(AsmWriter->getLoc(), + "'PassSubtarget' is false. SubTargetInfo object is needed " + "for target features.\n"); + + StringRef TargetName = Target.getName(); + + // Sort entries in CompressPatterns to handle instructions that can have more + // than one candidate for compression\uncompression, e.g ADD can be + // transformed to a C_ADD or a C_MV. When emitting 'uncompress()' function the + // source and destination are flipped and the sort key needs to change + // accordingly. + llvm::stable_sort(CompressPatterns, [EType](const CompressPat &LHS, + const CompressPat &RHS) { + if (EType == EmitterType::Compress || EType == EmitterType::CheckCompress) + return (LHS.Source.TheDef->getName() < RHS.Source.TheDef->getName()); + return (LHS.Dest.TheDef->getName() < RHS.Dest.TheDef->getName()); + }); + + // A list of MCOperandPredicates for all operands in use, and the reverse map. + std::vector MCOpPredicates; + DenseMap MCOpPredicateMap; + // A list of ImmLeaf Predicates for all operands in use, and the reverse map. 
+ std::vector ImmLeafPredicates; + DenseMap ImmLeafPredicateMap; + + std::string F; + std::string FH; + raw_string_ostream Func(F); + raw_string_ostream FuncH(FH); + + if (EType == EmitterType::Compress) + OS << "\n#ifdef GEN_COMPRESS_INSTR\n" + << "#undef GEN_COMPRESS_INSTR\n\n"; + else if (EType == EmitterType::Uncompress) + OS << "\n#ifdef GEN_UNCOMPRESS_INSTR\n" + << "#undef GEN_UNCOMPRESS_INSTR\n\n"; + else if (EType == EmitterType::CheckCompress) + OS << "\n#ifdef GEN_CHECK_COMPRESS_INSTR\n" + << "#undef GEN_CHECK_COMPRESS_INSTR\n\n"; + + if (EType == EmitterType::Compress) { + FuncH << "static bool compressInst(MCInst &OutInst,\n"; + FuncH.indent(25) << "const MCInst &MI,\n"; + FuncH.indent(25) << "const MCSubtargetInfo &STI) {\n"; + } else if (EType == EmitterType::Uncompress) { + FuncH << "static bool uncompressInst(MCInst &OutInst,\n"; + FuncH.indent(27) << "const MCInst &MI,\n"; + FuncH.indent(27) << "const MCSubtargetInfo &STI) {\n"; + } else if (EType == EmitterType::CheckCompress) { + FuncH << "static bool isCompressibleInst(const MachineInstr &MI,\n"; + FuncH.indent(31) << "const " << TargetName << "Subtarget &STI) {\n"; + } + + if (CompressPatterns.empty()) { + OS << FuncH.str(); + OS.indent(2) << "return false;\n}\n"; + if (EType == EmitterType::Compress) + OS << "\n#endif //GEN_COMPRESS_INSTR\n"; + else if (EType == EmitterType::Uncompress) + OS << "\n#endif //GEN_UNCOMPRESS_INSTR\n\n"; + else if (EType == EmitterType::CheckCompress) + OS << "\n#endif //GEN_CHECK_COMPRESS_INSTR\n\n"; + return; + } + + std::string CaseString; + raw_string_ostream CaseStream(CaseString); + StringRef PrevOp; + StringRef CurOp; + CaseStream << " switch (MI.getOpcode()) {\n"; + CaseStream << " default: return false;\n"; + + bool CompressOrCheck = + EType == EmitterType::Compress || EType == EmitterType::CheckCompress; + bool CompressOrUncompress = + EType == EmitterType::Compress || EType == EmitterType::Uncompress; + std::string ValidatorName = + CompressOrUncompress 
+ ? (TargetName + "ValidateMCOperandFor" + + (EType == EmitterType::Compress ? "Compress" : "Uncompress")) + .str() + : ""; + + for (auto &CompressPat : CompressPatterns) { + if (EType == EmitterType::Uncompress && CompressPat.IsCompressOnly) + continue; + + std::string CondString; + std::string CodeString; + raw_string_ostream CondStream(CondString); + raw_string_ostream CodeStream(CodeString); + CodeGenInstruction &Source = + CompressOrCheck ? CompressPat.Source : CompressPat.Dest; + CodeGenInstruction &Dest = + CompressOrCheck ? CompressPat.Dest : CompressPat.Source; + IndexedMap SourceOperandMap = CompressOrCheck + ? CompressPat.SourceOperandMap + : CompressPat.DestOperandMap; + IndexedMap &DestOperandMap = CompressOrCheck + ? CompressPat.DestOperandMap + : CompressPat.SourceOperandMap; + + CurOp = Source.TheDef->getName(); + // Check current and previous opcode to decide to continue or end a case. + if (CurOp != PrevOp) { + if (!PrevOp.empty()) + CaseStream.indent(6) << "break;\n } // case " + PrevOp + "\n"; + CaseStream.indent(4) << "case " + TargetName + "::" + CurOp + ": {\n"; + } + + std::set> FeaturesSet; + std::set>> AnyOfFeatureSets; + // Add CompressPat required features. + compressInstEmitterGetReqFeatures(FeaturesSet, AnyOfFeatureSets, CompressPat.PatReqFeatures); + + // Add Dest instruction required features. + std::vector ReqFeatures; + std::vector RF = Dest.TheDef->getValueAsListOfDefs("Predicates"); + copy_if(RF, std::back_inserter(ReqFeatures), [](Record *R) { + return R->getValueAsBit("AssemblerMatcherPredicate"); + }); + compressInstEmitterGetReqFeatures(FeaturesSet, AnyOfFeatureSets, ReqFeatures); + + // Emit checks for all required features. + for (auto &Op : FeaturesSet) { + StringRef Not = Op.first ? "!" : ""; + CondStream.indent(6) << Not << "STI.getFeatureBits()[" << TargetName + << "::" << Op.second << "]" + << " &&\n"; + } + + // Emit checks for all required feature groups. 
+ for (auto &Set : AnyOfFeatureSets) { + CondStream.indent(6) << "("; + for (auto &Op : Set) { + bool IsLast = &Op == &*Set.rbegin(); + StringRef Not = Op.first ? "!" : ""; + CondStream << Not << "STI.getFeatureBits()[" << TargetName + << "::" << Op.second << "]"; + if (!IsLast) + CondStream << " || "; + } + CondStream << ") &&\n"; + } + + // Start Source Inst operands validation. + unsigned OpNo = 0; + for (OpNo = 0; OpNo < Source.Operands.size(); ++OpNo) { + if (SourceOperandMap[OpNo].TiedOpIdx != -1) { + if (Source.Operands[OpNo].Rec->isSubClassOf("RegisterClass")) + CondStream.indent(6) + << "(MI.getOperand(" << OpNo << ").isReg()) && (MI.getOperand(" + << SourceOperandMap[OpNo].TiedOpIdx << ").isReg()) &&\n" + << " (MI.getOperand(" << OpNo + << ").getReg() == MI.getOperand(" + << SourceOperandMap[OpNo].TiedOpIdx << ").getReg()) &&\n"; + else + PrintFatalError("Unexpected tied operand types!\n"); + } + // Check for fixed immediates\registers in the source instruction. + switch (SourceOperandMap[OpNo].Kind) { + case OpData::Operand: + // We don't need to do anything for source instruction operand checks. 
+ break; + case OpData::Imm: + CondStream.indent(6) + << "(MI.getOperand(" << OpNo << ").isImm()) &&\n" + << " (MI.getOperand(" << OpNo + << ").getImm() == " << SourceOperandMap[OpNo].Data.Imm << ") &&\n"; + break; + case OpData::Reg: { + Record *Reg = SourceOperandMap[OpNo].Data.Reg; + CondStream.indent(6) + << "(MI.getOperand(" << OpNo << ").isReg()) &&\n" + << " (MI.getOperand(" << OpNo << ").getReg() == " << TargetName + << "::" << Reg->getName() << ") &&\n"; + break; + } + } + } + CodeStream.indent(6) << "// " << Dest.AsmString << "\n"; + if (CompressOrUncompress) + CodeStream.indent(6) << "OutInst.setOpcode(" << TargetName + << "::" << Dest.TheDef->getName() << ");\n"; + OpNo = 0; + for (const auto &DestOperand : Dest.Operands) { + CodeStream.indent(6) << "// Operand: " << DestOperand.Name << "\n"; + switch (DestOperandMap[OpNo].Kind) { + case OpData::Operand: { + unsigned OpIdx = DestOperandMap[OpNo].Data.Operand; + // Check that the operand in the Source instruction fits + // the type for the Dest instruction. + if (DestOperand.Rec->isSubClassOf("RegisterClass") || + DestOperand.Rec->isSubClassOf("RegisterOperand")) { + auto *ClassRec = DestOperand.Rec->isSubClassOf("RegisterClass") + ? DestOperand.Rec + : DestOperand.Rec->getValueAsDef("RegClass"); + // This is a register operand. Check the register class. + // Don't check register class if this is a tied operand, it was done + // for the operand its tied to. + if (DestOperand.getTiedRegister() == -1) + CondStream.indent(6) + << "(MI.getOperand(" << OpIdx << ").isReg()) &&\n" + << " (" << TargetName << "MCRegisterClasses[" << TargetName + << "::" << ClassRec->getName() + << "RegClassID].contains(MI.getOperand(" << OpIdx + << ").getReg())) &&\n"; + + if (CompressOrUncompress) + CodeStream.indent(6) + << "OutInst.addOperand(MI.getOperand(" << OpIdx << "));\n"; + } else { + // Handling immediate operands. 
+ if (CompressOrUncompress) { + unsigned Entry = + compressInstEmitterGetPredicates(MCOpPredicateMap, MCOpPredicates, DestOperand.Rec, + "MCOperandPredicate"); + CondStream.indent(6) + << ValidatorName << "(" + << "MI.getOperand(" << OpIdx << "), STI, " << Entry << ") &&\n"; + } else { + unsigned Entry = + compressInstEmitterGetPredicates(ImmLeafPredicateMap, ImmLeafPredicates, + DestOperand.Rec, "ImmediateCode"); + CondStream.indent(6) + << "MI.getOperand(" << OpIdx << ").isImm() &&\n"; + CondStream.indent(6) << TargetName << "ValidateMachineOperand(" + << "MI.getOperand(" << OpIdx << "), &STI, " + << Entry << ") &&\n"; + } + if (CompressOrUncompress) + CodeStream.indent(6) + << "OutInst.addOperand(MI.getOperand(" << OpIdx << "));\n"; + } + break; + } + case OpData::Imm: { + if (CompressOrUncompress) { + unsigned Entry = compressInstEmitterGetPredicates(MCOpPredicateMap, MCOpPredicates, + DestOperand.Rec, "MCOperandPredicate"); + CondStream.indent(6) + << ValidatorName << "(" + << "MCOperand::createImm(" << DestOperandMap[OpNo].Data.Imm + << "), STI, " << Entry << ") &&\n"; + } else { + unsigned Entry = compressInstEmitterGetPredicates(ImmLeafPredicateMap, ImmLeafPredicates, + DestOperand.Rec, "ImmediateCode"); + CondStream.indent(6) + << TargetName + << "ValidateMachineOperand(MachineOperand::CreateImm(" + << DestOperandMap[OpNo].Data.Imm << "), &STI, " << Entry + << ") &&\n"; + } + if (CompressOrUncompress) + CodeStream.indent(6) << "OutInst.addOperand(MCOperand::createImm(" + << DestOperandMap[OpNo].Data.Imm << "));\n"; + } break; + case OpData::Reg: { + if (CompressOrUncompress) { + // Fixed register has been validated at pattern validation time. 
+ Record *Reg = DestOperandMap[OpNo].Data.Reg; + CodeStream.indent(6) + << "OutInst.addOperand(MCOperand::createReg(" << TargetName + << "::" << Reg->getName() << "));\n"; + } + } break; + } + ++OpNo; + } + if (CompressOrUncompress) + CodeStream.indent(6) << "OutInst.setLoc(MI.getLoc());\n"; + compressInstEmitterMergeCondAndCode(CaseStream, CondStream.str(), CodeStream.str()); + PrevOp = CurOp; + } + Func << CaseStream.str() << "\n"; + // Close brace for the last case. + Func.indent(4) << "} // case " << CurOp << "\n"; + Func.indent(2) << "} // switch\n"; + Func.indent(2) << "return false;\n}\n"; + + if (!MCOpPredicates.empty()) { + OS << "static bool " << ValidatorName << "(const MCOperand &MCOp,\n" + << " const MCSubtargetInfo &STI,\n" + << " unsigned PredicateIndex) {\n" + << " switch (PredicateIndex) {\n" + << " default:\n" + << " llvm_unreachable(\"Unknown MCOperandPredicate kind\");\n" + << " break;\n"; + + compressInstEmitterPrintPredicates(MCOpPredicates, "MCOperandPredicate", OS); + + OS << " }\n" + << "}\n\n"; + } + + if (!ImmLeafPredicates.empty()) { + OS << "static bool " << TargetName + << "ValidateMachineOperand(const MachineOperand &MO,\n" + << " const " << TargetName << "Subtarget *Subtarget,\n" + << " unsigned PredicateIndex) {\n" + << " int64_t Imm = MO.getImm();\n" + << " switch (PredicateIndex) {\n" + << " default:\n" + << " llvm_unreachable(\"Unknown ImmLeaf Predicate kind\");\n" + << " break;\n"; + + compressInstEmitterPrintPredicates(ImmLeafPredicates, "ImmediateCode", OS); + + OS << " }\n" + << "}\n\n"; + } + + OS << FuncH.str(); + OS << Func.str(); + + if (EType == EmitterType::Compress) + OS << "\n#endif //GEN_COMPRESS_INSTR\n"; + else if (EType == EmitterType::Uncompress) + OS << "\n#endif //GEN_UNCOMPRESS_INSTR\n\n"; + else if (EType == EmitterType::CheckCompress) + OS << "\n#endif //GEN_CHECK_COMPRESS_INSTR\n\n"; +} + + +void PrinterLLVM::compressInstEmitterPrintPredicates(const std::vector &Predicates, + StringRef Name, raw_ostream 
&OS) { + for (unsigned I = 0; I < Predicates.size(); ++I) { + StringRef Pred = Predicates[I]->getValueAsString(Name); + OS << " case " << I + 1 << ": {\n" + << " // " << Predicates[I]->getName() << "\n" + << " " << Pred << "\n" + << " }\n"; + } +} + +void PrinterLLVM::compressInstEmitterMergeCondAndCode(raw_ostream &CombinedStream, StringRef CondStr, + StringRef CodeStr) { + // Remove first indentation and last '&&'. + CondStr = CondStr.drop_front(6).drop_back(4); + CombinedStream.indent(4) << "if (" << CondStr << ") {\n"; + CombinedStream << CodeStr; + CombinedStream.indent(4) << " return true;\n"; + CombinedStream.indent(4) << "} // if\n"; +} + +unsigned PrinterLLVM::compressInstEmitterGetPredicates(DenseMap &PredicateMap, + std::vector &Predicates, + Record *Rec, StringRef Name) { + unsigned &Entry = PredicateMap[Rec]; + if (Entry) + return Entry; + + if (!Rec->isValueUnset(Name)) { + Predicates.push_back(Rec); + Entry = Predicates.size(); + return Entry; + } + + PrintFatalError(Rec->getLoc(), "No " + Name + + " predicate on this operand at all: '" + + Rec->getName() + "'"); + return 0; +} + +void PrinterLLVM::compressInstEmitterGetReqFeatures(std::set> &FeaturesSet, + std::set>> &AnyOfFeatureSets, + const std::vector &ReqFeatures) { + for (auto &R : ReqFeatures) { + const DagInit *D = R->getValueAsDag("AssemblerCondDag"); + std::string CombineType = D->getOperator()->getAsString(); + if (CombineType != "any_of" && CombineType != "all_of") + PrintFatalError(R->getLoc(), "Invalid AssemblerCondDag!"); + if (D->getNumArgs() == 0) + PrintFatalError(R->getLoc(), "Invalid AssemblerCondDag!"); + bool IsOr = CombineType == "any_of"; + std::set> AnyOfSet; + + for (auto *Arg : D->getArgs()) { + bool IsNot = false; + if (auto *NotArg = dyn_cast(Arg)) { + if (NotArg->getOperator()->getAsString() != "not" || + NotArg->getNumArgs() != 1) + PrintFatalError(R->getLoc(), "Invalid AssemblerCondDag!"); + Arg = NotArg->getArg(0); + IsNot = true; + } + if (!isa(Arg) || + 
!cast(Arg)->getDef()->isSubClassOf("SubtargetFeature")) + PrintFatalError(R->getLoc(), "Invalid AssemblerCondDag!"); + if (IsOr) + AnyOfSet.insert({IsNot, cast(Arg)->getDef()->getName()}); + else + FeaturesSet.insert({IsNot, cast(Arg)->getDef()->getName()}); + } + + if (IsOr) + AnyOfFeatureSets.insert(AnyOfSet); + } +} } // end namespace llvm From 2bfe6590da2a2ac30b014ab2b0be80f462a4beb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D9=85=D8=B5=D8=B7=D9=81=D9=8A=20=D9=85=D8=AD=D9=85=D9=88?= =?UTF-8?q?=D8=AF=20=D9=83=D9=85=D8=A7=D9=84=20=D8=A7=D9=84=D8=AF=D9=8A?= =?UTF-8?q?=D9=86?= <48567303+moste00@users.noreply.github.com> Date: Sat, 6 Sep 2025 02:30:14 +0300 Subject: [PATCH 4/6] final touches to fix the generation of compressed instruction info logic --- llvm/utils/TableGen/PrinterCapstone.cpp | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/llvm/utils/TableGen/PrinterCapstone.cpp b/llvm/utils/TableGen/PrinterCapstone.cpp index 3419240efd1c..9f524f7abd65 100644 --- a/llvm/utils/TableGen/PrinterCapstone.cpp +++ b/llvm/utils/TableGen/PrinterCapstone.cpp @@ -873,7 +873,7 @@ static void patchAddOperandGetOperand(std::string Target, std::string& Code) { "([a-zA-Z_][a-zA-Z0-9_]*)\\." 
// whose argument is another method call on another variable "getOperand\\(" "([^)]*)" "\\)" // of method getOperand and its argument "\\)"; - std::string ReplaceAddGetOperand = "McInst_addOperand2($1, McInst_getOperand($2, $3))"; + std::string ReplaceAddGetOperand = "MCInst_addOperand2($1, MCInst_getOperand($2, $3))"; Code = std::regex_replace(Code, std::regex(FindAddGetOperand), ReplaceAddGetOperand); } @@ -882,14 +882,14 @@ static void patchAddOperandCreateOperand(std::string Target, std::string &Code) "\\.addOperand\\(" // in method call addOperand "MCOperand_createReg\\(([a-zA-Z_][a-zA-Z0-9_]*)\\)" // on a createReg call "\\)"; - std::string ReplaceAddCreateReg = "MCInst_CreateReg0($1, $2)"; + std::string ReplaceAddCreateReg = "MCOperand_CreateReg0($1, $2)"; Code = std::regex_replace(Code, std::regex(FindAddCreateReg), ReplaceAddCreateReg); std::string FindAddCreateImm = "([a-zA-Z_][a-zA-Z0-9_]*)" // any variable "\\.addOperand\\(" // in method call addOperand - "MCOperand_createImm\\(([0-9]+)\\)" // on a createImm call + "MCOperand_createImm\\((-?[0-9]+)\\)" // on a createImm call "\\)"; - std::string ReplaceAddCreateImm = "MCInst_CreateImm0($1, $2)"; + std::string ReplaceAddCreateImm = "MCOperand_CreateImm0($1, $2)"; Code = std::regex_replace(Code, std::regex(FindAddCreateImm), ReplaceAddCreateImm); } @@ -921,6 +921,21 @@ static void patchRISCVValidateCompressedInst(std::string &Code) { // modify calls to pass the argument std::string start = "(^|\\r?\\n)"; Code = std::regex_replace(Code, std::regex(start + "(\\s*RISCVValidate[^(]*)\\(" "(.*)" "\\)"),"$1$2(MI,$3)"); + + // patch CreateImm calls to CreateImm1 calls, the equivalent Capstone api + std::string FindCreateImmCalls = "(\\s*RISCVValidate[^(]*)\\(" "(.*)MCOperand_createImm\\((-?[0-9]+)\\)(.*)"; + std::string ReplaceCreateImmCalls = "$1($2MCOperand_CreateImm1(MI, $3)$4"; + Code = std::regex_replace(Code, std::regex(FindCreateImmCalls), ReplaceCreateImmCalls); + + // replace type names that don't exist 
in Capstone with their equivalents + Code = std::regex_replace(Code, std::regex("MachineInstr \\*"), "MCInst *"); + Code = std::regex_replace(Code, std::regex("MachineOperand \\*"),"MCOperand *"); + + // replace llvm_unreachable because it doesn't exist in Capstone + Code = std::regex_replace(Code, std::regex("llvm_unreachable.*"), "CS_ASSERT_RET_VAL(0,false); \n\t return false;"); + + // add a return at the end of the functions so a strict warnings-as-errors compiler won't complain + Code = std::regex_replace(Code, std::regex("bool RISCVValidate([\\s\\S]*?)\\}(\\s*)\\}(\\s*)\\}"), "bool RISCVValidate$1}$2}$3\treturn false;\n}"); } static void patchEvaluateAsConstantImm(std::string Target, std::string& Code) { From 5cbcec61c61a056d341b4a7e1ea831d71a729fab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D9=85=D8=B5=D8=B7=D9=81=D9=8A=20=D9=85=D8=AD=D9=85=D9=88?= =?UTF-8?q?=D8=AF=20=D9=83=D9=85=D8=A7=D9=84=20=D8=A7=D9=84=D8=AF=D9=8A?= =?UTF-8?q?=D9=86?= <48567303+moste00@users.noreply.github.com> Date: Wed, 22 Oct 2025 02:17:24 +0300 Subject: [PATCH 5/6] added isCall flag and generated MEM access for some instructions with missing MEM --- llvm/lib/Target/RISCV/RISCVInstrInfo.td | 6 +- llvm/lib/Target/RISCV/RISCVInstrInfoC.td | 1 + llvm/utils/TableGen/PrinterCapstone.cpp | 82 ++++++++++++++++-------- 3 files changed, 61 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 114329c2c7c5..fea0b9deea38 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -610,13 +610,15 @@ def LUI : RVInstU, Sched<[WriteIALU]>; -def JAL : RVInstJ, Sched<[WriteJal]>; -def JALR : RVInstI<0b000, OPC_JALR, (outs GPR:$rd), + def JALR : RVInstI<0b000, OPC_JALR, (outs GPR:$rd), (ins GPR:$rs1, simm12:$imm12), "jalr", "$rd, ${imm12}(${rs1})">, Sched<[WriteJalr, ReadJalr]>; + } // isCall = 1 } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 def BEQ : BranchCC_rri<0b000, "beq">; 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td index 07137031d9fc..4aa7d4e96711 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td @@ -535,6 +535,7 @@ def C_LDSP : CStackLoad<0b011, "c.ldsp", GPRNoX0, uimm9_lsb000>, let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_JR : RVInst16CR<0b1000, 0b10, (outs), (ins GPRNoX0:$rs1), "c.jr", "$rs1">, Sched<[WriteJalr, ReadJalr]> { + let isBranch = 1; let isBarrier = 1; let isTerminator = 1; let rs2 = 0; diff --git a/llvm/utils/TableGen/PrinterCapstone.cpp b/llvm/utils/TableGen/PrinterCapstone.cpp index 9f524f7abd65..2b4a1f193b43 100644 --- a/llvm/utils/TableGen/PrinterCapstone.cpp +++ b/llvm/utils/TableGen/PrinterCapstone.cpp @@ -13,6 +13,7 @@ #include "Printer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/ErrorHandling.h" @@ -821,19 +822,19 @@ static void patchPrintOperandAddr(std::string &Decoder) { static void patchSTIObject(std::string Target, std::string &Code) { // searching for a subtarget arg in a function def or use - std::string FindSubtargetArg = ",?\\s*(const\\s+)?(MCSubtargetInfo|" + Target + "Subtarget)" // subtarget object type - + "\\s*[&*]\\s*" // as a reference or pointer - + "[a-zA-Z_][a-zA-Z0-9_]*"; // named anthing, optionally followed by comma (but not if it's the last arg) + std::string FindSubtargetArg = ",?\\s*(const\\s+)?(MCSubtargetInfo|" + Target + "Subtarget)" // subtarget object type + + "\\s*[&*]\\s*" // as a reference or pointer + + "[a-zA-Z_][a-zA-Z0-9_]*"; // named anthing, optionally followed by comma (but not if it's the last arg) Code = std::regex_replace(Code, std::regex(FindSubtargetArg), ""); // replace with nothing std::string FindGetFeatureBitsCall = "([a-zA-Z_][a-zA-Z0-9_]*\\.getFeatureBits\\(\\))" // a call of the form 
.getFeatureBits() "\\[([a-zA-Z_][a-zA-Z0-9_]*)\\]"; - std::string ReplaceGetFeatureBitsCall = Target + "_getFeatureBits(MI->csh->mode, $2)"; + std::string ReplaceGetFeatureBitsCall = Target + "_getFeatureBits(MI->csh->mode, $2)"; Code = std::regex_replace(Code, std::regex(FindGetFeatureBitsCall), ReplaceGetFeatureBitsCall); std::string FindLeftOverUsesOfSTI = ",?\\s*[&*]?\\s*STI"; Code = std::regex_replace(Code, std::regex(FindLeftOverUsesOfSTI), ""); - + // target-specific replacements if (Target == "RISCV") { std::string Find64BitChecks = "([a-zA-Z_][a-zA-Z0-9_]*)->is64Bit\\(\\)" "|" @@ -854,8 +855,8 @@ static void patchIsGetOperand(std::string Target, std::string& Code) { "(isReg|getReg" // isReg or getReg "|isImm|getImm)\\(\\)"; // or isImm or getImm (as $4) std::string ReplaceIsGetImmOrReg = "MCOperand_$4(MCInst_getOperand($1, $3))"; - Code = std::regex_replace(Code, std::regex(FindIsGetImmOrReg), ReplaceIsGetImmOrReg); - + Code = std::regex_replace(Code, std::regex(FindIsGetImmOrReg), ReplaceIsGetImmOrReg); + // patch individual getOperand calls std::string ReplaceGetOperand = "MCInst_getOperand($1, $3)"; Code = std::regex_replace(Code, std::regex(FindGetOperand), ReplaceGetOperand); @@ -874,7 +875,7 @@ static void patchAddOperandGetOperand(std::string Target, std::string& Code) { "getOperand\\(" "([^)]*)" "\\)" // of method getOperand and its argument "\\)"; std::string ReplaceAddGetOperand = "MCInst_addOperand2($1, MCInst_getOperand($2, $3))"; - Code = std::regex_replace(Code, std::regex(FindAddGetOperand), ReplaceAddGetOperand); + Code = std::regex_replace(Code, std::regex(FindAddGetOperand), ReplaceAddGetOperand); } static void patchAddOperandCreateOperand(std::string Target, std::string &Code) { @@ -905,7 +906,7 @@ static void patchRegClassContains(std::string Target, std::string& Code) { static void patchSetLoc(std::string Target, std::string& Code) { // there is no loc property in McInst struct in capstone std::string FindSetLocCalls = 
".*\\.setLoc.*;"; - Code = std::regex_replace(Code, std::regex(FindSetLocCalls), ""); + Code = std::regex_replace(Code, std::regex(FindSetLocCalls), ""); } static void patchRISCVValidateCompressedInst(std::string &Code) { @@ -931,7 +932,7 @@ static void patchRISCVValidateCompressedInst(std::string &Code) { Code = std::regex_replace(Code, std::regex("MachineInstr \\*"), "MCInst *"); Code = std::regex_replace(Code, std::regex("MachineOperand \\*"),"MCOperand *"); - // replace llvm_unreachable because it doesn't exist in Capstone + // replace llvm_unreachable because it doesn't exist in Capstone Code = std::regex_replace(Code, std::regex("llvm_unreachable.*"), "CS_ASSERT_RET_VAL(0,false); \n\t return false;"); // add a return at the end of the functions so a strict warnings-as-errors compiler won't complain @@ -982,10 +983,10 @@ std::string PrinterCapstone::translateToC(std::string const &TargetName, patchAddOperandCreateOperand(TargetName, PatchedCode); patchRegClassContains(TargetName, PatchedCode); - + patchSetLoc(TargetName, PatchedCode); patchEvaluateAsConstantImm(TargetName, PatchedCode); - + patchSetOpcode(TargetName, PatchedCode); patchGetOpcode(TargetName, PatchedCode); patchIsBareSymbolRef(TargetName, PatchedCode); @@ -2872,6 +2873,9 @@ getNormalMnemonic(StringRef TargetName, StringRef Mnemonic, return normalizedMnemonic(Mnemonic, Upper, ReplaceDot, RemovePattern); } +std::string getLLVMInstEnumName(StringRef const &TargetName, + CodeGenInstruction const *CGI); + std::string getReqFeatures(StringRef const &TargetName, AsmMatcherInfo &AMI, std::unique_ptr const &MI, bool UseMI, CodeGenInstruction const *CGI) { @@ -3205,7 +3209,7 @@ std::string getPrimaryCSOperandType(Record const *OpRec) { return "CS_OP_REG"; else if (OperandType == "OPERAND_NM_GPREL21") return "CS_OP_REG"; - // RISCV (keep this as the last check because it matches a lot of strings, + // RISCV (keep this as the last check because it matches a lot of strings, // so it might shadow another 
architecture's operand names if it's moved up) else if (std::regex_match(OperandType, RiscvImmOperandsPattern)) return "CS_OP_IMM"; @@ -3279,6 +3283,7 @@ bool opIsPartOfiPTRPattern(Record const *OpRec, StringRef const &OpName, } DefInit *LeaveDef = dyn_cast(PatternDag->getArg(I)); + if (!LeaveDef) return false; bool Matches; @@ -3331,17 +3336,42 @@ std::string getCSOperandType( return OperandType += " | CS_OP_BOUND"; } } + bool RISCVInstrMayAccessMemory = TargetName == "RISCV" && (CGI->mayLoad || CGI->mayStore); + // some RISCV reg operands that hold addresses are not correctly classified by + // the above logic as MEM operands, this fixes the issue by an ugly asm string wrangling + if (RISCVInstrMayAccessMemory && OperandType == "CS_OP_REG") { + // if the reg name appears inside (${...}), it's an addressing register + if (Regex("\\(\\$\\{" + OpName.str() + "\\}\\)").match(CGI->AsmString)) { + OperandType += " | CS_OP_MEM"; + return OperandType; + } + } + // same as above but for immediate literals used as address offsets + if (RISCVInstrMayAccessMemory && OperandType == "CS_OP_IMM") { + // if the literal name appears in ${...}(___), it's an address offset + if (Regex("\\$\\{" + OpName.str() + "\\}\\(.*\\)").match(CGI->AsmString)) { + OperandType += " | CS_OP_MEM"; + return OperandType; + } + } + if (wrongMemClassification(TargetName, OpName)) { return OperandType; } + // some RISCV instructions have an extra MEM where it shouldn't be + // this flag will correct the problem with no effect for other archs + bool InstrMayAccessMemory = TargetName != "RISCV" || RISCVInstrMayAccessMemory; DagInit *PatternDag = nullptr; if (OperandType == "CS_OP_MEM") - // It is only marked as mem, we treat it as immediate. - OperandType += " | CS_OP_IMM"; + if (OpRec->getValue("RegClass") != nullptr) + OperandType += " | CS_OP_REG"; + else // It is only marked as mem, we treat it as immediate. 
+ OperandType += " | CS_OP_IMM"; else if (OpRec->getValue("Type") && getValueType(OpRec->getValueAsDef("Type")) == - MVT::SimpleValueType::iPTR) + MVT::SimpleValueType::iPTR && + InstrMayAccessMemory) OperandType += " | CS_OP_MEM"; else if (!CGI->TheDef->isValueUnset("Pattern") && !CGI->TheDef->getValueAsListInit("Pattern")->empty()) { @@ -3361,11 +3391,11 @@ std::string getCSOperandType( return opIsPartOfiPTRPattern( OpRec, OpName, PatternDag->getValueAsDag("PatternToMatch"), false); }); - if (OpTypeIsPartOfAnyPattern) + if (OpTypeIsPartOfAnyPattern && InstrMayAccessMemory) OperandType += " | CS_OP_MEM"; return OperandType; } - if (PatternDag && opIsPartOfiPTRPattern(OpRec, OpName, PatternDag, false)) + if (PatternDag && opIsPartOfiPTRPattern(OpRec, OpName, PatternDag, false) && InstrMayAccessMemory) OperandType += " | CS_OP_MEM"; return OperandType; } @@ -4264,7 +4294,7 @@ std::string getTableNamespacePrefix(const GenericTable &Table, NSTable = &ARMNSTypePairs; else if (StringRef(TargetName).upper() == "SPARC") NSTable = &SparcNSTypePairs; - else if (StringRef(TargetName).upper() == "RISCV") + else if (StringRef(TargetName).upper() == "RISCV") return "RISCV_"; // don't bother with a table else PrintFatalNote("No Namespace Type table defined for target."); @@ -4363,7 +4393,7 @@ uint64_t BitsInitToUInt(const BitsInit *BI) { void PrinterCapstone::searchableTablesEmitKeyArray(const GenericTable &Table, const SearchIndex &Index, bool IsPrimary) const { - if (!IsPrimary) + if (!IsPrimary) return; if (Index.Fields.size() != 1) return; @@ -4377,23 +4407,23 @@ void PrinterCapstone::searchableTablesEmitKeyArray(const GenericTable &Table, if (Kind != RecTy::BitRecTyKind && Kind != RecTy::BitsRecTyKind && Kind != RecTy::IntRecTyKind && Kind != RecTy::StringRecTyKind) return; - + raw_string_ostream &OS = searchableTablesGetOS(ST_IMPL_OS); OS << "static const struct "; bool IsNumericIndex = Kind != RecTy::StringRecTyKind; // which struct to emit to represent the index type ? 
- if (IsNumericIndex) + if (IsNumericIndex) OS << "IndexType"; - else + else OS << "IndexTypeStr"; - + ListSeparator LS; OS << " Index[] = {\n" << LS; - + int64_t idx = 0; for (auto & entry : Table.Entries) { - OS << "{"; + OS << "{"; switch (Kind) { case RecTy::BitRecTyKind: OS << ((entry->getValueAsBit(IndexField.Name))? "true" : "false"); From 72ca092623806c9a97a9ee18b05f8f3b2f1fb350 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D9=85=D8=B5=D8=B7=D9=81=D9=8A=20=D9=85=D8=AD=D9=85=D9=88?= =?UTF-8?q?=D8=AF=20=D9=83=D9=85=D8=A7=D9=84=20=D8=A7=D9=84=D8=AF=D9=8A?= =?UTF-8?q?=D9=86?= <48567303+moste00@users.noreply.github.com> Date: Wed, 22 Oct 2025 03:48:39 +0300 Subject: [PATCH 6/6] [BACKPATCH] fixed problem with c_srli immediates by cherry picking a fix from upstream, REMOVE WHEN REBASING ON TOP OF 20.0 --- .../RISCV/Disassembler/RISCVDisassembler.cpp | 21 +++++++++++++++++++ llvm/lib/Target/RISCV/RISCVInstrInfo.td | 3 +-- llvm/lib/Target/RISCV/RISCVInstrInfoC.td | 3 +-- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index 4dd039159e29..330930fc1fa8 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -276,6 +276,19 @@ static DecodeStatus decodeUImmOperand(MCInst &Inst, uint32_t Imm, return MCDisassembler::Success; } +static DecodeStatus decodeUImmLog2XLenOperand(MCInst &Inst, uint32_t Imm, + int64_t Address, + const MCDisassembler *Decoder) { + assert(isUInt<6>(Imm) && "Invalid immediate"); + + if (!Decoder->getSubtargetInfo().hasFeature(RISCV::Feature64Bit) && + !isUInt<5>(Imm)) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::createImm(Imm)); + return MCDisassembler::Success; +} + template static DecodeStatus decodeUImmNonZeroOperand(MCInst &Inst, uint32_t Imm, int64_t Address, @@ -285,6 +298,14 @@ static DecodeStatus 
decodeUImmNonZeroOperand(MCInst &Inst, uint32_t Imm, return decodeUImmOperand(Inst, Imm, Address, Decoder); } +static DecodeStatus +decodeUImmLog2XLenNonZeroOperand(MCInst &Inst, uint32_t Imm, int64_t Address, + const MCDisassembler *Decoder) { + if (Imm == 0) + return MCDisassembler::Fail; + return decodeUImmLog2XLenOperand(Inst, Imm, Address, Decoder); +} + template static DecodeStatus decodeSImmOperand(MCInst &Inst, uint32_t Imm, int64_t Address, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index fea0b9deea38..bae2f2a2437d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -191,8 +191,7 @@ def uimmlog2xlen : RISCVOp, ImmLeaf(Imm); }]> { let ParserMatchClass = UImmLog2XLenAsmOperand; - // TODO: should ensure invalid shamt is rejected when decoding. - let DecoderMethod = "decodeUImmOperand<6>"; + let DecoderMethod = "decodeUImmLog2XLenOperand"; let MCOperandPredicate = [{ int64_t Imm; if (!MCOp.evaluateAsConstantImm(Imm)) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td index 4aa7d4e96711..cae33914f9c3 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td @@ -24,8 +24,7 @@ def uimmlog2xlennonzero : RISCVOp, ImmLeaf(Imm) && (Imm != 0); }]> { let ParserMatchClass = UImmLog2XLenNonZeroAsmOperand; - // TODO: should ensure invalid shamt is rejected when decoding. - let DecoderMethod = "decodeUImmNonZeroOperand<6>"; + let DecoderMethod = "decodeUImmLog2XLenNonZeroOperand"; let OperandType = "OPERAND_UIMMLOG2XLEN_NONZERO"; let MCOperandPredicate = [{ int64_t Imm;