Skip to content

Commit

Permalink
[c2cpg] Implemented support for JSON Compilation Database File
Browse files Browse the repository at this point in the history
  • Loading branch information
max-leuthaeuser committed Oct 17, 2024
1 parent e0e493d commit 511d4d2
Show file tree
Hide file tree
Showing 10 changed files with 309 additions and 29 deletions.
13 changes: 11 additions & 2 deletions joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/Main.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package io.joern.c2cpg
import io.joern.c2cpg.Frontend.*
import io.joern.x2cpg.{X2CpgConfig, X2CpgMain}
import io.joern.x2cpg.utils.server.FrontendHTTPServer
import io.joern.x2cpg.SourceFiles
import org.slf4j.LoggerFactory
import scopt.OParser

Expand All @@ -16,7 +17,8 @@ final case class Config(
includePathsAutoDiscovery: Boolean = false,
skipFunctionBodies: Boolean = false,
noImageLocations: Boolean = false,
withPreprocessedFiles: Boolean = false
withPreprocessedFiles: Boolean = false,
compilationDatabase: Option[String] = None
) extends X2CpgConfig[Config] {
def withIncludePaths(includePaths: Set[String]): Config = {
this.copy(includePaths = includePaths).withInheritedFields(this)
Expand Down Expand Up @@ -57,6 +59,10 @@ final case class Config(
/** Returns a copy of this configuration whose `withPreprocessedFiles` flag is set to `value`. The copy is passed
  * through `withInheritedFields(this)` before it is returned.
  */
def withPreprocessedFiles(value: Boolean): Config = {
  val updated = this.copy(withPreprocessedFiles = value)
  updated.withInheritedFields(this)
}

/** Returns a copy of this configuration with `compilationDatabase` set to `Some(value)`. The copy is passed through
  * `withInheritedFields(this)` before it is returned.
  */
def withCompilationDatabase(value: String): Config = {
  val updated = this.copy(compilationDatabase = Some(value))
  updated.withInheritedFields(this)
}
}

private object Frontend {
Expand Down Expand Up @@ -103,7 +109,10 @@ private object Frontend {
opt[String]("define")
.unbounded()
.text("define a name")
.action((d, c) => c.withDefines(c.defines + d))
.action((d, c) => c.withDefines(c.defines + d)),
// Optional path to a JSON compilation database; it is made absolute relative to the input path.
// NOTE(review): the path is resolved against `c.inputPath` as it is at the moment this option is
// processed - confirm the input path is already set when the option precedes it on the command line.
opt[String]("compilation-database")
  .text("use this compilation database file (only handles files and compile settings listed in this file)")
  .action((d, c) => c.withCompilationDatabase(SourceFiles.toAbsolutePath(d, c.inputPath)))
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class AstCreator(
protected val usingDeclarationMappings: mutable.Map[String, String] = mutable.HashMap.empty

// TypeDecls with their bindings (with their refs) for lambdas and methods are not put in the AST
// where the respective nodes are defined. Instead we put them under the parent TYPE_DECL in which they are defined.
// where the respective nodes are defined. Instead, we put them under the parent TYPE_DECL in which they are defined.
// To achieve this we need this extra stack.
protected val methodAstParentStack: Stack[NewNode] = new Stack()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,8 @@ trait AstForStatementsCreator(implicit withSchemaValidation: ValidationMode) { t
// We only handle un-parsable macros here for now
val isFromMacroExpansion = statement.getProblem.getNodeLocations.exists(_.isInstanceOf[IASTMacroExpansionLocation])
val asts = if (isFromMacroExpansion) {
new CdtParser(config).parse(statement.getRawSignature, Paths.get(statement.getContainingFilename)) match
new CdtParser(config, List.empty)
.parse(statement.getRawSignature, Paths.get(statement.getContainingFilename)) match
case Some(node) => node.getDeclarations.toIndexedSeq.flatMap(astsForDeclaration)
case None => Seq.empty
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package io.joern.c2cpg.parser

import better.files.File
import io.joern.c2cpg.Config
import io.joern.c2cpg.parser.JSONCompilationDatabaseParser.CommandObject
import io.shiftleft.utils.IOUtils
import org.eclipse.cdt.core.dom.ast.gnu.c.GCCLanguage
import org.eclipse.cdt.core.dom.ast.gnu.cpp.GPPLanguage
Expand Down Expand Up @@ -41,13 +42,15 @@ object CdtParser {

}

class CdtParser(config: Config) extends ParseProblemsLogger with PreprocessorStatementsLogger {
class CdtParser(config: Config, compilationDatabase: List[CommandObject])
extends ParseProblemsLogger
with PreprocessorStatementsLogger {

import io.joern.c2cpg.parser.CdtParser._

private val headerFileFinder = new HeaderFileFinder(config.inputPath)
private val parserConfig = ParserConfig.fromConfig(config)
private val definedSymbols = parserConfig.definedSymbols.asJava
private val parserConfig = ParserConfig.fromConfig(config, compilationDatabase)
private val definedSymbols = parserConfig.definedSymbols
private val includePaths = parserConfig.userIncludePaths
private val log = new DefaultLogService

Expand Down Expand Up @@ -80,7 +83,11 @@ class CdtParser(config: Config) extends ParseProblemsLogger with PreprocessorSta
val additionalIncludes =
if (FileDefaults.isCPPFile(file.toString)) parserConfig.systemIncludePathsCPP
else parserConfig.systemIncludePathsC
new ScannerInfo(definedSymbols, (includePaths ++ additionalIncludes).map(_.toString).toArray)
val fileSpecificDefines = parserConfig.definedSymbolsPerFile.getOrElse(file.toString, Map.empty)
new ScannerInfo(
(definedSymbols ++ fileSpecificDefines).asJava,
(includePaths ++ additionalIncludes).map(_.toString).toArray
)
}

private def parseInternal(code: String, inFile: File): IASTTranslationUnit = {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package io.joern.c2cpg.parser

import io.joern.x2cpg.SourceFiles
import io.shiftleft.utils.IOUtils
import org.slf4j.LoggerFactory
import ujson.Value

import java.nio.file.Paths
import scala.util.Try

/** Reads a JSON compilation database (`compile_commands.json`) and exposes, per entry, the compiled file and the
  * macro definitions (`-D...`) found in its `arguments` and/or `command` fields.
  */
object JSONCompilationDatabaseParser {

  import scala.util.control.NonFatal

  private val logger = LoggerFactory.getLogger(getClass)

  /** Finds macro definitions inside a single `command` string.
    * {{{
    * 1) -D: Matches the -D flag, which is the key prefix for defining macros.
    * 2) ([A-Za-z_][A-Za-z0-9_]*): Matches a valid macro name (which must start with a letter or underscore and can
    *    be followed by any number of letters, numbers, or underscores; single-character names like -DX included).
    * 3) (=(\\*".*"|\S+))?: Optionally matches = followed by either:
    *   a) A quoted string (the opening quote may be preceded by backslashes). The match runs up to the last
    *      quote in the remaining text.
    *   b) An unquoted value: a run of non-whitespace characters (e.g., the 2 in -DLEVEL=2).
    * }}}
    */
  private val defineInCommandPattern = """-D([A-Za-z_][A-Za-z0-9_]*)(=(\\*".*"|\S+))?""".r

  /** One entry of the compilation database.
    *
    * @param directory
    *   the value of the entry's `directory` field; used as base for resolving `file`
    * @param arguments
    *   the entry's `arguments` array (empty if absent)
    * @param command
    *   the entry's `command` string wrapped in a list (empty if absent)
    * @param file
    *   the value of the entry's `file` field
    */
  case class CommandObject(directory: String, arguments: List[String], command: List[String], file: String) {

    /** @return
      *   the file path (guaranteed to be absolute)
      */
    def compiledFile(): String = SourceFiles.toAbsolutePath(file, directory)

    /** Splits a `-DNAME[=VALUE]` define into its name and value. Only the first `=` separates name from value, so
      * values may contain `=` themselves and may be empty (`-DNAME=`). A define without `=` gets the value "1".
      */
    private def nameValuePairFromDefine(define: String): (String, String) = {
      val s = define.stripPrefix("-D")
      s.indexOf('=') match {
        case -1  => (s, "1")
        case idx => (s.substring(0, idx), s.substring(idx + 1))
      }
    }

    /** @return
      *   all macro definitions of this entry as name/value pairs: first those from `arguments` (every element
      *   starting with `-D`), then those found in `command` via [[defineInCommandPattern]]
      */
    def defines(): List[(String, String)] = {
      val definesFromArguments = arguments.filter(_.startsWith("-D")).map(nameValuePairFromDefine)
      val definesFromCommand = command.flatMap { c =>
        defineInCommandPattern.findAllIn(c).toList.map(nameValuePairFromDefine)
      }
      definesFromArguments ++ definesFromCommand
    }
  }

  /** True iff `node` is a JSON object that contains `key`. */
  private def hasKey(node: Value, key: String): Boolean = node.objOpt.exists(_.contains(key))

  /** The `arguments` array of `obj` as strings; empty if the key is absent or its value is not an array. */
  private def safeArguments(obj: Value): List[String] = {
    if (hasKey(obj, "arguments")) obj("arguments").arrOpt.map(_.toList.map(_.str)).getOrElse(List.empty)
    else List.empty
  }

  /** The `command` string of `obj` as a single-element list; empty if the key is absent. */
  private def safeCommand(obj: Value): List[String] = {
    if (hasKey(obj, "command")) List(obj("command").str)
    else List.empty
  }

  /** Converts one database entry into a [[CommandObject]]. An entry that cannot be converted (e.g., `directory` or
    * `file` missing or not a string) is logged and skipped so that it does not invalidate the other entries.
    */
  private def commandObjectFrom(obj: Value, compileCommandsJson: String): Option[CommandObject] = {
    Try(CommandObject(obj("directory").str, safeArguments(obj), safeCommand(obj), obj("file").str))
      .fold[Option[CommandObject]](
        t => {
          logger.warn(s"Skipping malformed entry in '$compileCommandsJson'", t)
          None
        },
        Some(_)
      )
  }

  /** Parses the compilation database file at the given path.
    *
    * @param compileCommandsJson
    *   path to the JSON compilation database file
    * @return
    *   the successfully converted entries; an empty list if the file cannot be read or is not a JSON array. Failures
    *   are logged as warnings. Only non-fatal errors are handled; fatal JVM errors propagate to the caller.
    */
  def parse(compileCommandsJson: String): List[CommandObject] = {
    try {
      val jsonContent       = IOUtils.readEntireFile(Paths.get(compileCommandsJson))
      val json              = ujson.read(jsonContent)
      val allCommandObjects = json.arr.toList
      allCommandObjects.flatMap(obj => commandObjectFrom(obj, compileCommandsJson))
    } catch {
      case NonFatal(t) =>
        logger.warn(s"Could not parse '$compileCommandsJson'", t)
        List.empty
    }
  }

}
Original file line number Diff line number Diff line change
@@ -1,28 +1,35 @@
package io.joern.c2cpg.parser

import io.joern.c2cpg.Config
import io.joern.c2cpg.parser.JSONCompilationDatabaseParser.CommandObject
import io.joern.c2cpg.utils.IncludeAutoDiscovery

import java.nio.file.{Path, Paths}

object ParserConfig {

def empty: ParserConfig =
ParserConfig(Set.empty, Set.empty, Set.empty, Map.empty, logProblems = false, logPreprocessor = false)
ParserConfig(Set.empty, Set.empty, Set.empty, Map.empty, Map.empty, logProblems = false, logPreprocessor = false)

def fromConfig(config: Config): ParserConfig = ParserConfig(
config.includePaths.map(Paths.get(_).toAbsolutePath),
IncludeAutoDiscovery.discoverIncludePathsC(config),
IncludeAutoDiscovery.discoverIncludePathsCPP(config),
config.defines.map {
case define if define.contains("=") =>
val s = define.split("=")
s.head -> s(1)
case define => define -> "true"
}.toMap ++ DefaultDefines.DEFAULT_CALL_CONVENTIONS,
config.logProblems,
config.logPreprocessor
)
/** Builds the [[ParserConfig]] from the frontend configuration and the entries of the compilation database.
  *
  * User defines from `config.defines` are given as `NAME` or `NAME=VALUE`. Only the first `=` separates name and
  * value, so the value may be empty (`NAME=`) or contain further `=` characters. A define without `=` gets the
  * value "true".
  *
  * @param config
  *   the frontend configuration
  * @param compilationDatabase
  *   the parsed compilation database entries (may be empty)
  */
def fromConfig(config: Config, compilationDatabase: List[CommandObject]): ParserConfig = {
  // Keyed by the compiled file path; `toMap` keeps the last entry if a file is listed more than once.
  val compilationDatabaseDefines = compilationDatabase.map { c =>
    c.compiledFile() -> c.defines().toMap
  }.toMap
  ParserConfig(
    config.includePaths.map(Paths.get(_).toAbsolutePath),
    IncludeAutoDiscovery.discoverIncludePathsC(config),
    IncludeAutoDiscovery.discoverIncludePathsCPP(config),
    config.defines.map { define =>
      // limit = 2: keeps a trailing empty value and any '=' inside the value
      define.split("=", 2) match {
        case Array(name, value) => name -> value
        case _                  => define -> "true"
      }
    }.toMap ++ DefaultDefines.DEFAULT_CALL_CONVENTIONS,
    compilationDatabaseDefines,
    config.logProblems,
    config.logPreprocessor
  )
}

}

Expand All @@ -31,6 +38,7 @@ case class ParserConfig(
systemIncludePathsC: Set[Path],
systemIncludePathsCPP: Set[Path],
definedSymbols: Map[String, String],
definedSymbolsPerFile: Map[String, Map[String, String]],
logProblems: Boolean,
logPreprocessor: Boolean
)
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import io.joern.c2cpg.Config
import io.joern.c2cpg.astcreation.AstCreator
import io.joern.c2cpg.astcreation.CGlobal
import io.joern.c2cpg.parser.{CdtParser, FileDefaults}
import io.joern.c2cpg.parser.JSONCompilationDatabaseParser
import io.joern.c2cpg.parser.JSONCompilationDatabaseParser.CommandObject
import io.shiftleft.codepropertygraph.generated.Cpg
import io.shiftleft.passes.ForkJoinParallelCpgPass
import io.joern.x2cpg.SourceFiles
Expand All @@ -24,18 +26,21 @@ class AstCreationPass(cpg: Cpg, config: Config, report: Report = new Report())

private val logger: Logger = LoggerFactory.getLogger(classOf[AstCreationPass])

private val global = new CGlobal()
private val file2OffsetTable: ConcurrentHashMap[String, Array[Int]] = new ConcurrentHashMap()
private val parser: CdtParser = new CdtParser(config)

private val global = new CGlobal()
private val compilationDatabase: List[CommandObject] =
config.compilationDatabase.map(JSONCompilationDatabaseParser.parse).getOrElse(List.empty)

private val parser: CdtParser = new CdtParser(config, compilationDatabase)

/** @return
  *   the keys of `global.usedTypes` as a list
  */
def typesSeen(): List[String] = {
  val usedTypeNames = global.usedTypes.keys().asScala
  usedTypeNames.toList
}

/** @return
  *   the entries of `global.methodDeclarations` whose keys do not occur in `global.methodDefinitions`
  */
def unhandledMethodDeclarations(): Map[String, CGlobal.MethodInfo] = {
  val declarations = global.methodDeclarations.asScala.toMap
  val definedKeys  = global.methodDefinitions.asScala.keys
  declarations -- definedKeys
}

override def generateParts(): Array[String] = {
private def sourceFilesFromDirectory(): Array[String] = {
val sourceFileExtensions = FileDefaults.SOURCE_FILE_EXTENSIONS
++ FileDefaults.HEADER_FILE_EXTENSIONS
++ Option.when(config.withPreprocessedFiles)(FileDefaults.PREPROCESSED_EXT).toList
Expand All @@ -60,6 +65,29 @@ class AstCreationPass(cpg: Cpg, config: Config, report: Report = new Report())
}
}

/** Determines the files to process from the compilation database entries instead of scanning the input directory.
  * The compiled files of all entries are passed through `SourceFiles.filterFiles` together with the default ignored
  * folders and the ignore settings from the config. A warning is logged if the database has no entries.
  *
  * @param compilationDatabaseFile
  *   path of the compilation database file; only used in the warning message
  */
private def sourceFilesFromCompilationDatabase(compilationDatabaseFile: String): Array[String] = {
  compilationDatabase match {
    case Nil => logger.warn(s"'$compilationDatabaseFile' contains no source files. CPG will be empty.")
    case _   => // there is at least one entry; nothing to report
  }
  val listedFiles = compilationDatabase.map(_.compiledFile())
  val acceptedFiles = SourceFiles.filterFiles(
    listedFiles,
    config.inputPath,
    ignoredDefaultRegex = Option(DefaultIgnoredFolders),
    ignoredFilesRegex = Option(config.ignoredFilesRegex),
    ignoredFilesPath = Option(config.ignoredFiles)
  )
  acceptedFiles.toArray
}

/** Selects the files to process: those listed in the compilation database if one is configured, otherwise those
  * found by scanning the input directory.
  */
override def generateParts(): Array[String] =
  config.compilationDatabase match {
    case Some(compilationDatabaseFile) => sourceFilesFromCompilationDatabase(compilationDatabaseFile)
    case None                          => sourceFilesFromDirectory()
  }

override def runOnPart(diffGraph: DiffGraphBuilder, filename: String): Unit = {
val path = Paths.get(filename).toAbsolutePath
val relPath = SourceFiles.toRelativePath(path.toString, config.inputPath)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,30 @@ package io.joern.c2cpg.passes
import io.joern.c2cpg.C2Cpg.DefaultIgnoredFolders
import io.joern.c2cpg.Config
import io.joern.c2cpg.parser.{CdtParser, FileDefaults}
import io.joern.c2cpg.parser.JSONCompilationDatabaseParser
import io.joern.c2cpg.parser.JSONCompilationDatabaseParser.CommandObject
import io.joern.x2cpg.SourceFiles
import org.eclipse.cdt.core.dom.ast.{
IASTPreprocessorIfStatement,
IASTPreprocessorIfdefStatement,
IASTPreprocessorIfStatement,
IASTPreprocessorStatement
}
import org.slf4j.LoggerFactory

import java.nio.file.Paths
import scala.collection.parallel.CollectionConverters.ImmutableIterableIsParallelizable
import scala.collection.parallel.immutable.ParIterable

class PreprocessorPass(config: Config) {

private val parser = new CdtParser(config)
private val logger = LoggerFactory.getLogger(classOf[PreprocessorPass])

private val compilationDatabase: List[CommandObject] =
config.compilationDatabase.map(JSONCompilationDatabaseParser.parse).getOrElse(List.empty)

def run(): ParIterable[String] =
private val parser = new CdtParser(config, compilationDatabase)

private def sourceFilesFromDirectory(): ParIterable[String] = {
SourceFiles
.determine(
config.inputPath,
Expand All @@ -29,6 +37,32 @@ class PreprocessorPass(config: Config) {
)
.par
.flatMap(runOnPart)
}

/** Runs `runOnPart` in parallel on the files listed in the compilation database. The compiled files of all entries
  * are first passed through `SourceFiles.filterFiles` together with the default ignored folders and the ignore
  * settings from the config. A warning is logged if the database has no entries.
  *
  * @param compilationDatabaseFile
  *   path of the compilation database file; only used in the warning message
  */
private def sourceFilesFromCompilationDatabase(compilationDatabaseFile: String): ParIterable[String] = {
  compilationDatabase match {
    case Nil => logger.warn(s"'$compilationDatabaseFile' contains no source files.")
    case _   => // there is at least one entry; nothing to report
  }
  val listedFiles = compilationDatabase.map(_.compiledFile())
  val acceptedFiles = SourceFiles.filterFiles(
    listedFiles,
    config.inputPath,
    ignoredDefaultRegex = Option(DefaultIgnoredFolders),
    ignoredFilesRegex = Option(config.ignoredFilesRegex),
    ignoredFilesPath = Option(config.ignoredFiles)
  )
  acceptedFiles.par.flatMap(runOnPart)
}

/** Runs the pass on the files listed in the compilation database if one is configured, otherwise on the files found
  * by scanning the input directory.
  */
def run(): ParIterable[String] =
  config.compilationDatabase match {
    case Some(compilationDatabaseFile) => sourceFilesFromCompilationDatabase(compilationDatabaseFile)
    case None                          => sourceFilesFromDirectory()
  }

private def preprocessorStatement2String(stmt: IASTPreprocessorStatement): Option[String] = stmt match {
case s: IASTPreprocessorIfStatement =>
Expand Down
Loading

0 comments on commit 511d4d2

Please sign in to comment.