Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[c2cpg] Implemented support for JSON Compilation Database Files #5005

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/Main.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package io.joern.c2cpg
import io.joern.c2cpg.Frontend.*
import io.joern.x2cpg.{X2CpgConfig, X2CpgMain}
import io.joern.x2cpg.utils.server.FrontendHTTPServer
import io.joern.x2cpg.SourceFiles
import org.slf4j.LoggerFactory
import scopt.OParser

Expand All @@ -16,7 +17,8 @@ final case class Config(
includePathsAutoDiscovery: Boolean = false,
skipFunctionBodies: Boolean = false,
noImageLocations: Boolean = false,
withPreprocessedFiles: Boolean = false
withPreprocessedFiles: Boolean = false,
compilationDatabase: Option[String] = None
) extends X2CpgConfig[Config] {
def withIncludePaths(includePaths: Set[String]): Config = {
this.copy(includePaths = includePaths).withInheritedFields(this)
Expand Down Expand Up @@ -57,6 +59,10 @@ final case class Config(
/** Returns a copy of this config whose `withPreprocessedFiles` flag is set to `value`; the copy is then passed
  * through `withInheritedFields(this)` before being returned.
  */
def withPreprocessedFiles(value: Boolean): Config = {
  val updated = copy(withPreprocessedFiles = value)
  updated.withInheritedFields(this)
}

/** Returns a copy of this config that uses `value` as the compilation database file; the copy is then passed
  * through `withInheritedFields(this)` before being returned.
  */
def withCompilationDatabase(value: String): Config = {
  val updated = copy(compilationDatabase = Some(value))
  updated.withInheritedFields(this)
}
}

private object Frontend {
Expand Down Expand Up @@ -103,7 +109,10 @@ private object Frontend {
opt[String]("define")
.unbounded()
.text("define a name")
.action((d, c) => c.withDefines(c.defines + d))
.action((d, c) => c.withDefines(c.defines + d)),
// Optional path to a JSON compilation database. The given path is resolved via SourceFiles.toAbsolutePath against
// the input path known at the time this option is processed.
// NOTE(review): c.inputPath is read while options are still being parsed - confirm it is already set when this
// option appears before the input path on the command line.
opt[String]("compilation-database")
  .text("use this compilation database file (only handles files and compile settings listed in this file)")
  .action((d, c) => c.withCompilationDatabase(SourceFiles.toAbsolutePath(d, c.inputPath)))
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class AstCreator(
protected val usingDeclarationMappings: mutable.Map[String, String] = mutable.HashMap.empty

// TypeDecls with their bindings (with their refs) for lambdas and methods are not put in the AST
// where the respective nodes are defined. Instead we put them under the parent TYPE_DECL in which they are defined.
// where the respective nodes are defined. Instead, we put them under the parent TYPE_DECL in which they are defined.
// To achieve this we need this extra stack.
protected val methodAstParentStack: Stack[NewNode] = new Stack()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,8 @@ trait AstForStatementsCreator(implicit withSchemaValidation: ValidationMode) { t
// We only handle un-parsable macros here for now
val isFromMacroExpansion = statement.getProblem.getNodeLocations.exists(_.isInstanceOf[IASTMacroExpansionLocation])
val asts = if (isFromMacroExpansion) {
new CdtParser(config).parse(statement.getRawSignature, Paths.get(statement.getContainingFilename)) match
new CdtParser(config, List.empty)
.parse(statement.getRawSignature, Paths.get(statement.getContainingFilename)) match
case Some(node) => node.getDeclarations.toIndexedSeq.flatMap(astsForDeclaration)
case None => Seq.empty
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package io.joern.c2cpg.parser

import better.files.File
import io.joern.c2cpg.Config
import io.joern.c2cpg.parser.JSONCompilationDatabaseParser.CommandObject
import io.shiftleft.utils.IOUtils
import org.eclipse.cdt.core.dom.ast.gnu.c.GCCLanguage
import org.eclipse.cdt.core.dom.ast.gnu.cpp.GPPLanguage
Expand Down Expand Up @@ -41,13 +42,15 @@ object CdtParser {

}

class CdtParser(config: Config) extends ParseProblemsLogger with PreprocessorStatementsLogger {
class CdtParser(config: Config, compilationDatabase: List[CommandObject])
extends ParseProblemsLogger
with PreprocessorStatementsLogger {

import io.joern.c2cpg.parser.CdtParser._

private val headerFileFinder = new HeaderFileFinder(config.inputPath)
private val parserConfig = ParserConfig.fromConfig(config)
private val definedSymbols = parserConfig.definedSymbols.asJava
private val parserConfig = ParserConfig.fromConfig(config, compilationDatabase)
private val definedSymbols = parserConfig.definedSymbols
private val includePaths = parserConfig.userIncludePaths
private val log = new DefaultLogService

Expand Down Expand Up @@ -80,7 +83,12 @@ class CdtParser(config: Config) extends ParseProblemsLogger with PreprocessorSta
val additionalIncludes =
if (FileDefaults.isCPPFile(file.toString)) parserConfig.systemIncludePathsCPP
else parserConfig.systemIncludePathsC
new ScannerInfo(definedSymbols, (includePaths ++ additionalIncludes).map(_.toString).toArray)
val fileSpecificDefines = parserConfig.definedSymbolsPerFile.getOrElse(file.toString, Map.empty)
val fileSpecificIncludes = parserConfig.includesPerFile.getOrElse(file.toString, List.empty)
new ScannerInfo(
(definedSymbols ++ fileSpecificDefines).asJava,
fileSpecificIncludes.toArray ++ (includePaths ++ additionalIncludes).map(_.toString).toArray
)
}

private def parseInternal(code: String, inFile: File): IASTTranslationUnit = {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package io.joern.c2cpg.parser

import io.joern.x2cpg.SourceFiles
import io.shiftleft.utils.IOUtils
import org.slf4j.LoggerFactory
import ujson.Value

import java.nio.file.Paths
import scala.util.Try

/** Reads a JSON compilation database (`compile_commands.json`) and exposes, per entry, the compiled file together
  * with the `-I` include paths and `-D` macro definitions found in its `arguments` list or `command` string.
  */
object JSONCompilationDatabaseParser {

  import scala.util.{Failure, Success}
  import scala.util.control.NonFatal

  private val logger = LoggerFactory.getLogger(getClass)

  /** Matches one macro definition inside a `command` string.
    * {{{
    * 1) -D: Matches the -D flag, which is the key prefix for defining macros.
    * 2) ([A-Za-z_][A-Za-z0-9_]*): Matches a valid macro name (a letter or underscore, optionally followed by
    *    letters, digits, or underscores; single-character names are valid).
    * 3) (=(\\*".*?"|\S+))?: Optionally matches = followed by either:
    *  a) A quoted string (the opening quote may be preceded by backslashes). The match is lazy, so it ends at the
    *     next quote instead of running on to the last quote of the whole command.
    *  b) An unquoted value, i.e. a run of non-whitespace characters.
    * }}}
    */
  private val defineInCommandPattern = """-D([A-Za-z_][A-Za-z0-9_]*)(=(\\*".*?"|\S+))?""".r

  /** {{{
    * 1) -I: Matches the -I flag, which indicates an include directory.
    * 2) (\S+): Matches one or more non-whitespace characters, which represent the path of the directory.
    * }}}
    */
  private val includeInCommandPattern = """-I(\S+)""".r

  /** One entry of the compilation database.
    *
    * @param directory
    *   value of the entry's `directory` key
    * @param arguments
    *   elements of the entry's `arguments` array (empty if the key is absent or not an array)
    * @param command
    *   the entry's `command` string wrapped in a list (empty if the key is absent)
    * @param file
    *   value of the entry's `file` key
    */
  case class CommandObject(directory: String, arguments: List[String], command: List[String], file: String) {

    /** @return
      *   the file path, resolved against `directory` via `SourceFiles.toAbsolutePath`
      */
    def compiledFile(): String = SourceFiles.toAbsolutePath(file, directory)

    /** Splits a `-DNAME[=VALUE]` token into name and value. Only the first `=` separates the two, so values that
      * contain `=` are kept intact and `-DNAME=` yields an empty value instead of failing.
      */
    private def nameValuePairFromDefine(define: String): (String, String) = {
      // With a positive limit, split never drops trailing empty strings and always returns at least one element.
      define.stripPrefix("-D").split("=", 2) match {
        case Array(name, value) => (name, value)
        case parts              => (parts.head, "")
      }
    }

    private def pathFromInclude(include: String): String = include.stripPrefix("-I")

    /** @return
      *   all include paths given via `-I`, first those from `arguments`, then those found in `command`
      */
    def includes(): List[String] = {
      val includesFromArguments = arguments.filter(_.startsWith("-I")).map(pathFromInclude)
      val includesFromCommand = command.flatMap { c =>
        includeInCommandPattern.findAllIn(c).map(pathFromInclude).toList
      }
      includesFromArguments ++ includesFromCommand
    }

    /** @return
      *   all macro definitions given via `-D` as (name, value) pairs, first those from `arguments`, then those found
      *   in `command`; a macro without a value gets the empty string
      */
    def defines(): List[(String, String)] = {
      val definesFromArguments = arguments.filter(_.startsWith("-D")).map(nameValuePairFromDefine)
      val definesFromCommand = command.flatMap { c =>
        defineInCommandPattern.findAllIn(c).map(nameValuePairFromDefine).toList
      }
      definesFromArguments ++ definesFromCommand
    }
  }

  private def hasKey(node: Value, key: String): Boolean = Try(node(key)).isSuccess

  private def safeArguments(obj: Value): List[String] = {
    if (hasKey(obj, "arguments")) obj("arguments").arrOpt.map(_.toList.map(_.str)).getOrElse(List.empty)
    else List.empty
  }

  private def safeCommand(obj: Value): List[String] = {
    if (hasKey(obj, "command")) List(obj("command").str)
    else List.empty
  }

  /** Parses the compilation database at the given path.
    *
    * Entries that cannot be converted (e.g. a missing `directory` or `file` key) are logged and skipped, so a single
    * malformed entry does not discard the rest of the database.
    *
    * @param compileCommandsJson
    *   path to the compilation database file
    * @return
    *   one [[CommandObject]] per well-formed entry; an empty list if the file cannot be read or is not a JSON array
    */
  def parse(compileCommandsJson: String): List[CommandObject] = {
    try {
      val jsonContent = IOUtils.readEntireFile(Paths.get(compileCommandsJson))
      val json        = ujson.read(jsonContent)
      json.arr.toList.flatMap { obj =>
        Try(CommandObject(obj("directory").str, safeArguments(obj), safeCommand(obj), obj("file").str)) match {
          case Success(commandObject) => Some(commandObject)
          case Failure(exception) =>
            logger.warn(s"Skipping malformed entry in '$compileCommandsJson'", exception)
            None
        }
      }
    } catch {
      // NonFatal lets fatal JVM errors (e.g. OutOfMemoryError) and interrupts propagate instead of being swallowed.
      case NonFatal(t) =>
        logger.warn(s"Could not parse '$compileCommandsJson'", t)
        List.empty
    }
  }

}
Original file line number Diff line number Diff line change
@@ -1,28 +1,50 @@
package io.joern.c2cpg.parser

import io.joern.c2cpg.Config
import io.joern.c2cpg.parser.JSONCompilationDatabaseParser.CommandObject
import io.joern.c2cpg.utils.IncludeAutoDiscovery

import java.nio.file.{Path, Paths}

object ParserConfig {

def empty: ParserConfig =
ParserConfig(Set.empty, Set.empty, Set.empty, Map.empty, logProblems = false, logPreprocessor = false)
ParserConfig(
Set.empty,
Set.empty,
Set.empty,
Map.empty,
Map.empty,
Map.empty,
logProblems = false,
logPreprocessor = false
)

def fromConfig(config: Config): ParserConfig = ParserConfig(
config.includePaths.map(Paths.get(_).toAbsolutePath),
IncludeAutoDiscovery.discoverIncludePathsC(config),
IncludeAutoDiscovery.discoverIncludePathsCPP(config),
config.defines.map {
case define if define.contains("=") =>
val s = define.split("=")
s.head -> s(1)
case define => define -> "true"
}.toMap ++ DefaultDefines.DEFAULT_CALL_CONVENTIONS,
config.logProblems,
config.logPreprocessor
)
/** Builds the parser configuration from the frontend config and the entries of a compilation database.
  *
  * Global defines come from `config.defines` (`NAME` or `NAME=VALUE`) plus the default calling conventions.
  * Per-file defines and include paths are keyed by each entry's `compiledFile()`; if several entries name the same
  * file, the last one wins.
  *
  * @param config
  *   the frontend configuration
  * @param compilationDatabase
  *   entries of the compilation database (may be empty)
  */
def fromConfig(config: Config, compilationDatabase: List[CommandObject]): ParserConfig = {
  val compilationDatabaseDefines = compilationDatabase.map { c =>
    c.compiledFile() -> c.defines().toMap
  }.toMap
  val includes = compilationDatabase.map { c =>
    c.compiledFile() -> c.includes()
  }.toMap
  // Split on the first '=' only: "NAME=" yields an empty value instead of an ArrayIndexOutOfBoundsException and
  // values containing '=' (e.g. "A=b=c") are kept intact.
  val globalDefines = config.defines.map { define =>
    define.split("=", 2) match {
      case Array(name, value) => name -> value
      case _                  => define -> ""
    }
  }.toMap
  ParserConfig(
    config.includePaths.map(Paths.get(_).toAbsolutePath),
    IncludeAutoDiscovery.discoverIncludePathsC(config),
    IncludeAutoDiscovery.discoverIncludePathsCPP(config),
    globalDefines ++ DefaultDefines.DEFAULT_CALL_CONVENTIONS,
    compilationDatabaseDefines,
    includes,
    config.logProblems,
    config.logPreprocessor
  )
}

}

Expand All @@ -31,6 +53,8 @@ case class ParserConfig(
systemIncludePathsC: Set[Path],
systemIncludePathsCPP: Set[Path],
definedSymbols: Map[String, String],
definedSymbolsPerFile: Map[String, Map[String, String]],
includesPerFile: Map[String, List[String]],
logProblems: Boolean,
logPreprocessor: Boolean
)
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import io.joern.c2cpg.Config
import io.joern.c2cpg.astcreation.AstCreator
import io.joern.c2cpg.astcreation.CGlobal
import io.joern.c2cpg.parser.{CdtParser, FileDefaults}
import io.joern.c2cpg.parser.JSONCompilationDatabaseParser
import io.joern.c2cpg.parser.JSONCompilationDatabaseParser.CommandObject
import io.shiftleft.codepropertygraph.generated.Cpg
import io.shiftleft.passes.ForkJoinParallelCpgPass
import io.joern.x2cpg.SourceFiles
Expand All @@ -24,18 +26,21 @@ class AstCreationPass(cpg: Cpg, config: Config, report: Report = new Report())

private val logger: Logger = LoggerFactory.getLogger(classOf[AstCreationPass])

private val global = new CGlobal()
private val file2OffsetTable: ConcurrentHashMap[String, Array[Int]] = new ConcurrentHashMap()
private val parser: CdtParser = new CdtParser(config)

private val global = new CGlobal()
private val compilationDatabase: List[CommandObject] =
config.compilationDatabase.map(JSONCompilationDatabaseParser.parse).getOrElse(List.empty)

private val parser: CdtParser = new CdtParser(config, compilationDatabase)

/** Returns the keys currently stored in `global.usedTypes` as a list. */
def typesSeen(): List[String] = {
  val usedTypeNames = global.usedTypes.keys().asScala
  usedTypeNames.toList
}

/** Returns the entries of `global.methodDeclarations` whose key does not occur in `global.methodDefinitions`. */
def unhandledMethodDeclarations(): Map[String, CGlobal.MethodInfo] = {
  val declared     = global.methodDeclarations.asScala.toMap
  val definedNames = global.methodDefinitions.asScala.keys
  declared -- definedNames
}

override def generateParts(): Array[String] = {
private def sourceFilesFromDirectory(): Array[String] = {
val sourceFileExtensions = FileDefaults.SOURCE_FILE_EXTENSIONS
++ FileDefaults.HEADER_FILE_EXTENSIONS
++ Option.when(config.withPreprocessedFiles)(FileDefaults.PREPROCESSED_EXT).toList
Expand All @@ -60,6 +65,29 @@ class AstCreationPass(cpg: Cpg, config: Config, report: Report = new Report())
}
}

/** Collects the files listed in the parsed compilation database and runs them through `SourceFiles.filterFiles`
  * with the default ignored folders and the ignore settings from the config.
  *
  * @param compilationDatabaseFile
  *   path of the compilation database file; only used in the warning logged when the database has no entries
  */
private def sourceFilesFromCompilationDatabase(compilationDatabaseFile: String): Array[String] = {
  if (compilationDatabase.isEmpty)
    logger.warn(s"'$compilationDatabaseFile' contains no source files. CPG will be empty.")

  val listedFiles = compilationDatabase.map(_.compiledFile())
  val acceptedFiles = SourceFiles.filterFiles(
    listedFiles,
    config.inputPath,
    ignoredDefaultRegex = Option(DefaultIgnoredFolders),
    ignoredFilesRegex = Option(config.ignoredFilesRegex),
    ignoredFilesPath = Option(config.ignoredFiles)
  )
  acceptedFiles.toArray
}

/** Determines the files to process: the files listed in the compilation database if one is configured, otherwise
  * the source files found via `sourceFilesFromDirectory()`.
  */
override def generateParts(): Array[String] = {
  // fold replaces the isEmpty check followed by an unchecked Option.get.
  config.compilationDatabase.fold(sourceFilesFromDirectory())(sourceFilesFromCompilationDatabase)
}

override def runOnPart(diffGraph: DiffGraphBuilder, filename: String): Unit = {
val path = Paths.get(filename).toAbsolutePath
val relPath = SourceFiles.toRelativePath(path.toString, config.inputPath)
Expand Down
Loading
Loading