Skip to content

Commit

Permalink
[ruby_ast_gen] Control Structures (#5008)
Browse files Browse the repository at this point in the history
* Ruby AST Gen downloaded and unpacked

* Put AST Gen in front of ANTLR

* More entries

* Implemented JRuby for standalone capabilities. Edited SBT to help with DownloadHelper's assumptions on ensuring files are available.

* Migrated to creating RubyNodeCreator-style nodes directly

* Added all AstType entries

* Create match entries for all enums

* Added empty visit methods for each AST type

* More work on basic cases

* Completed exhaustive list

* Completed visitors for all types

* Executing JRuby from Joern's JVM process as dependency

* Inserted more missing cases

* Applied some control structures

* [ruby] Added json visitors for ifStmt, while, begin..end until

* Handle initial class/module body creation & summary creation (#5002)

* Handle initial class/module body creation & summary creation

* Separated tests between versions

* [ruby_ast_gen] Handling for Singleton & Anon Classes (#5006)

* Improved runner stability and handling class fields

* [ruby_ast_gen] Handling for Singleton & Anon Classes
Additionally, added handling for range operators and lowered hash arguments in calls to named arguments to be consistent with current ANTLR interpretation of these args.

* Roll back AstGenRunner changes

* Remove line for diff

* Progress push

* Finished control structures

---------

Co-authored-by: David Baker Effendi <[email protected]>
  • Loading branch information
AndreiDreyer and DavidBakerEffendi authored Oct 17, 2024
1 parent 452b1e3 commit afa1556
Show file tree
Hide file tree
Showing 24 changed files with 3,339 additions and 229 deletions.
72 changes: 63 additions & 9 deletions joern-cli/frontends/rubysrc2cpg/build.sbt
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
import better.files
import com.typesafe.config.{Config, ConfigFactory}
import versionsort.VersionHelper

import scala.sys.process.stringToProcess
import scala.util.Try

name := "rubysrc2cpg"

dependsOn(Projects.dataflowengineoss % "compile->compile;test->test", Projects.x2cpg % "compile->compile;test->test")

lazy val appProperties = settingKey[Config]("App Properties")
appProperties := {
val path = (Compile / resourceDirectory).value / "application.conf"
val path = (Compile / resourceDirectory).value / "application.conf"
val applicationConf = ConfigFactory.parseFile(path).resolve()
applicationConf
}
Expand All @@ -15,34 +20,83 @@ lazy val joernTypeStubsVersion = settingKey[String]("joern_type_stub version")
joernTypeStubsVersion := appProperties.value.getString("rubysrc2cpg.joern_type_stubs_version")

libraryDependencies ++= Seq(
"io.shiftleft" %% "codepropertygraph" % Versions.cpg,
"io.shiftleft" %% "codepropertygraph" % Versions.cpg,
"org.apache.commons" % "commons-compress" % Versions.commonsCompress, // For unpacking Gems with `--download-dependencies`
"org.scalatest" %% "scalatest" % Versions.scalatest % Test,
"org.antlr" % "antlr4-runtime" % Versions.antlr
"org.jruby" % "jruby-complete" % Versions.jRuby,
"org.scalatest" %% "scalatest" % Versions.scalatest % Test,
"org.antlr" % "antlr4-runtime" % Versions.antlr // TODO: Remove
)

enablePlugins(JavaAppPackaging, LauncherJarPlugin, Antlr4Plugin)

// TODO Remove antlr stuff
Antlr4 / antlr4Version := Versions.antlr
Antlr4 / antlr4GenVisitor := true
Antlr4 / javaSource := (Compile / sourceManaged).value

// Version of the ruby_ast_gen release to use, read from the `rubysrc2cpg.ruby_ast_gen_version` entry of `appProperties`.
lazy val astGenVersion = settingKey[String]("ruby_ast_gen version")
astGenVersion := appProperties.value.getString("rubysrc2cpg.ruby_ast_gen_version")

// NOTE(review): both modules below also appear in the `libraryDependencies` block earlier in this build file, so this
// block looks redundant — confirm before removing.
libraryDependencies ++= Seq(
"io.shiftleft" %% "codepropertygraph" % Versions.cpg,
"org.scalatest" %% "scalatest" % Versions.scalatest % Test
)

// Base URL (with trailing slash) of the GitHub release download directory for the configured `astGenVersion`.
lazy val astGenDlUrl = settingKey[String]("astgen download url")
astGenDlUrl := s"https://github.com/joernio/ruby_ast_gen/releases/download/v${astGenVersion.value}/"

/** Checks whether a `ruby_ast_gen` executable reachable from this process is recent enough.
  *
  * Runs `ruby_ast_gen --version` and compares the stripped output against the requested version using
  * `VersionHelper.compare`.
  *
  * @param astGenVersion
  *   the minimum version required by this build.
  * @return
  *   `true` only if the command ran successfully, reported something other than `"unknown"`, and that version
  *   compares greater than or equal to `astGenVersion`; `false` otherwise.
  */
def hasCompatibleAstGenVersion(astGenVersion: String): Boolean = {
  // Any failure to run the command (not installed, non-zero exit) collapses to `None`.
  val reportedVersion = Try("ruby_ast_gen --version".!!).toOption.map(_.strip())
  reportedVersion.exists { installed =>
    installed != "unknown" && VersionHelper.compare(installed, astGenVersion) >= 0
  }
}

// Downloads and unpacks the `ruby_ast_gen` release archive into `<baseDirectory>/bin/astgen` and mirrors it into the
// staging directory, unless a compatible `ruby_ast_gen` is already installed on this machine.
lazy val astGenDlTask = taskKey[Unit](s"Download astgen binaries")
astGenDlTask := {
  if (!hasCompatibleAstGenVersion(astGenVersion.value)) {
    val astGenDir = baseDirectory.value / "bin" / "astgen"
    astGenDir.mkdirs()

    // The release asset carries the version in its name; locally we keep an unversioned copy.
    val gemBaseName   = "ruby_ast_gen"
    val gemName       = s"${gemBaseName}_v${astGenVersion.value}.zip"
    val localArchive  = astGenDir / s"$gemBaseName.zip"
    val unpackedGemDir = astGenDir / gemBaseName

    // We set this up so that the unversioned archive is what the download helper aims to keep available
    DownloadHelper.ensureIsAvailable(s"${astGenDlUrl.value}$gemName", localArchive)

    // Always unpack from scratch so files from a previously unpacked version cannot linger.
    if (unpackedGemDir.exists()) IO.delete(unpackedGemDir)
    IO.unzip(localArchive, unpackedGemDir)

    val distDir = (Universal / stagingDirectory).value / "bin" / "astgen"
    distDir.mkdirs()
    IO.copyDirectory(astGenDir, distDir)

    // permissions are lost during the download; need to set them manually.
    // `listFiles()` returns null on I/O errors, hence the `Option` guard.
    for {
      dir     <- Seq(astGenDir, distDir)
      entries <- Option(dir.listFiles()).toSeq
      entry   <- entries
    } entry.setExecutable(true, false)
  }
}

Compile / compile := ((Compile / compile) dependsOn astGenDlTask).value

lazy val joernTypeStubsDlUrl = settingKey[String]("joern_type_stubs download url")
joernTypeStubsDlUrl := s"https://github.com/joernio/joern-type-stubs/releases/download/v${joernTypeStubsVersion.value}/"

lazy val joernTypeStubsDlTask = taskKey[Unit]("Download joern-type-stubs")
joernTypeStubsDlTask := {
val joernTypeStubsDir = baseDirectory.value / "type_stubs"
val fileName = "rubysrc_builtin_types.zip"
val shaFileName = s"$fileName.sha512"
val fileName = "rubysrc_builtin_types.zip"
val shaFileName = s"$fileName.sha512"

joernTypeStubsDir.mkdir()

DownloadHelper.ensureIsAvailable(s"${joernTypeStubsDlUrl.value}$fileName", joernTypeStubsDir / fileName)
DownloadHelper.ensureIsAvailable(s"${joernTypeStubsDlUrl.value}$shaFileName", joernTypeStubsDir / shaFileName)

val typeStubsFile = better.files.File(joernTypeStubsDir.getAbsolutePath) / fileName
val checksumFile = better.files.File(joernTypeStubsDir.getAbsolutePath) / shaFileName
val checksumFile = better.files.File(joernTypeStubsDir.getAbsolutePath) / shaFileName

val typestubsSha = typeStubsFile.sha512

Expand All @@ -64,5 +118,5 @@ joernTypeStubsDlTask := {

Compile / compile := ((Compile / compile) dependsOn joernTypeStubsDlTask).value

Universal / packageName := name.value
Universal / packageName := name.value
Universal / topLevelDirectory := None
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
rubysrc2cpg {
ruby_ast_gen_version: "0.16.0"
joern_type_stubs_version: "0.6.0"
}
Original file line number Diff line number Diff line change
@@ -1,25 +1,31 @@
package io.joern.rubysrc2cpg

import io.joern.rubysrc2cpg.Frontend.*
import io.joern.x2cpg.DependencyDownloadConfig
import io.joern.x2cpg.X2CpgConfig
import io.joern.x2cpg.X2CpgMain
import io.joern.x2cpg.passes.frontend.TypeRecoveryParserConfig
import io.joern.x2cpg.passes.frontend.XTypeRecoveryConfig
import io.joern.x2cpg.astgen.AstGenConfig
import io.joern.x2cpg.{DependencyDownloadConfig, X2CpgConfig, X2CpgMain}
import io.joern.x2cpg.passes.frontend.{TypeRecoveryParserConfig, XTypeRecovery, XTypeRecoveryConfig}
import io.joern.x2cpg.typestub.TypeStubConfig
import io.joern.x2cpg.utils.server.FrontendHTTPServer
import scopt.OParser

import java.nio.file.Paths

final case class Config(
antlrCacheMemLimit: Double = 0.6d,
downloadDependencies: Boolean = false,
useTypeStubs: Boolean = true,
antlrDebug: Boolean = false,
antlrProfiling: Boolean = false
antlrProfiling: Boolean = false,
useJsonAst: Boolean = false
) extends X2CpgConfig[Config]
with DependencyDownloadConfig[Config]
with TypeRecoveryParserConfig[Config]
with TypeStubConfig[Config] {
with TypeStubConfig[Config]
with AstGenConfig[Config] {

override val astGenProgramName: String = "ruby_ast_gen"
override val astGenConfigPrefix: String = "rubysrc2cpg"
override val multiArchitectureBuilds: Boolean = true

this.defaultIgnoredFilesRegex = List("spec", "test", "tests", "vendor").flatMap { directory =>
List(s"(^|\\\\)$directory($$|\\\\)".r.unanchored, s"(^|/)$directory($$|/)".r.unanchored)
Expand All @@ -44,6 +50,10 @@ final case class Config(
/** Returns a copy of this configuration with `useTypeStubs` set to `value`; the copy is passed through
  * `withInheritedFields(this)` before being returned.
  */
override def withTypeStubs(value: Boolean): Config =
  copy(useTypeStubs = value).withInheritedFields(this)

/** Returns a copy of this configuration with `useJsonAst` set to `value`; the copy is passed through
  * `withInheritedFields(this)` before being returned.
  */
def withUseJsonAst(value: Boolean): Config =
  copy(useJsonAst = value).withInheritedFields(this)
}

private object Frontend {
Expand Down Expand Up @@ -76,6 +86,9 @@ private object Frontend {
opt[Unit]("enable-file-content")
.action((_, c) => c.withDisableFileContent(false))
.text("Enable file content"),
opt[Unit]("json-ast")
.action((_, c) => c.withUseJsonAst(true))
.text("Use JSON Ast builder"),
DependencyDownloadConfig.parserOptions,
XTypeRecoveryConfig.parserOptionsForParserConfig,
TypeStubConfig.parserOptions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@ import better.files.File
import io.joern.rubysrc2cpg.astcreation.AstCreator
import io.joern.rubysrc2cpg.astcreation.RubyIntermediateAst.StatementList
import io.joern.rubysrc2cpg.datastructures.RubyProgramSummary
import io.joern.rubysrc2cpg.parser.{RubyNodeCreator, RubyParser}
import io.joern.rubysrc2cpg.parser.{
RubyAstGenRunner,
RubyJsonParser,
RubyJsonToNodeCreator,
RubyNodeCreator,
RubyParser
}
import io.joern.rubysrc2cpg.passes.{
AstCreationPass,
ConfigFileCreationPass,
Expand All @@ -13,25 +19,22 @@ import io.joern.rubysrc2cpg.passes.{
}
import io.joern.rubysrc2cpg.utils.DependencyDownloader
import io.joern.x2cpg.X2Cpg.withNewEmptyCpg
import io.joern.x2cpg.frontendspecific.rubysrc2cpg.{
ImplicitRequirePass,
ImportsPass,
RubyImportResolverPass,
RubyTypeHintCallLinker,
RubyTypeRecoveryPassGenerator
}
import io.joern.x2cpg.frontendspecific.rubysrc2cpg.*
import io.joern.x2cpg.passes.base.AstLinkerPass
import io.joern.x2cpg.passes.callgraph.NaiveCallLinker
import io.joern.x2cpg.passes.frontend.{MetaDataPass, TypeNodePass, XTypeRecoveryConfig}
import io.joern.x2cpg.utils.{ConcurrentTaskUtil, ExternalCommand}
import io.joern.x2cpg.{SourceFiles, X2CpgFrontend}
import io.shiftleft.codepropertygraph.generated.Cpg
import io.shiftleft.codepropertygraph.generated.Languages
import io.shiftleft.codepropertygraph.generated.{Cpg, Languages}
import io.shiftleft.passes.CpgPassBase
import io.shiftleft.semanticcpg.language.*
import org.slf4j.LoggerFactory
import upickle.default.*

import java.nio.file.{Files, Paths}
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.Duration
import scala.concurrent.{Await, Future}
import scala.util.matching.Regex
import scala.util.{Failure, Success, Try, Using}

Expand All @@ -49,43 +52,84 @@ class RubySrc2Cpg extends X2CpgFrontend[Config] {
}

private def createCpgAction(cpg: Cpg, config: Config): Unit = {
Using.resource(
new parser.ResourceManagedParser(config.antlrCacheMemLimit, config.antlrDebug, config.antlrProfiling)
) { parser =>
val astCreators = ConcurrentTaskUtil
.runUsingThreadPool(RubySrc2Cpg.generateParserTasks(parser, config, cpg.metaData.root.headOption))
.flatMap {
case Failure(exception) => logger.warn(s"Could not parse file, skipping - ", exception); None
case Success(astCreator) => Option(astCreator)
}
.filter(x => {
if x.fileContent.isBlank then logger.info(s"File content empty, skipping - ${x.fileName}")

!x.fileContent.isBlank
})

// Pre-parse the AST creators for high level structures
val internalProgramSummary = ConcurrentTaskUtil
.runUsingThreadPool(astCreators.map(x => () => x.summarize()).iterator)
.flatMap {
case Failure(exception) => logger.warn(s"Unable to pre-parse Ruby file, skipping - ", exception); None
case Success(summary) => Option(summary)
if (config.useJsonAst) {
File.usingTemporaryDirectory("rubysrc2cpgOut") { tmpDir =>
val astGenResult = RubyAstGenRunner(config).execute(tmpDir)

val astCreators = ConcurrentTaskUtil
.runUsingThreadPool(
RubySrc2Cpg.processAstGenRunnerResults(astGenResult.parsedFiles, config, cpg.metaData.root.headOption)
)
.flatMap {
case Failure(exception) => logger.warn(s"Unable to parse Ruby file, skipping -", exception); None
case Success(astCreator) => Option(astCreator)
}
.filter(x => {
if x.fileContent.isBlank then logger.info(s"File content empty, skipping - ${x.fileName}")
!x.fileContent.isBlank
})

val internalProgramSummary = ConcurrentTaskUtil
.runUsingThreadPool(astCreators.map(x => () => x.summarize()).iterator)
.flatMap {
case Failure(exception) => logger.warn(s"Unable to pre-parse Ruby file, skipping - ", exception); None
case Success(summary) => Option(summary)
}
.foldLeft(RubyProgramSummary(RubyProgramSummary.BuiltinTypes(config.typeStubMetaData)))(_ ++= _)

val dependencySummary = if (config.downloadDependencies) {
DependencyDownloader(cpg).download()
} else {
RubyProgramSummary()
}
.foldLeft(RubyProgramSummary(RubyProgramSummary.BuiltinTypes(config.typeStubMetaData)))(_ ++= _)

val dependencySummary = if (config.downloadDependencies) {
DependencyDownloader(cpg).download()
} else {
RubyProgramSummary()
val programSummary = internalProgramSummary ++= dependencySummary

AstCreationPass(cpg, astCreators.map(_.withSummary(programSummary))).createAndApply()
if config.downloadDependencies then {
DependencySummarySolverPass(cpg, dependencySummary).createAndApply()
}
TypeNodePass.withTypesFromCpg(cpg).createAndApply()
}
} else {
Using.resource(
new parser.ResourceManagedParser(config.antlrCacheMemLimit, config.antlrDebug, config.antlrProfiling)
) { parser =>
val astCreators = ConcurrentTaskUtil
.runUsingThreadPool(RubySrc2Cpg.generateParserTasks(parser, config, cpg.metaData.root.headOption))
.flatMap {
case Failure(exception) => logger.warn(s"Could not parse file, skipping - ", exception); None
case Success(astCreator) => Option(astCreator)
}
.filter(x => {
if x.fileContent.isBlank then logger.info(s"File content empty, skipping - ${x.fileName}")

!x.fileContent.isBlank
})

// Pre-parse the AST creators for high level structures
val internalProgramSummary = ConcurrentTaskUtil
.runUsingThreadPool(astCreators.map(x => () => x.summarize()).iterator)
.flatMap {
case Failure(exception) => logger.warn(s"Unable to pre-parse Ruby file, skipping - ", exception); None
case Success(summary) => Option(summary)
}
.foldLeft(RubyProgramSummary(RubyProgramSummary.BuiltinTypes(config.typeStubMetaData)))(_ ++= _)

val dependencySummary = if (config.downloadDependencies) {
DependencyDownloader(cpg).download()
} else {
RubyProgramSummary()
}

val programSummary = internalProgramSummary ++= dependencySummary
val programSummary = internalProgramSummary ++= dependencySummary

AstCreationPass(cpg, astCreators.map(_.withSummary(programSummary))).createAndApply()
if config.downloadDependencies then {
DependencySummarySolverPass(cpg, dependencySummary).createAndApply()
AstCreationPass(cpg, astCreators.map(_.withSummary(programSummary))).createAndApply()
if config.downloadDependencies then {
DependencySummarySolverPass(cpg, dependencySummary).createAndApply()
}
TypeNodePass.withTypesFromCpg(cpg).createAndApply()
}
TypeNodePass.withTypesFromCpg(cpg).createAndApply()
}
}

Expand Down Expand Up @@ -119,6 +163,31 @@ object RubySrc2Cpg {
.generate() ++ List(new RubyTypeHintCallLinker(cpg), new NaiveCallLinker(cpg), new AstLinkerPass(cpg))
}

/** Builds one deferred task per generated AST Gen JSON file; each task, when run, parses the JSON and produces an
  * [[AstCreator]] for the corresponding Ruby source file. The tasks are intended to be executed in parallel.
  *
  * @param astFiles
  *   paths of the JSON files produced by the AST Gen runner.
  * @param config
  *   the frontend configuration; supplies the input path, file-content toggle and schema validation mode.
  * @param projectRoot
  *   the project root, if known, forwarded to each [[AstCreator]].
  * @return
  *   an iterator of thunks, each yielding the [[AstCreator]] for one parsed file.
  */
def processAstGenRunnerResults(
  astFiles: List[String],
  config: Config,
  projectRoot: Option[String]
): Iterator[() => AstCreator] = {
  astFiles.map { fileName => () =>
    val parserResult     = RubyJsonParser.readFile(Paths.get(fileName))
    val relativeFileName = SourceFiles.toRelativePath(parserResult.fullPath, config.inputPath)
    val rubyProgram      = new RubyJsonToNodeCreator().visitProgram(parserResult.json)
    // `fileName` is the generated JSON AST, not the Ruby source. The file content must come from the source file
    // the parser result refers to. Resolving against the input path leaves an absolute `fullPath` untouched and
    // anchors a relative one at the project input directory.
    val fileContent = (File(config.inputPath) / parserResult.fullPath).contentAsString
    new AstCreator(
      relativeFileName,
      None,
      Some(parserResult.json),
      config.useJsonAst,
      projectRoot,
      enableFileContents = !config.disableFileContent,
      fileContent = fileContent,
      rootNode = Option(rubyProgram)
    )(config.schemaValidation)
  }.iterator
}

def generateParserTasks(
resourceManagedParser: parser.ResourceManagedParser,
config: Config,
Expand All @@ -139,7 +208,9 @@ object RubySrc2Cpg {
val fileContent = (File(config.inputPath) / fileName).contentAsString
new AstCreator(
fileName,
ctx,
Option(ctx),
None,
false,
projectRoot,
enableFileContents = !config.disableFileContent,
fileContent = fileContent,
Expand Down
Loading

0 comments on commit afa1556

Please sign in to comment.