Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add test cases for js imports #1258

Merged
merged 28 commits into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
ee31211
add test cases for js imports
khemrajrathore Aug 20, 2024
77ae5b4
fixing test cases
khemrajrathore Aug 21, 2024
c7839af
more test cases
khemrajrathore Aug 21, 2024
f5f3548
skip running dataflow passes in metadata scan (#1259)
khemrajrathore Aug 21, 2024
e8b6bb6
more test case
khemrajrathore Aug 21, 2024
a370d6a
revert a change in postprocessing
khemrajrathore Aug 21, 2024
6808a67
skip applying post processing pass in metadata scan (#1260)
khemrajrathore Aug 21, 2024
1e6ab25
Merge branch 'dev' into metadata/jsImport
khemrajrathore Aug 21, 2024
6704f1d
refactor test case
khemrajrathore Aug 21, 2024
965ba9c
add tsconfig.json parsing logic
khemrajrathore Aug 21, 2024
136fadf
add cache
khemrajrathore Aug 21, 2024
e020d10
more log
khemrajrathore Aug 21, 2024
b04c0b5
logs
khemrajrathore Aug 21, 2024
e5b3f38
logs
khemrajrathore Aug 21, 2024
4a5f34b
more test case
khemrajrathore Aug 22, 2024
338e93c
introduce cache
khemrajrathore Aug 22, 2024
cd2769f
introduce concurrent map
khemrajrathore Aug 22, 2024
905bc93
optimization
khemrajrathore Aug 22, 2024
fa60195
remove unused logs
khemrajrathore Aug 22, 2024
841a2cb
use triemap
khemrajrathore Aug 22, 2024
2cee99e
remove cache
khemrajrathore Aug 22, 2024
171f6af
refactoring
khemrajrathore Aug 22, 2024
6db2f2f
refactor
khemrajrathore Aug 22, 2024
3d6db58
remove unnecessary test case
khemrajrathore Aug 22, 2024
bc5ec98
Merge branch 'dev' into metadata/jsImport
khemrajrathore Aug 22, 2024
29d7264
consider exclusion regex when reading files for tsconfig.json
khemrajrathore Aug 22, 2024
e4ff9c6
Merge branch 'dev' into metadata/jsImport
khemrajrathore Aug 22, 2024
ed06e7d
fix - minor typo
khemrajrathore Aug 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,66 +1,200 @@
package ai.privado.languageEngine.javascript.metadata

import ai.privado.cache.FileLinkingMetadata
import ai.privado.cache.{AppCache, FileLinkingMetadata, RuleCache}
import ai.privado.passes.JsonParser
import io.joern.x2cpg.passes.frontend.XImportResolverPass
import io.shiftleft.codepropertygraph.generated.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.Call

import java.util.regex.{Matcher, Pattern}
import java.io.File as JFile
import scala.util.Try
import java.io.{FileNotFoundException, File as JFile}
import better.files.File
import better.files.File.VisitOptions
import io.joern.x2cpg.SourceFiles

class FileImportMappingPassJS(cpg: Cpg, fileLinkingMetadata: FileLinkingMetadata)
extends XImportResolverPass(cpg: Cpg) {
import java.util.concurrent.ConcurrentHashMap
import scala.collection.mutable
import scala.util.control.Breaks.{break, breakable}
import scala.util.{Failure, Success, Try}

class FileImportMappingPassJS(
cpg: Cpg,
fileLinkingMetadata: FileLinkingMetadata,
appCache: AppCache,
ruleCache: RuleCache
) extends XImportResolverPass(cpg: Cpg)
with JsonParser {

private val pathPattern = Pattern.compile("[\"']([\\w/.]+)[\"']")

val sep: String = Matcher.quoteReplacement(JFile.separator)
val root = s"${sanitiseProbeScanPath(codeRootDir)}${JFile.separator}"

private val tsConfigPathMapping = mutable.HashMap[String, String]()

private val tsConfigEntityMissCache = ConcurrentHashMap.newKeySet[String]()

/** Overrides the parent pass's `init` to build the tsconfig.json / jsconfig.json path-alias map (see
  * `initializeConfigMap`) before imports are resolved.
  */
override def init(): Unit = initializeConfigMap()

override protected def optionalResolveImport(
fileName: String,
importCall: Call,
importedEntity: String,
importedAs: String,
diffGraph: DiffGraphBuilder
): Unit = {
val pathSep = ":"
val rawEntity = importedEntity.stripPrefix("./")
val alias = importedAs
val matcher = pathPattern.matcher(rawEntity)
val sep = Matcher.quoteReplacement(JFile.separator)
val root = s"$codeRootDir${JFile.separator}"
val currentFile = s"$root$fileName"
val extension = better.files.File(currentFile).`extension`.getOrElse(".ts")
val pathSep = ":"
val currentFile = s"$root$fileName"
val extension = File(currentFile).`extension`.getOrElse(".ts")
val parentDirPath = File(currentFile).parent.pathAsString // Stores the path of the parent dir of current file

val importedModule = getImportingModule(importedEntity, pathSep)

// We want to know if the import is local since if an external name is used to match internal methods we may have
// false paths.
val isLocalImport = importedEntity.matches("^[.]+/?.*")
// TODO: At times there is an operation inside of a require, e.g. path.resolve(__dirname + "/../config/env/all.js")
// this tries to recover the string but does not perform string constant propagation
val entity = if (matcher.find()) matcher.group(1) else rawEntity

val isImportingModule = !entity.contains(pathSep)
if (isLocalImport) {
val resolvedPath = Try(
better.files
.File(currentFile.stripSuffix(currentFile.split(sep).last), entity.split(pathSep).head)
.pathAsString
.stripPrefix(root)
).getOrElse(entity)
fileLinkingMetadata.addToFileImportMap(fileName, s"$resolvedPath$extension")
} else {
val seperatedFilePathList = fileName.split(sep).toList
val startingModule = entity.split(sep).head
val moduleIndex = seperatedFilePathList.indexOf(startingModule)
if (moduleIndex != -1) {
Try {
val resolvedPath = better.files
.File(root, seperatedFilePathList.take(moduleIndex).mkString(sep), entity.split(pathSep).head)
.pathAsString
.stripPrefix(root)
fileLinkingMetadata.addToFileImportMap(fileName, s"$resolvedPath$extension")
val isRelativeImport = importedEntity.matches("^[.]+/?.*")

if (isRelativeImport && importedModule.isDefined) {
getResolvedPath(parentDirPath, importedModule.get, importedAs, extension) match
case Failure(_) => // unable to resolve
case Success(resolvedPath) => fileLinkingMetadata.addToFileImportMap(fileName, resolvedPath)
} else if (importedModule.isDefined) {
val relativeDirCount = parentDirPath.stripPrefix(root).split(sep).length
breakable {
for (i <- 0 to relativeDirCount) {
val resolvedPath =
getResolvedPath(
parentDirPath.split(sep).dropRight(i).mkString(sep),
importedModule.get,
importedAs,
extension
)
if (resolvedPath.isSuccess) {
fileLinkingMetadata.addToFileImportMap(fileName, resolvedPath.get)
break
}
}
}
}
}

/** Resolves an importing module to the path of an existing file, expressed relative to `root`.
  *
  * `relativePath` is first resolved against `parentDirPath` and accepted as is when a regular file exists there.
  * Otherwise the directory that would hold that file is listed for regular files sharing its base name (name without
  * extension). A single match is used directly; among several matches the one whose extension equals
  * `currentFileExtension` is preferred, falling back to the first listed match.
  *
  * @param parentDirPath
  *   \- parent dir path where we intend to do the lookup
  * @param relativePath
  *   \- importing module path
  * @param importedAs
  *   \- importedAs value of import (not consulted by the lookup itself)
  * @param currentFileExtension
  *   \- extension of the file being processed, used as tie-breaker between same-named files
  * @return
  *   `Success` holding the matched path with the `root` prefix stripped; `Failure` when no candidate exists (a
  *   `FileNotFoundException`) or when the file system lookup itself throws
  */
def getResolvedPath(
  parentDirPath: String,
  relativePath: String,
  importedAs: String,
  currentFileExtension: String
): Try[String] = Try {
  val target = File(parentDirPath, relativePath)
  val matchedFile =
    if (target.exists && target.isRegularFile) target
    else {
      // No exact hit: look for siblings that differ from the target only by extension
      val stem = target.nameWithoutExtension
      val sameStemFiles = target.parent.list
        .filter(candidate => candidate.isRegularFile && candidate.nameWithoutExtension == stem)
        .toList

      sameStemFiles match {
        case Nil              => throw FileNotFoundException()
        case onlyMatch :: Nil => onlyMatch
        case firstMatch :: _  =>
          // Several files share the name, prefer the one with the same extension as the importing file
          sameStemFiles.find(_.`extension`.contains(currentFileExtension)).getOrElse(firstMatch)
      }
    }
  matchedFile.pathAsString.stripPrefix(root)
}

/** Derives the module to look up from the raw value of an import or require statement.
  *
  * Only the part of `importedEntity` before the first `pathSep` is considered. A value starting with `@` is treated as
  * a possible path alias and looked up in `tsConfigPathMapping`: first as an exact key, then against the keys ending
  * in `*`, where every `*` stands for any run of characters and all other characters of the key are matched
  * literally. On a wildcard hit the alias prefix (the key without its trailing `*`) at the start of the entity is
  * swapped for the mapped value (without its trailing `*`). Aliases that match nothing are remembered in
  * `tsConfigEntityMissCache` so the wildcard scan is not repeated for them.
  *
  * @param importedEntity
  *   \- The value present as part of import or require statement
  * @param pathSep
  *   \- The path sep used to concat the importing modules elements
  * @return
  *   the module to resolve; `None` when an `@`-prefixed entity has no alias mapping, or when `importedEntity` holds
  *   no module part at all (it consists only of separators)
  */
private def getImportingModule(importedEntity: String, pathSep: String): Option[String] =
  // `split` yields an empty array for a separator-only value, hence `headOption` instead of `head`
  importedEntity.split(pathSep).headOption.flatMap {
    case entity if entity.startsWith("@") =>
      // if import starts with `@` this can mean import of local modules in some case
      tsConfigPathMapping.get(entity).orElse {
        if (tsConfigEntityMissCache.contains(entity)) None
        else {
          val wildcardAlias = tsConfigPathMapping.find { case (aliasKey, _) =>
            aliasKey.endsWith("*") && {
              // Quote the literal parts so characters such as `.` or `(` in a key are not read as regex syntax
              val aliasRegex = aliasKey.split("\\*", -1).map(part => Pattern.quote(part)).mkString(".*")
              Pattern.matches(aliasRegex, entity)
            }
          }

          wildcardAlias match {
            case Some((aliasKey, aliasTarget)) =>
              val aliasPrefix  = aliasKey.stripSuffix("*")
              val targetPrefix = aliasTarget.stripSuffix("*")
              // Swap only the leading alias prefix. A replace-all would also rewrite later occurrences and, for the
              // catch-all `*` key (empty prefix), insert the target between every character.
              val resolvedModule =
                if (entity.startsWith(aliasPrefix)) targetPrefix + entity.substring(aliasPrefix.length)
                else entity
              Some(resolvedModule)
            case None =>
              tsConfigEntityMissCache.add(entity)
              None
          }
        }
      }
    case entity => Some(entity)
  }

/** Lists the `.json` files under the sanitised scan path whose base name contains `tsconfig` or `jsconfig`. The rule
  * cache's exclusion regex is handed to the file discovery as `ignoredFilesRegex`.
  * @return
  *   paths of the matching configuration files
  */
private def getJsonPathConfigFiles: List[String] = {
  val configNameMarkers = List("tsconfig", "jsconfig")

  SourceFiles
    .determine(
      sanitiseProbeScanPath(appCache.scanPath),
      Set(".json"),
      ignoredFilesRegex = Option(ruleCache.getExclusionRegex.r)
    )(VisitOptions.default)
    .filter { candidatePath =>
      val baseName = File(candidatePath).nameWithoutExtension
      configNameMarkers.exists(marker => baseName.contains(marker))
    }
}

/** Fills `tsConfigPathMapping` from the `compilerOptions.paths` entries of every configuration file returned by
  * `getJsonPathConfigFiles`. When the same alias is seen more than once, the entry read last wins.
  */
private def initializeConfigMap(): Unit = {
  val compilerPathConstant = "compilerOptions.paths"

  for {
    configFilePath            <- getJsonPathConfigFiles
    (flattenedKey, pathValue) <- getJSONKeyValuePairs(configFilePath)
    if flattenedKey.contains(compilerPathConstant)
  } {
    // Flattened keys look like - compilerOptions.paths.@utils/*[0]
    // Drop the leading constant and the trailing array index so that only the alias remains
    val pathKey = flattenedKey.split(s"${compilerPathConstant}.").last.split("\\[").head
    tsConfigPathMapping.update(pathKey, pathValue)
  }
}

/** Strips the `probe` folder segment from a scan path; files are copied into a /probe folder, so removing it helps
  * the lookup of resolved paths. Every occurrence of `sep` followed by `probe` is removed.
  *
  * NOTE(review): the replacement is a plain substring match, so a segment that merely starts with `probe` would be
  * shortened as well - confirm such paths cannot occur.
  *
  * @param scanPath
  *   \- repo path used for scanning
  * @return
  *   the scan path without the probe segment
  */
private def sanitiseProbeScanPath(scanPath: String): String = {
  val probeSegment = s"${sep}probe"
  scanPath.replace(probeSegment, "")
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ class JavascriptProcessor(
else
JsSrc2Cpg.postProcessingPasses(cpg).foreach(_.createAndApply())
if (privadoInput.fileLinkingReport) {
new FileImportMappingPassJS(cpg, fileLinkingMetadata).createAndApply()
new FileImportMappingPassJS(cpg, fileLinkingMetadata, appCache, ruleCache).createAndApply()
}
}

Expand Down
48 changes: 48 additions & 0 deletions src/main/scala/ai/privado/passes/JsonParser.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package ai.privado.passes

import io.circe.Json
import io.circe.parser.parse
import org.slf4j.LoggerFactory
import better.files.File

trait JsonParser {

  /** Parses a JSON file and flattens it into (key path, value) pairs, one pair per leaf value.
    *
    * Key paths are built by joining nested object keys with `.` and by suffixing array elements with `[index]`, so
    * the first element of array `b` inside object `a` gets the key `a.b[0]`. String leaves are returned as their
    * plain content; every other leaf is returned through its `toString` rendering. Empty objects and empty arrays
    * contribute no pairs.
    *
    * @param file
    *   the path to the JSON file to parse
    * @return
    *   the flattened key-value pairs of the whole document, or an empty list when the content is not valid JSON (the
    *   parsing error is logged at debug level)
    */
  def getJSONKeyValuePairs(file: String): List[(String, String)] = {

    // Recursively scan through the JSON to extract out all keys; `prefix` carries the key path built so far
    def extractKeyValuePairs(json: Json, prefix: String = ""): List[(String, String)] =
      (json.asObject, json.asArray) match {
        case (Some(fields), _) =>
          fields.toMap.toList.flatMap { case (key, value) =>
            val newPrefix = if (prefix.isEmpty) key else s"$prefix.$key"
            extractKeyValuePairs(value, newPrefix)
          }
        case (_, Some(elements)) =>
          elements.toList.zipWithIndex.flatMap { case (value, index) =>
            extractKeyValuePairs(value, s"$prefix[$index]")
          }
        case _ =>
          List((prefix, json.asString.getOrElse(json.toString)))
      }

    parse(File(file).contentAsString) match {
      case Right(parsedJson) => extractKeyValuePairs(parsedJson)
      case Left(parsingError) =>
        // Keep the best-effort behaviour, but leave a trace instead of dropping the error silently.
        // The logger is looked up locally so the trait adds no member that could clash with an implementing class.
        LoggerFactory.getLogger(classOf[JsonParser]).debug(s"Unable to parse JSON file $file: ${parsingError.toString}")
        List.empty
    }
  }

}
45 changes: 3 additions & 42 deletions src/main/scala/ai/privado/passes/JsonPropertyParserPass.scala
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@ import better.files.File
import scala.collection.mutable
import scala.io.Source
import scala.util.{Try, Using}
class JsonPropertyParserPass(cpg: Cpg, projectRoot: String) extends PrivadoParallelCpgPass[String](cpg) {
class JsonPropertyParserPass(cpg: Cpg, projectRoot: String)
extends PrivadoParallelCpgPass[String](cpg)
with JsonParser {

val logger = LoggerFactory.getLogger(getClass)
override def generateParts(): Array[String] = {

val files = Try(File(projectRoot).listRecursively.filter(_.isRegularFile).map(_.path.toString).toArray).toOption
Expand All @@ -40,44 +41,4 @@ class JsonPropertyParserPass(cpg: Cpg, projectRoot: String) extends PrivadoParal
builder.addNode(propertyNode)
propertyNode
}

/** Parses a JSON file and returns a list of key-value pairs for properties related to database connections and API
* endpoints.
*
* @param file
* the path to the JSON file to parse
* @return
* a list of key-value pairs where the keys match either the database connection or API endpoint naming conventions
*/
private def getJSONKeyValuePairs(file: String): List[(String, String)] = {
import better.files.File
val json = parse(File(file).contentAsString)

// Recursively scan through the JSON to extract out all keys
def extractKeyValuePairs(json: Json, prefix: String = ""): List[(String, String)] = {
json match {
case obj if obj.isObject =>
obj.asObject.get.toMap.toList.flatMap { case (key, value) =>
val newPrefix = if (prefix.isEmpty) key else s"$prefix.$key"
extractKeyValuePairs(value, newPrefix)
}
case arr if arr.isArray =>
arr.asArray.get.toList.zipWithIndex.flatMap { case (value, index) =>
val newPrefix = s"$prefix[$index]"
extractKeyValuePairs(value, newPrefix)
}
case other =>
List((prefix, other.asString.getOrElse(other.toString)))
}
}

val keyValuePairs = json match {
case Right(jsonObject) => extractKeyValuePairs(jsonObject)
case Left(parsingError) =>
logger.debug(parsingError.toString)
List.empty
}

keyValuePairs
}
}
Loading
Loading