Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release PR #1248

Merged
merged 5 commits into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions schema/src/main/scala/Constants/CpgSchemaConstants.scala
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,8 @@ object CpgSchemaConstants {

val SCHEDULE_PAUSED_NAME = "SCHEDULE_PAUSED"

val ORIGINAL_SOURCE_EDGE_NAME = "ORIGINAL_SOURCE"

val DERIVED_SOURCE_EDGE_NAME = "DERIVED_SOURCE"

}
6 changes: 6 additions & 0 deletions schema/src/main/scala/CpgExtSchema.scala
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,12 @@ class CpgExtSchema(builder: SchemaBuilder, cpgSchema: CpgSchema) {
highTouchSinkNode.addOutEdge(edge = taggedBy, inNode = tag)

// HighTouch sink node - End

// Edges linking a derived source and its original source (ORIGINAL_SOURCE and DERIVED_SOURCE)
private val originalSourceEdge = builder.addEdgeType(CpgSchemaConstants.ORIGINAL_SOURCE_EDGE_NAME)
private val derivedSourceEdge = builder.addEdgeType(CpgSchemaConstants.DERIVED_SOURCE_EDGE_NAME)
astNode.addOutEdge(edge = originalSourceEdge, inNode = astNode)
astNode.addOutEdge(edge = derivedSourceEdge, inNode = astNode)
}

object CpgExtSchema {
Expand Down
8 changes: 5 additions & 3 deletions src/main/scala/ai/privado/dataflow/Dataflow.scala
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ import io.joern.dataflowengineoss.language.*
import io.joern.dataflowengineoss.queryengine.{EngineConfig, EngineContext}
import io.joern.dataflowengineoss.semanticsloader.Semantics
import io.shiftleft.codepropertygraph.generated.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.{AstNode, Call, CfgNode, HightouchSink}
import io.shiftleft.codepropertygraph.generated.nodes.{AstNode, Call, CfgNode}
import io.shiftleft.semanticcpg.language.*
import org.slf4j.LoggerFactory
import overflowdb.traversal.Traversal
Expand Down Expand Up @@ -96,7 +96,10 @@ class Dataflow(cpg: Cpg, statsRecorder: StatsRecorder) {
_.tag.nameExact(Constants.catLevelOne).valueExact(CatLevelOne.SOURCES.name),
_.tag.nameExact(InternalTag.OBJECT_OF_SENSITIVE_CLASS_BY_MEMBER_NAME.toString)
)
sinks.reachableByFlows(firstLevelSources).l

sinks
.reachableByFlows(firstLevelSources)
.l
}
// The code below is commented out because we still need to test and fix a few open issues that
// result in false positives (FP) in 2nd-level derivation for storages
Expand Down Expand Up @@ -207,7 +210,6 @@ class Dataflow(cpg: Cpg, statsRecorder: StatsRecorder) {
case _ => JavaSemanticGenerator.getDefaultSemantics
}
}

}

object Dataflow {
Expand Down
54 changes: 54 additions & 0 deletions src/main/scala/ai/privado/entrypoint/DynamicRuleMerger.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package ai.privado.entrypoint

import ai.privado.model.{ConfigAndRules, FilterProperty, RuleInfo}
import org.slf4j.LoggerFactory

import scala.collection.mutable
import scala.collection.mutable.Map
import scala.collection.mutable.ListBuffer

trait DynamicRuleMerger {

  private val logger = LoggerFactory.getLogger(this.getClass)

  /** Merges externally supplied sink rules into the internal sink rules.
    *
    * Rules are keyed by (first domain, name, filter property). For every external rule, the first stored rule whose
    * id contains "ThirdParties.SDK", whose filter property is not `FilterProperty.CODE`, and which shares either the
    * domain or the name with the external rule gets the external patterns appended. When no such rule exists, the
    * external rule is stored under its own key, replacing any rule already stored under that key.
    *
    * If anything fails during the merge (for example a rule that has no domain at all), the merge is abandoned and
    * the plain concatenation `externalSinkRules ++ internalSinkRules` is returned instead.
    *
    * @param externalSinkRules
    *   sink rules coming from the external configuration
    * @param internalSinkRules
    *   sink rules coming from the internal configuration
    * @return
    *   the merged rules: internal rules first, in their original order, followed by the external rules that were added
    *   as new entries
    */
  def mergeDynamicRuleSinkForDependencyDiscovery(
    externalSinkRules: List[RuleInfo],
    internalSinkRules: List[RuleInfo]
  ): List[RuleInfo] = {
    // `head` throws for a rule without any domain; the catch block below turns that into the fallback result.
    def keyOf(rule: RuleInfo) = (rule.domains.head, rule.name, rule.filterProperty)

    try {
      // LinkedHashMap iterates in insertion order, so both the rule picked by `collectFirst` and the order of the
      // returned list follow the order of the input rules instead of hash order.
      val mergedRules = mutable.LinkedHashMap(internalSinkRules.map(rule => (keyOf(rule), rule))*)

      externalSinkRules.foreach { externalRule =>
        val externalKey                        = keyOf(externalRule)
        val (externalDomain, externalRuleName, _) = externalKey

        val matchingEntry = mergedRules.collectFirst {
          case (key @ (domain, name, filterProperty), rule)
              if ((domain == externalDomain || name == externalRuleName) &&
                rule.id.contains("ThirdParties.SDK") &&
                filterProperty != FilterProperty.CODE) =>
            (key, rule)
        }

        matchingEntry match {
          case Some((key, matchingRule)) =>
            // Every entry is stored under the key derived from the rule itself, and `copy` only changes the
            // patterns, so the matched key is still the right slot for the updated rule.
            mergedRules.update(key, matchingRule.copy(patterns = matchingRule.patterns ++ externalRule.patterns))
          case None =>
            mergedRules.update(externalKey, externalRule)
        }
      }

      mergedRules.values.toList
    } catch {
      case e: Exception =>
        logger.error("Error while merging dynamic rules")
        logger.debug("Error occurred ", e)
        externalSinkRules ++ internalSinkRules
    }
  }
}
8 changes: 4 additions & 4 deletions src/main/scala/ai/privado/entrypoint/RuleProcessor.scala
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import io.circe.yaml.parser
import ai.privado.utility.Utilities.{isValidDEDRule, isValidRule}
import io.circe.Json

trait RuleProcessor {
trait RuleProcessor extends DynamicRuleMerger {

private val logger = LoggerFactory.getLogger(this.getClass)

Expand Down Expand Up @@ -310,9 +310,9 @@ trait RuleProcessor {
* In case of duplicates it will keep the elements from "externalRules.sources".
* We don't know the internal logic. We came to this conclusion based on testing few samples.
*/
val exclusions = externalConfigAndRules.exclusions ++ internalConfigAndRules.exclusions
val sources = externalConfigAndRules.sources ++ internalConfigAndRules.sources
val sinks = externalConfigAndRules.sinks ++ internalConfigAndRules.sinks
val exclusions = externalConfigAndRules.exclusions ++ internalConfigAndRules.exclusions
val sources = externalConfigAndRules.sources ++ internalConfigAndRules.sources
val sinks = mergeDynamicRuleSinkForDependencyDiscovery(externalConfigAndRules.sinks, internalConfigAndRules.sinks)
val collections = externalConfigAndRules.collections ++ internalConfigAndRules.collections
val policies = externalConfigAndRules.policies ++ internalConfigAndRules.policies
val threats = externalConfigAndRules.threats ++ internalConfigAndRules.threats
Expand Down
131 changes: 46 additions & 85 deletions src/main/scala/ai/privado/exporter/ExporterUtility.scala
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ import ai.privado.cache.PropertyFilterCacheEncoderDecoder.*
import ai.privado.entrypoint.PrivadoInput
import ai.privado.languageEngine.default.NodeStarters
import ai.privado.metric.MetricHandler
import ai.privado.model.Constants.outputDirectoryName
import ai.privado.model.Constants.{catLevelOne, outputDirectoryName}
import ai.privado.model.{CatLevelOne, Constants, DataFlowPathModel, InternalTag, Language, NodeType, PolicyThreatType}
import ai.privado.model.exporter.{
AndroidPermissionModel,
Expand Down Expand Up @@ -68,10 +68,7 @@ import ai.privado.utility.Utilities
import ai.privado.utility.Utilities.{dump, getTruncatedText}
import ai.privado.tagger.sink.SinkArgumentUtility
import io.shiftleft.codepropertygraph.generated.nodes.*
import overflowdb.traversal.Traversal
import io.shiftleft.semanticcpg.language.*
import ai.privado.languageEngine.java.language.*
import ai.privado.tagger.AssetTagger
import better.files.File
import io.circe.Json
import io.circe.syntax.EncoderOps
Expand All @@ -86,6 +83,8 @@ import scala.concurrent.duration.Duration
import ExecutionContext.Implicits.global
import scala.util.{Failure, Success, Try}
import privado_core.BuildInfo
import io.shiftleft.codepropertygraph.generated.nodes.{AstNode, Call, CfgNode}
import io.shiftleft.semanticcpg.language.*

object ExporterUtility {

Expand All @@ -103,89 +102,51 @@ object ExporterUtility {
val lang = appCache.repoLanguage
val isPython = lang == Language.PYTHON

val sizeOfList = nodes.size
nodes.zipWithIndex.flatMap { case (node, index) =>
val currentNodeModel =
convertIndividualPathElement(node, index, sizeOfList, appCache = appCache, ruleCache = ruleCache)
if (
index == 0 && node.tag.nameExact(Constants.catLevelOne).valueExact(CatLevelOne.DERIVED_SOURCES.name).nonEmpty
) {
var typeFullName = Iterator(node).isIdentifier.typeFullName.headOption.getOrElse("")

// Temporary fix for python to match the typeFullName
typeFullName = updateTypeFullNameForPython(typeFullName, isPython)

// Going 1 level deep for derived sources to add extra nodes
taggerCache.typeDeclMemberCache
.getOrElse(typeFullName, TrieMap[String, mutable.Set[Member]]())
.get(sourceId) match {
case Some(members: mutable.HashSet[Member]) =>
// Picking up only the head as any path to base is sufficient
val member: Member = members.head
var typeFullNameLevel2 = member.typeFullName // java.lang.string
// Temporary fix for python to match the typeFullName
typeFullNameLevel2 = updateTypeFullNameForPython(typeFullNameLevel2, isPython)

taggerCache.typeDeclMemberCache
.getOrElse(typeFullNameLevel2, TrieMap[String, mutable.Set[Member]]())
.get(sourceId) match {
case Some(member2Set: mutable.HashSet[Member]) =>
// Picking up only the head as any path to base is sufficient
val member2 = member2Set.head
// Going 2 level deep for derived sources to add extra nodes
convertIndividualPathElement(
member2,
messageInExcerpt = generateDSMemberMsg(member2.name, typeFullNameLevel2),
appCache = appCache,
ruleCache = ruleCache
) ++ convertIndividualPathElement(
member,
messageInExcerpt = generateDSMemberMsg(member.name, typeFullName),
appCache = appCache,
ruleCache = ruleCache
) ++ currentNodeModel
case _ =>
convertIndividualPathElement(
member,
messageInExcerpt = generateDSMemberMsg(member.name, typeFullName),
appCache = appCache,
ruleCache = ruleCache
) ++ currentNodeModel
}
val sizeOfList = nodes.size
val originalSource = getOriginalSourceForDerivedNode(nodes.headOption, sourceId)

case _ => // Checking if 2nd level is of Extends type
taggerCache
.getTypeDeclExtendingTypeDeclCacheItem(typeFullName)
.get(sourceId) match {
case Some(typeDecl: TypeDecl) => // Fetching information for the 2nd level member node
taggerCache.typeDeclMemberCache
.getOrElse(typeDecl.fullName, TrieMap[String, mutable.Set[Member]]())
.get(sourceId) match {
case Some(members: mutable.HashSet[Member]) =>
// Picking up only the head as any path to base is sufficient
val member = members.head
val currentTypeDeclNode = // Fetching the current TypeDecl node
taggerCache.typeDeclDerivedByExtendsCache.get(typeFullName)
convertIndividualPathElement(
member,
messageInExcerpt = generateDSMemberMsg(member.name, typeDecl.fullName),
appCache = appCache,
ruleCache = ruleCache
) ++ convertIndividualPathElement(
currentTypeDeclNode.get,
messageInExcerpt = generateDSExtendsMsg(typeDecl.name, typeFullName),
appCache = appCache,
ruleCache = ruleCache
) ++ currentNodeModel
case _ =>
currentNodeModel
}
case _ =>
currentNodeModel
}
}
} else currentNodeModel
val pathElements = nodes.zipWithIndex.flatMap { case (node, index) =>
convertIndividualPathElement(node, index, sizeOfList, appCache = appCache, ruleCache = ruleCache)
}

if (originalSource.isDefined) {
val sourceNode = originalSource.get
return convertIndividualPathElement(
sourceNode,
0,
sizeOfList + 1,
generateExcerptMessageForMemberNode(sourceNode),
appCache = appCache,
ruleCache = ruleCache
).get +: pathElements
}

pathElements
}

/** Builds the excerpt message shown for an original source node.
  *
  * Only `Member` nodes get a message (produced by `generateDSMemberMsg` from the member name and the full name of
  * its surrounding type declaration); every other node kind yields an empty string.
  */
private def generateExcerptMessageForMemberNode(sourceNode: AstNode): String =
  sourceNode match {
    case memberNode: Member =>
      val memberName         = memberNode.name
      val owningTypeFullName = getSurroundingTypeDeclFullName(memberNode)
      generateDSMemberMsg(memberName, owningTypeFullName)
    case _ =>
      ""
  }

/** Returns the full name of the type declaration that owns `node`.
  *
  * Only `Member` nodes are resolved (through `typeDecl.fullName`); any other node kind yields an empty string.
  */
private def getSurroundingTypeDeclFullName(node: AstNode): String =
  node match {
    case memberNode: Member => memberNode.typeDecl.fullName
    case _                  => ""
  }

/** Looks up the original source node linked to a (possibly derived) node.
  *
  * The lookup is delegated to the `originalSource(sourceId)` extension on `AstNode`. No tag check is performed here;
  * a node without a matching original source simply yields `None`.
  *
  * @param node
  *   the AST node to check; `None` (for example the head of an empty path) yields `None` instead of throwing
  * @param sourceId
  *   the identifier of the source
  * @return
  *   an Option containing the original source node if found, or None otherwise
  */
private def getOriginalSourceForDerivedNode(node: Option[AstNode], sourceId: String): Option[AstNode] =
  node.flatMap(_.originalSource(sourceId))

/** Convert Individual path element
Expand Down
6 changes: 4 additions & 2 deletions src/main/scala/ai/privado/exporter/SourceExporter.scala
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,12 @@ class SourceExporter(
val processingMap = mutable.HashMap[String, mutable.Set[AstNode]]()
sourcesList.foreach(source => {
def addToMap(sourceId: String): Unit = {
val sourceIterator = source.originalSourceOut.l
val sourceNode = if (sourceIterator.nonEmpty) sourceIterator.head.asInstanceOf[AstNode] else source
if (processingMap.contains(sourceId)) {
processingMap(sourceId) = processingMap(sourceId).addOne(source)
processingMap(sourceId) = processingMap(sourceId).addOne(sourceNode)
} else {
processingMap.addOne(sourceId -> mutable.Set(source))
processingMap.addOne(sourceId -> mutable.Set(sourceNode))
}
}
source.tag.nameExact(Constants.id).value.filter(!_.startsWith(Constants.privadoDerived)).foreach(addToMap)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import ai.privado.languageEngine.java.tagger.source.Utility.{
import ai.privado.model.{CatLevelOne, Constants, InternalTag, RuleInfo}
import ai.privado.tagger.PrivadoParallelCpgPass
import ai.privado.tagger.utility.SourceTaggerUtility.getTypeDeclWithMemberNameHavingMemberName
import ai.privado.utility.Utilities.{addRuleTags, storeForTag}
import ai.privado.utility.Utilities.{addOriginalSourceEdgeAndTag, addRuleTags, storeForTag}
import io.shiftleft.codepropertygraph.generated.Cpg
import io.shiftleft.semanticcpg.language.{ICallResolver, NoResolve}
import io.shiftleft.semanticcpg.language.*
Expand Down Expand Up @@ -79,6 +79,9 @@ class IdentifierTagger(cpg: Cpg, ruleCache: RuleCache, taggerCache: TaggerCache)
.l ::: cpg.parameter.where(_.typeFullName(typeDeclFullName)).l
impactedObjects
.foreach(impactedObject => {
// Add edge between derived source node and the original source
addOriginalSourceEdgeAndTag(builder, impactedObject, typeDeclMember, ruleCache)

if (impactedObject.tag.nameExact(Constants.id).l.isEmpty) {
storeForTag(builder, impactedObject, ruleCache)(
InternalTag.OBJECT_OF_SENSITIVE_CLASS_BY_MEMBER_NAME.toString,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ class IdentifierTagger(cpg: Cpg, ruleCache: RuleCache, taggerCache: TaggerCache)
.l ::: cpg.parameter.where(_.typeFullName(typeDeclFullName)).l
impactedObjects
.foreach(impactedObject => {
// Add edge between derived source node and the original source
addOriginalSourceEdgeAndTag(builder, impactedObject, typeDeclMember, ruleCache)

if (impactedObject.tag.nameExact(Constants.id).l.isEmpty) {
storeForTag(builder, impactedObject, ruleCache)(
InternalTag.OBJECT_OF_SENSITIVE_CLASS_BY_MEMBER_NAME.toString,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@

package ai.privado.languageEngine.java

import ai.privado.model.Constants
import io.shiftleft.codepropertygraph.generated.{Cpg, EdgeTypes, NodeTypes}
import io.shiftleft.codepropertygraph.generated.nodes.{AstNode, CfgNode, File, JavaProperty, Literal, MethodParameterIn}
import overflowdb.traversal.*
import io.shiftleft.semanticcpg.language.*

import scala.jdk.CollectionConverters.IteratorHasAsScala
import scala.util.Try
Expand Down Expand Up @@ -73,4 +75,34 @@ package object language {
None
}
}

/** Extension methods that follow outgoing `ORIGINAL_SOURCE` edges of an [[AstNode]]. */
implicit class NodeToOriginalSource(val node: AstNode) extends AnyVal {

  /** Returns the first `AstNode` reached over an outgoing `ORIGINAL_SOURCE` edge, or `None` when there is none. */
  def originalSource: Option[AstNode] =
    node.out(EdgeTypes.ORIGINAL_SOURCE).collectFirst { case source: AstNode => source }

  /** Returns the first `AstNode` reached over an outgoing `ORIGINAL_SOURCE` edge that carries an id tag matching
    * `sourceId`, or `None` when no neighbour qualifies.
    *
    * @param sourceId
    *   the source id to look for among the neighbour's `Constants.id` tags
    */
  def originalSource(sourceId: String): Option[AstNode] =
    node.out(EdgeTypes.ORIGINAL_SOURCE).collectFirst {
      // NOTE(review): `value(sourceId)` presumably treats sourceId as a pattern rather than an exact value
      // (the file uses `valueExact` elsewhere) - confirm this is intended.
      case source: AstNode if source.tag.nameExact(Constants.id).value(sourceId).nonEmpty => source
    }
}

/** Extension method that follows outgoing `DERIVED_SOURCE` edges of an [[AstNode]]. */
implicit class OriginalToDerivedSource(val node: AstNode) extends AnyVal {

  /** Returns the first `AstNode` reached over an outgoing `DERIVED_SOURCE` edge, or `None` when there is none. */
  def derivedSource: Option[AstNode] =
    node.out(EdgeTypes.DERIVED_SOURCE).collectFirst { case derived: AstNode => derived }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ class FirstLevelDerivedSourceTagger(
.where(_.typeFullName(typeDeclVal))
.l ::: cpgNodeCache.cachedParameter.where(_.typeFullName(typeDeclVal)).l)
.foreach(impactedObject => {
addOriginalSourceEdgeAndTag(builder, impactedObject, typeDeclMember, ruleCache)
addFirstLevelDerivedSourceTags(
builder,
impactedObject,
Expand Down
Loading
Loading