Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,13 @@ private constructor(
val exclusionPatternsByRegex: List<Regex>,
/** Whether the type propagation system using [TypeObserver] should be disabled. */
val disableTypeObserver: Boolean,
/**
* If true, the C/C++ frontend prepends a small compiler-intrinsic prelude to every translation
* unit (e.g. `typedef char * __builtin_va_list;`) so that references to clang/gcc built-ins
* that no real header defines can be resolved. Off by default — it adds one implicit
* declaration per TU, which many existing tests count against.
*/
val injectCompilerBuiltins: Boolean = false,
) {
/** This list contains all languages which we want to translate. */
@JsonIgnore val languages: Set<KClass<out Language<*>>>
Expand Down Expand Up @@ -163,6 +170,7 @@ private constructor(
* still run in a single thread. This speeds up initial parsing but makes sure that further
* graph enrichment algorithms remain correct.
*/

val useParallelFrontends: Boolean

/**
Expand Down Expand Up @@ -283,6 +291,7 @@ private constructor(
private val exclusionPatternsByRegex = mutableListOf<Regex>()
private val exclusionPatternsByString = mutableListOf<String>()
private var disableTypeObserver = false
private var injectCompilerBuiltins = false

fun symbols(symbols: Map<String, String>): Builder {
this.symbols = symbols
Expand Down Expand Up @@ -739,6 +748,17 @@ private constructor(
return this
}

/**
* If true, the C/C++ frontend prepends a compiler-intrinsic prelude to every translation
* unit so that references to clang/gcc built-ins (e.g. `__builtin_va_list`) resolve to real
* typedefs instead of being inferred as bogus structs. Adds a small number of implicit
* declarations per TU.
*/
fun injectCompilerBuiltins(b: Boolean): Builder {
injectCompilerBuiltins = b
return this
}

@Throws(ConfigurationException::class)
fun build(): TranslationConfiguration {
registerExtraFrontendPasses()
Expand Down Expand Up @@ -772,6 +792,7 @@ private constructor(
exclusionPatternsByString,
exclusionPatternsByRegex,
disableTypeObserver,
injectCompilerBuiltins,
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ import org.eclipse.cdt.core.dom.parser.AbstractCLikeLanguage
import org.eclipse.cdt.core.index.IIndexFileLocation
import org.eclipse.cdt.core.model.ILanguage
import org.eclipse.cdt.core.parser.DefaultLogService
import org.eclipse.cdt.core.parser.ExtendedScannerInfo
import org.eclipse.cdt.core.parser.FileContent
import org.eclipse.cdt.core.parser.IncludeFileContentProvider
import org.eclipse.cdt.core.parser.ScannerInfo
import org.eclipse.cdt.internal.core.dom.parser.ASTNode
import org.eclipse.cdt.internal.core.dom.parser.ASTTranslationUnit
import org.eclipse.cdt.internal.core.dom.parser.c.CASTSimpleDeclSpecifier
Expand Down Expand Up @@ -234,7 +234,20 @@ open class CXXLanguageFrontend(ctx: TranslationContext, language: Language<CXXLa
?.let { symbols.putAll(it) }
}

val scannerInfo = ScannerInfo(symbols, includePaths.toTypedArray())
// Optionally prepend our compiler-intrinsic prelude the same way
// `clang -include cpg_builtins.h` would, so implicit typedefs like __builtin_va_list are
// resolved to real types instead of being inferred as bogus structs. Opt-in via
// `TranslationConfiguration.injectCompilerBuiltins` because it adds implicit declarations
// to every translation unit.
val includeFiles: Array<String> =
if (config.injectCompilerBuiltins) arrayOf(builtinsHeaderPath) else emptyArray()
val scannerInfo =
ExtendedScannerInfo(
symbols,
includePaths.toTypedArray(),
/* macroFiles = */ emptyArray(),
includeFiles,
)
val log = DefaultLogService()
val opts = ILanguage.OPTION_PARSE_INACTIVE_CODE // | ILanguage.OPTION_ADD_COMMENTS;
return try {
Expand Down Expand Up @@ -803,6 +816,23 @@ open class CXXLanguageFrontend(ctx: TranslationContext, language: Language<CXXLa
companion object {
private val LOGGER = LoggerFactory.getLogger(CXXLanguageFrontend::class.java)

/**
* Absolute path to a temporary copy of `cpg_builtins.h`, extracted lazily from the
* classpath so CDT's scanner can prepend it to every translation unit through
* [ExtendedScannerInfo.getIncludeFiles].
*
* We can't hand CDT a classpath URL directly, but a one-time extraction to a temp file
* (removed on JVM exit) is enough because CDT only needs the file to exist at parse time.
*/
private val builtinsHeaderPath: String by lazy {
val tmp = File.createTempFile("cpg_builtins_", ".h").apply { deleteOnExit() }
val resource =
CXXLanguageFrontend::class.java.getResourceAsStream("cpg_builtins.h")
?: error("cpg_builtins.h resource missing from cpg-language-cxx classpath")
resource.use { input -> tmp.outputStream().use { input.copyTo(it) } }
tmp.absolutePath
Comment thread
oxisto marked this conversation as resolved.
Outdated
}

private fun explore(node: IASTNode, indent: Int) {
val children = node.children
val s = StringBuilder()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -431,20 +431,26 @@ class ExpressionHandler(lang: CXXLanguageFrontend) :
else ->
Util.errorWithFileLocation(frontend, ctx, log, "unknown operator {}", ctx.operator)
}
if (operatorCode == "&") {
return newPointerReference(handle(ctx.operand)?.name, unknownType(), rawNode = ctx)
.apply {
if (input != null) {
this.input = input
}
}
} else if (operatorCode == "*") {
return newPointerDereference(handle(ctx.operand)?.name, unknownType(), rawNode = ctx)
.apply {
if (input != null) {
this.input = input
}
}
if (operatorCode == "&" || operatorCode == "*") {
// PointerReference / PointerDereference are Reference subclasses, so the name we
// hand in is fed into symbol resolution. Only meaningful when the operand is itself
// a Reference (`*x`, `*p->f`, `&x`); for arbitrary sub-expressions like `*(p++)` we
// must pass a null name, otherwise the SymbolResolver looks up nonsense like "++"
// and infers a phantom global variable.
//
// We also reuse the already-computed `input` node — calling handle(ctx.operand) a
// second time creates a duplicate, orphaned subtree.
val operand = input
val refName = (operand as? Reference)?.name

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if operand is not a reference?

Why do we need the operand variable? Couldn't we directly use input?

if (operatorCode == "&") {
val ref = newPointerReference(refName, unknownType(), rawNode = ctx)
if (operand != null) ref.input = operand
return ref
} else {
val deref = newPointerDereference(refName, unknownType(), rawNode = ctx)
if (operand != null) deref.input = operand
return deref
}
} else {
val unaryOperator =
newUnaryOperator(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,22 @@
package de.fraunhofer.aisec.cpg.frontends.cxx

import de.fraunhofer.aisec.cpg.graph.*
import de.fraunhofer.aisec.cpg.graph.declarations.Variable
import de.fraunhofer.aisec.cpg.graph.expressions.Assign
import de.fraunhofer.aisec.cpg.graph.expressions.Construction
import de.fraunhofer.aisec.cpg.graph.expressions.Literal
import de.fraunhofer.aisec.cpg.graph.expressions.New
import de.fraunhofer.aisec.cpg.graph.expressions.PointerDereference
import de.fraunhofer.aisec.cpg.graph.expressions.PointerReference
import de.fraunhofer.aisec.cpg.graph.expressions.UnaryOperator
import de.fraunhofer.aisec.cpg.test.*
import java.io.File
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertFalse
import kotlin.test.assertIs
import kotlin.test.assertNotNull
import kotlin.test.assertTrue

class CXXExpressionTest {
@Test
Expand Down Expand Up @@ -93,4 +100,58 @@ class CXXExpressionTest {
assertIs<Literal<*>>(gArg)
assertEquals(7, (gArg.value as Number).toInt())
}

/**
* Regression: `*_p++ = _c` (classic C idiom, e.g. Apple's `__sputc` in `_stdio.h`) used to make
* the outer [PointerDereference] inherit the name of the inner postfix `++` [UnaryOperator], so
* its name ended up as literally `"++"`. The [SymbolResolver] then treated that as a global
* reference, couldn't find a declaration and inferred a phantom [Variable] named `++`.
*
* The fix in `ExpressionHandler.handleUnaryExpression` derives the deref/addr-of name only when
* the operand is itself a [Reference] and passes null otherwise, and reuses the already handled
* sub-expression instead of running the handler a second time.
*/
@Test
fun testUnaryPointerDoesNotLeakOperatorName() {
val file = File("src/test/resources/c/deref_postincrement.c")
val tu =
analyzeAndGetFirstTU(listOf(file), file.parentFile.toPath(), true) {
it.registerLanguage<CLanguage>()
}
assertNotNull(tu)

// The bug produced a global Variable declaration named "++". The fix must eliminate it.
val bogus = tu.variables { it.name.localName == "++" }
assertTrue(
bogus.isEmpty(),
"no Variable should be named '++', but found ${bogus.size}: " +
bogus.joinToString { "${it.name} @ ${it.location} (inferred=${it.isInferred})" },
)

// The RHS of the assignment inside `deref_postinc` is `*_p++`. Its shape must be a
// PointerDereference wrapping the postfix ++ UnaryOperator, and the deref must not carry
// the operator's name.
val derefFn = tu.functions["deref_postinc"]
assertNotNull(derefFn)
val assign = derefFn.allChildren<Assign>().singleOrNull()
assertNotNull(assign, "expected exactly one assignment in deref_postinc")
val deref = assign.lhs.singleOrNull()
assertIs<PointerDereference>(deref)
assertFalse(
deref.name.localName == "++",
"PointerDereference should not have inherited the '++' name from its operand",
)
val inc = deref.input
assertIs<UnaryOperator>(inc)
assertEquals("++", inc.operatorCode)
assertTrue(inc.isPostfix)

// Sanity: plain `&x` on a simple Reference still gets a meaningful name (there is no
// reason to lose precision when the operand *is* a Reference).
val addrFn = tu.functions["addr_of_local"]
assertNotNull(addrFn)
val addr = addrFn.allChildren<PointerReference>().singleOrNull()
assertNotNull(addr, "expected a PointerReference in addr_of_local")
assertEquals("x", addr.name.localName)
}
}
20 changes: 20 additions & 0 deletions cpg-language-cxx/src/test/resources/c/deref_postincrement.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Regression fixture for the ExpressionHandler unary-operator fix.
*
* Before the fix, `*_p++` collapsed into a PointerDereference whose *name* was
* copied from the inner postfix-increment (whose name is "++"), tricking the
* SymbolResolver into inferring a phantom global variable named "++". The idiom
* originates in Apple's _stdio.h (`__sputc`), which is where the bug first
* surfaced against the macOS system-header chain.
*/
int deref_postinc(int *_p, int _c) {
return (*_p++ = _c);
}

int addr_of_local(void) {
int x = 0;
/* Simple `&x` — the outer PointerReference should still carry the name "x"
so plain address-of on a Reference keeps working. */
int *p = &x;
return *p;
}
Loading
Loading