Skip to content

Commit f304fee

Browse files
committed
C++: Implement alias analysis for dataflow.
1 parent 8781cb0 commit f304fee

File tree

5 files changed

+397
-4
lines changed

5 files changed

+397
-4
lines changed
Lines changed: 344 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,344 @@
1+
private import cpp as Cpp
2+
private import SsaInternals as Ssa
3+
private import codeql.ssa.Ssa as SsaImplCommon
4+
private import DataFlowPrivate
5+
private import DataFlowUtil as Public
6+
private import DataFlowNodes as Nodes
7+
private import semmle.code.cpp.ir.IR
8+
private import semmle.code.cpp.ir.internal.IRCppLanguage
9+
10+
private module SsaInput implements SsaImplCommon::InputSig<Cpp::Location> {
11+
import SsaInternalsCommon::InputSigCommon
12+
13+
class SourceVariable = Ssa::SourceVariable;
14+
15+
/**
16+
* Holds if `n` is a node that `nodeHasInstruction1` relates to a
17+
* `VariableAddressInstruction` at `indirectionIndex`, or if `n` is reachable from
* such a node through one or more `simpleLocalFlowStep1` steps.
18+
*/
19+
private predicate fwd(Node1Impl n, int indirectionIndex) {
20+
nodeHasInstruction1(n, any(VariableAddressInstruction vai), indirectionIndex)
21+
or
22+
exists(Node1Impl n0 |
23+
fwd(n0, indirectionIndex) and
24+
simpleLocalFlowStep1(n0, n, _)
25+
)
26+
}
27+
28+
/**
29+
* Holds if `fwd(n, indirectionIndex)` holds and `n` either is the destination
* address operand of a `StoreInstruction` at indirection `indirectionIndex - k`
* (with `indirectionIndex > k`), or steps through `simpleLocalFlowStep1` to a node
* for which `revStore` holds with the same `indirectionIndex` and `k`.
30+
*/
31+
private predicate revStore(Node1Impl n, int indirectionIndex, int k) {
32+
fwd(pragma[only_bind_into](n), pragma[only_bind_into](indirectionIndex)) and
33+
(
34+
indirectionIndex > k and
35+
nodeHasOperand1(n, any(StoreInstruction store).getDestinationAddressOperand(),
36+
indirectionIndex - k)
37+
or
38+
exists(Node1Impl n1 |
39+
revStore(n1, pragma[only_bind_into](indirectionIndex), k) and
40+
simpleLocalFlowStep1(n, n1, _)
41+
)
42+
)
43+
}
44+
45+
/** A triple `(n, indirectionIndex, k)` for which `revStore(n, indirectionIndex, k)` holds. */
private newtype TStoreNode1Impl =
46+
MkStoreNode1Impl(Node1Impl n, int indirectionIndex, int k) { revStore(n, indirectionIndex, k) }
47+
48+
/**
49+
* This predicate holds if
50+
* ```
51+
* simpleLocalFlowStep1(n1, n2, _)
52+
* ```
53+
* where `node1` wraps `n1` and `node2` wraps `n2` with the same `indirectionIndex`
54+
* and `k` (so `revStore` holds for both wrapped nodes).
55+
*/
56+
private predicate flowStoreStep(TStoreNode1Impl node1, TStoreNode1Impl node2) {
57+
exists(Node1Impl n1, Node1Impl n2, int indirectionIndex, int k |
58+
node1 =
59+
MkStoreNode1Impl(n1, pragma[only_bind_into](indirectionIndex), pragma[only_bind_into](k)) and
60+
node2 =
61+
MkStoreNode1Impl(n2, pragma[only_bind_into](indirectionIndex), pragma[only_bind_into](k)) and
62+
simpleLocalFlowStep1(n1, n2, _)
63+
)
64+
}
65+
66+
/**
 * Holds if `sink` wraps a node that is the destination address operand of a
 * `StoreInstruction` at indirection `indirectionIndex - k`.
 */
private predicate storeSink(TStoreNode1Impl sink) {
67+
exists(Node1Impl n, int indirectionIndex, int k |
68+
sink = MkStoreNode1Impl(n, indirectionIndex, k) and
69+
// Subtract `k` because a store writes to the _indirection_ of the address operand (NOTE(review): this comment originally said "one"; confirm what `k` denotes)
70+
nodeHasOperand1(n, any(StoreInstruction store).getDestinationAddressOperand(),
71+
indirectionIndex - k)
72+
)
73+
}
74+
75+
/**
 * Holds if `source` wraps a node that `nodeHasInstruction1` relates to a
 * `VariableAddressInstruction` at the wrapped `indirectionIndex`.
 */
private predicate storeSource(TStoreNode1Impl source) {
76+
exists(Node1Impl n, int indirectionIndex, int k |
77+
source = MkStoreNode1Impl(n, indirectionIndex, k) and
78+
nodeHasInstruction1(n, any(VariableAddressInstruction vai), indirectionIndex)
79+
)
80+
}
81+
82+
/**
 * Applies `doublyBoundedFastTC` to `flowStoreStep`, bounded by `storeSource` and
 * `storeSink`. Presumably this is the one-or-more-step closure of `flowStoreStep`
 * from sources to sinks (the zero-step case is added by the caller) - TODO confirm.
 */
private predicate flowStorePlusImpl(TStoreNode1Impl node1, TStoreNode1Impl node2) =
83+
doublyBoundedFastTC(flowStoreStep/2, storeSource/1, storeSink/1)(node1, node2)
84+
85+
/**
 * Holds if `node1` is a `storeSource`, `node2` is a `storeSink`, and either
 * `flowStorePlusImpl(node1, node2)` holds or the two are the same node.
 */
private predicate flowStoreStepTCImpl(TStoreNode1Impl node1, TStoreNode1Impl node2) {
86+
storeSource(node1) and
87+
storeSink(node2) and
88+
(
89+
flowStorePlusImpl(node1, node2)
90+
or
91+
// Zero-step case: a node that is both a source and a sink.
node1 = node2
92+
)
93+
}
94+
95+
/**
 * Holds if `flowStoreStepTCImpl` relates the store nodes that wrap `n1` and `n2`
 * with the same `indirectionIndex` and `k`.
 */
private predicate flowStoreStepTC(Node1Impl n1, Node1Impl n2, int indirectionIndex, int k) {
96+
exists(TStoreNode1Impl node1, TStoreNode1Impl node2 |
97+
node1 =
98+
MkStoreNode1Impl(n1, pragma[only_bind_into](indirectionIndex), pragma[only_bind_into](k)) and
99+
node2 =
100+
MkStoreNode1Impl(n2, pragma[only_bind_into](indirectionIndex), pragma[only_bind_into](k)) and
101+
flowStoreStepTCImpl(node1, node2)
102+
)
103+
}
104+
105+
/**
106+
* Holds if `fwd(n, indirectionIndex)` holds and `n` either is a node that
* `nodeHasOperand1` relates to some operand at `indirectionIndex`, or steps
* through `simpleLocalFlowStep1` to a node for which `revLoad` holds with the
* same `indirectionIndex`.
107+
*/
108+
private predicate revLoad(Node1Impl n, int indirectionIndex) {
109+
fwd(pragma[only_bind_into](n), pragma[only_bind_into](indirectionIndex)) and
110+
(
111+
nodeHasOperand1(n, _, indirectionIndex)
112+
or
113+
exists(Node1Impl n1 |
114+
revLoad(n1, pragma[only_bind_into](indirectionIndex)) and
115+
simpleLocalFlowStep1(n, n1, _)
116+
)
117+
)
118+
}
119+
120+
/** A pair `(n, indirectionIndex)` for which `revLoad(n, indirectionIndex)` holds. */
private newtype TLoadNode1Impl =
121+
MkLoadNode1Impl(Node1Impl n, int indirectionIndex) { revLoad(n, indirectionIndex) }
122+
123+
/**
 * Holds if `simpleLocalFlowStep1(n1, n2, _)` holds, where `node1` wraps `n1` and
 * `node2` wraps `n2` with the same `indirectionIndex`.
 */
private predicate flowLoadStep(TLoadNode1Impl node1, TLoadNode1Impl node2) {
124+
exists(Node1Impl n1, Node1Impl n2, int indirectionIndex |
125+
node1 = MkLoadNode1Impl(n1, pragma[only_bind_into](indirectionIndex)) and
126+
node2 = MkLoadNode1Impl(n2, pragma[only_bind_into](indirectionIndex)) and
127+
simpleLocalFlowStep1(n1, n2, _)
128+
)
129+
}
130+
131+
/**
 * Holds if `sink` wraps a node that `nodeHasOperand1` relates to some operand at
 * the wrapped `indirectionIndex`.
 */
private predicate loadSink(TLoadNode1Impl sink) {
132+
exists(Node1Impl n, int indirectionIndex |
133+
sink = MkLoadNode1Impl(n, indirectionIndex) and
134+
nodeHasOperand1(n, _, indirectionIndex)
135+
)
136+
}
137+
138+
/**
 * Holds if `source` wraps a node that `nodeHasInstruction1` relates to a
 * `VariableAddressInstruction` at the wrapped `indirectionIndex`.
 */
private predicate loadSource(TLoadNode1Impl source) {
139+
exists(Node1Impl n, int indirectionIndex |
140+
source = MkLoadNode1Impl(n, indirectionIndex) and
141+
nodeHasInstruction1(n, any(VariableAddressInstruction vai), indirectionIndex)
142+
)
143+
}
144+
145+
/**
 * Applies `doublyBoundedFastTC` to `flowLoadStep`, bounded by `loadSource` and
 * `loadSink`. Presumably this is the one-or-more-step closure of `flowLoadStep`
 * from sources to sinks (the zero-step case is added by the caller) - TODO confirm.
 */
private predicate flowLoadPlusImpl(TLoadNode1Impl node1, TLoadNode1Impl node2) =
146+
doublyBoundedFastTC(flowLoadStep/2, loadSource/1, loadSink/1)(node1, node2)
147+
148+
/**
 * Holds if `node1` is a `loadSource`, `node2` is a `loadSink`, and either
 * `flowLoadPlusImpl(node1, node2)` holds or the two are the same node.
 */
private predicate flowLoadStepTCImpl(TLoadNode1Impl node1, TLoadNode1Impl node2) {
149+
loadSource(node1) and
150+
loadSink(node2) and
151+
(
152+
flowLoadPlusImpl(node1, node2)
153+
or
154+
// Zero-step case: a node that is both a source and a sink.
node1 = node2
155+
)
156+
}
157+
158+
/**
 * Holds if `flowLoadStepTCImpl` relates the load nodes that wrap `n1` and `n2`
 * with the same `indirectionIndex`.
 */
private predicate flowLoadStepTC(Node1Impl n1, Node1Impl n2, int indirectionIndex) {
159+
exists(TLoadNode1Impl node1, TLoadNode1Impl node2 |
160+
node1 = MkLoadNode1Impl(n1, pragma[only_bind_into](indirectionIndex)) and
161+
node2 = MkLoadNode1Impl(n2, pragma[only_bind_into](indirectionIndex)) and
162+
flowLoadStepTCImpl(node1, node2)
163+
)
164+
}
165+
166+
/**
167+
* Holds if the `i`'th instruction in `bb` writes to `sv` through an alias, where
* `store` is the node for the `StoreInstruction` performing the write.
168+
* `certain` is `true` if the write is guaranteed to overwrite the entire
169+
* allocation. This predicate only ever binds `certain` to `true`.
170+
*/
171+
additional predicate variableWrite(
172+
BasicBlock bb, int i, SourceVariable sv, boolean certain, Node1Impl store
173+
) {
174+
certain = true and
175+
exists(
176+
Node1Impl vai, VariableAddressInstruction vaiInstr, StoreInstruction storeInstr, int index,
177+
Node1Impl dest, int k, Ssa::DefImpl def, int lower
178+
|
179+
// `vai` (a variable address node) reaches `dest`, the store's destination address operand.
flowStoreStepTC(vai, dest, index, k) and
180+
nodeHasInstruction1(vai, vaiInstr, index) and
181+
nodeHasOperand1(dest, storeInstr.getDestinationAddressOperand(), index - k) and
182+
sv.getIRVariable() = vaiInstr.getIRVariable() and
183+
lower =
184+
pragma[only_bind_out](getMinIndirectionsForType(storeInstr
185+
.getDestinationAddress()
186+
.getResultType())) and
187+
sv.getIndirection() = index + lower and
188+
nodeHasInstruction1(store, storeInstr, index - k) and
189+
// The position `(bb, i)` is taken from the `Ssa::DefImpl` whose node is `store`.
def.getNode() = store and
190+
def.hasIndexInBlock(bb, i)
191+
)
192+
}
193+
194+
/**
 * Holds if the five-argument `variableWrite` holds at `(bb, i)` for `sv` and
 * `certain` with some store node.
 */
predicate variableWrite(BasicBlock bb, int i, SourceVariable sv, boolean certain) {
195+
variableWrite(bb, i, sv, certain, _)
196+
}
197+
198+
/**
 * Holds if `load` is the node of an `Ssa::UseImpl` at index `i` in `bb`, and
 * `flowLoadStepTC` relates a node for a `VariableAddressInstruction` of the IR
 * variable of `sv` to `load` at indirection index `sv.getIndirection()`.
 * This predicate only ever binds `certain` to `true`.
 */
additional predicate variableRead(
199+
BasicBlock bb, int i, SourceVariable sv, boolean certain, Node1Impl load
200+
) {
201+
certain = true and
202+
exists(Node1Impl vai, int index, VariableAddressInstruction vaiInstr, Ssa::UseImpl use |
203+
flowLoadStepTC(vai, load, index) and
204+
nodeHasInstruction1(vai, vaiInstr, index) and
205+
sv.getIRVariable() = vaiInstr.getIRVariable() and
206+
sv.getIndirection() = index and
207+
// The position `(bb, i)` is taken from the `Ssa::UseImpl` whose node is `load`.
use.getNode() = load and
208+
use.hasIndexInBlock(bb, i)
209+
)
210+
}
211+
212+
/**
 * Holds if the five-argument `variableRead` holds at `(bb, i)` for `sv` and
 * `certain` with some load node.
 */
predicate variableRead(BasicBlock bb, int i, SourceVariable sv, boolean certain) {
213+
variableRead(bb, i, sv, certain, _)
214+
}
215+
}
216+
217+
private module AliasedSsa = SsaImplCommon::Make<Cpp::Location, SsaInput>;
218+
219+
/**
 * The underlying type of aliased-flow nodes: either a wrapped `Node1Impl`, or an
 * `AliasedSsa::DefinitionExt` that is a `PhiNode` or a `PhiReadNode`.
 */
private newtype TAliasedNode =
220+
TNode1(Node1Impl n) or
221+
TPhiNode(AliasedSsa::DefinitionExt phi) {
222+
phi instanceof AliasedSsa::PhiNode or
223+
phi instanceof AliasedSsa::PhiReadNode
224+
}
225+
226+
/**
 * A node in the aliased flow graph. Subclasses supply the string representation,
 * enclosing function, glvalue-ness, type, and location.
 */
abstract private class AliasedNode extends TAliasedNode {
227+
abstract string toString();
228+
229+
/** Gets the instruction of this node. Has no result unless a subclass overrides it. */
Instruction asInstruction() { none() }
230+
231+
abstract Cpp::Function getFunction();
232+
233+
abstract predicate isGLValue();
234+
235+
abstract Cpp::Type getType();
236+
237+
abstract Cpp::Location getLocation();
238+
}
239+
240+
class AliasedNodeImpl = AliasedNode;
241+
242+
/**
 * An aliased-flow node that wraps a `Node1Impl`. Every member predicate delegates
 * to the wrapped node `n`.
 */
private class Node1 extends AliasedNode, TNode1 {
243+
Node1Impl n;
244+
245+
Node1() { this = TNode1(n) }
246+
247+
/** Gets the wrapped `Node1Impl`. */
Node1Impl getImpl() { result = n }
248+
249+
final override string toString() { result = n.toString() }
250+
251+
final override Instruction asInstruction() { result = n.asInstruction() }
252+
253+
final override Cpp::Function getFunction() { result = n.getFunction() }
254+
255+
final override predicate isGLValue() { n.isGLValue() }
256+
257+
final override Cpp::Type getType() { result = n.getType() }
258+
259+
final override Cpp::Location getLocation() { result = n.getLocation() }
260+
}
261+
262+
/**
 * An aliased-flow node that wraps an `AliasedSsa::DefinitionExt` (restricted by
 * `TPhiNode` to phi and phi-read nodes). It does not override `asInstruction`.
 */
private class PhiNode extends AliasedNode, TPhiNode {
263+
AliasedSsa::DefinitionExt phi;
264+
265+
PhiNode() { this = TPhiNode(phi) }
266+
267+
final override string toString() { result = phi.toString() }
268+
269+
/** Gets the wrapped SSA definition. */
AliasedSsa::DefinitionExt getPhi() { result = phi }
270+
271+
// The function is taken from the basic block of the definition.
final override Cpp::Function getFunction() { result = phi.getBasicBlock().getEnclosingFunction() }
272+
273+
// Glvalue-ness and type are taken from the source variable of the definition.
final override predicate isGLValue() { phi.getSourceVariable().isGLValue() }
274+
275+
final override Cpp::Type getType() { result = phi.getSourceVariable().getType() }
276+
277+
final override Cpp::Location getLocation() { result = phi.getLocation() }
278+
}
279+
280+
class AliasedPhiNodeImpl = PhiNode;
281+
282+
/**
 * Holds if `node2` wraps a load node that `SsaInput::variableRead` places at some
 * position `(bb2, i2)` for `sv`, and `AliasedSsa::adjacentDefReadExt` relates some
 * definition of `sv` and the position `(bb1, i1)` to `(bb2, i2)`.
 */
private predicate step(SsaInput::SourceVariable sv, IRBlock bb1, int i1, AliasedNode node2) {
283+
exists(AliasedSsa::DefinitionExt def, Node1Impl load, IRBlock bb2, int i2 |
284+
AliasedSsa::adjacentDefReadExt(def, sv, bb1, i1, bb2, i2) and
285+
SsaInput::variableRead(bb2, i2, sv, _, load) and
286+
TNode1(load) = node2
287+
)
288+
}
289+
290+
/**
 * Holds if `node1` is associated with `sv` at index `i` in `bb`: either it wraps
 * a node that `SsaInput::variableWrite` or `SsaInput::variableRead` places there,
 * or it is a `PhiNode` whose definition `definesAt(sv, bb, i, _)`.
 */
private predicate access(SsaInput::SourceVariable sv, IRBlock bb, int i, AliasedNode node1) {
291+
exists(Node1Impl n | node1 = TNode1(n) |
292+
SsaInput::variableWrite(bb, i, sv, _, n)
293+
or
294+
SsaInput::variableRead(bb, i, sv, _, n)
295+
)
296+
or
297+
node1.(PhiNode).getPhi().definesAt(sv, bb, i, _)
298+
}
299+
300+
/**
 * Holds if `AliasedSsa::lastRefRedefExt` relates some definition of `sv` and the
 * position `(bb, i)` to the definition wrapped by `node`.
 */
private predicate stepToPhi(SsaInput::SourceVariable sv, IRBlock bb, int i, PhiNode node) {
301+
exists(AliasedSsa::DefinitionExt phi |
302+
AliasedSsa::lastRefRedefExt(_, sv, bb, i, phi) and
303+
node.getPhi() = phi
304+
)
305+
}
306+
307+
/**
 * Holds if `aliasedFlow` goes from the aliased node wrapping the `Node1Impl` of
 * the public node `node1` to the phi node `node2`.
 */
predicate into(Public::Node node1, TPhiNode node2) {
308+
exists(Node1Impl n |
309+
node1 = Nodes::TNode1(n) and
310+
aliasedFlow(TNode1(n), node2)
311+
)
312+
}
313+
314+
/**
 * Holds if `aliasedFlow` goes between the aliased nodes wrapping the `Node1Impl`s
 * of the public nodes `node1` and `node2`.
 */
predicate step1(Public::Node node1, Public::Node node2) {
315+
exists(Node1Impl n1, Node1Impl n2 |
316+
node1 = Nodes::TNode1(n1) and
317+
node2 = Nodes::TNode1(n2) and
318+
aliasedFlow(TNode1(n1), TNode1(n2))
319+
)
320+
}
321+
322+
/** Holds if `aliasedFlow` goes from the phi node `node1` to the phi node `node2`. */
predicate step2(TPhiNode node1, TPhiNode node2) { aliasedFlow(node1, node2) }
323+
324+
/**
 * Holds if `aliasedFlow` goes from the phi node `node1` to the aliased node
 * wrapping the `Node1Impl` of the public node `node2`.
 */
predicate out(TPhiNode node1, Public::Node node2) {
325+
exists(Node1Impl n |
326+
node2 = Nodes::TNode1(n) and
327+
aliasedFlow(node1, TNode1(n))
328+
)
329+
}
330+
331+
/**
 * Holds if `node1` and `node2` are distinct and, for some `sv`, `bb`, and `i`,
 * `access(sv, bb, i, node1)` holds together with either `step(sv, bb, i, node2)`
 * or `stepToPhi(sv, bb, i, node2)`.
 */
private predicate aliasedFlow(AliasedNode node1, AliasedNode node2) {
332+
node1 != node2 and
333+
(
334+
// Flow from an access of `sv` to an adjacent read.
exists(IRBlock bb, int i, SsaInput::SourceVariable sv |
335+
access(sv, bb, i, node1) and
336+
step(sv, bb, i, node2)
337+
)
338+
or
339+
// Flow from an access of `sv` into a phi node.
exists(IRBlock bb, int i, SsaInput::SourceVariable sv |
340+
access(sv, bb, i, node1) and
341+
stepToPhi(sv, bb, i, node2)
342+
)
343+
)
344+
}

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowNodes.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ private import semmle.code.cpp.ir.IR
44
private import DataFlowImplCommon as DataFlowImplCommon
55
private import SsaInternals as Ssa
66
private import semmle.code.cpp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
7+
private import AliasedFlow
78

89
/**
910
* The IR dataflow graph consists of the following nodes:
@@ -33,4 +34,5 @@ newtype TIRDataFlowNode =
3334
indirectionIndex = [0 .. Ssa::getMaxIndirectionsForType(p.getUnspecifiedType()) - 1] and
3435
not any(InitializeParameterInstruction init).getParameter() = p
3536
} or
37+
TAliasedPhiNode(AliasedPhiNodeImpl n) or
3638
TFlowSummaryNode(FlowSummaryImpl::Private::SummaryNode sn)

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -375,9 +375,9 @@ class InstructionNode1 extends Node1Impl, Node0 {
375375
Instruction getInstruction() { result = node.getInstruction() }
376376
}
377377

378-
private Node0 operandNode0(Operand op) { result.asOperand() = op }
378+
Node0 operandNode0(Operand op) { result.asOperand() = op }
379379

380-
private Node0 instructionNode0(Instruction i) { result.asInstruction() = i }
380+
Node0 instructionNode0(Instruction i) { result.asInstruction() = i }
381381

382382
private class RawIndirectOperand0 extends Node1Impl, TRawIndirectOperand0 {
383383
Node0Impl node;
@@ -2463,6 +2463,8 @@ private Instruction getAnInstruction(Node n) {
24632463
or
24642464
result = n.(SsaPhiNode).getPhiNode().getBasicBlock().getFirstInstruction()
24652465
or
2466+
result = n.(AliasedPhiNode).getPhi().getPhi().getBasicBlock().getFirstInstruction()
2467+
or
24662468
result = n.(SsaPhiInputNode).getBasicBlock().getFirstInstruction()
24672469
or
24682470
n.(IndirectInstruction).hasInstructionAndIndirectionIndex(result, _)

0 commit comments

Comments
 (0)