Skip to content

Commit c3b932c

Browse files
authored
Explicit Call Graph (#619)
* Start adding explicit call graph * Integrate CallGraph into LLVMBasedICFG * Add CallGraph interface and integrate it into the ICFG * Allow constructing LLVMBasedICFG with already given CallGraph * pre-compile Callgraph with LLVM config * Fix deserialization * some cleanup of LLVMBasedICFG * Make dot printing part of the call graph
1 parent a586558 commit c3b932c

File tree

9 files changed

+475
-254
lines changed

9 files changed

+475
-254
lines changed
Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
/******************************************************************************
2+
* Copyright (c) 2023 Fabian Schiebel.
3+
* All rights reserved. This program and the accompanying materials are made
4+
* available under the terms of LICENSE.txt.
5+
*
6+
* Contributors:
7+
* Fabian Schiebel and others
8+
*****************************************************************************/
9+
10+
#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_CALLGRAPH_H
11+
#define PHASAR_PHASARLLVM_CONTROLFLOW_CALLGRAPH_H
12+
13+
#include "phasar/ControlFlow/CallGraphBase.h"
14+
#include "phasar/Utils/ByRef.h"
15+
#include "phasar/Utils/Logger.h"
16+
#include "phasar/Utils/StableVector.h"
17+
#include "phasar/Utils/Utilities.h"
18+
19+
#include "llvm/ADT/ArrayRef.h"
20+
#include "llvm/ADT/DenseMap.h"
21+
#include "llvm/ADT/STLExtras.h"
22+
23+
#include "nlohmann/json.hpp"
24+
25+
#include <functional>
26+
#include <utility>
27+
#include <vector>
28+
29+
namespace psr {
30+
template <typename N, typename F> class CallGraphBuilder;
31+
template <typename N, typename F> class CallGraph;
32+
33+
/// Traits specialization for CallGraph<N, F>: exposes the call-site type N as
/// n_t and the function type F as f_t.
template <typename N, typename F> struct CGTraits<CallGraph<N, F>> {
  using f_t = F;
  using n_t = N;
};
37+
38+
/// An explicit graph-representation of a call-graph. Only represents the data,
39+
/// not the call-graph analysis that creates it.
40+
///
41+
/// This type is immutable. To incrementally build it from your call-graph
42+
/// analysis, use the CallGraphBuilder
43+
template <typename N, typename F>
44+
class CallGraph : public CallGraphBase<CallGraph<N, F>> {
45+
using base_t = CallGraphBase<CallGraph<N, F>>;
46+
friend base_t;
47+
friend class CallGraphBuilder<N, F>;
48+
49+
public:
50+
using typename base_t::f_t;
51+
using typename base_t::n_t;
52+
using FunctionVertexTy = llvm::SmallVector<n_t>;
53+
using InstructionVertexTy = llvm::SmallVector<f_t>;
54+
55+
/// Creates a new, empty call-graph
56+
CallGraph() noexcept = default;
57+
58+
/// Deserializes a previously computed call-graph
59+
template <typename FunctionGetter, typename InstructionGetter>
60+
[[nodiscard]] static CallGraph
61+
deserialize(const nlohmann::json &PrecomputedCG,
62+
FunctionGetter GetFunctionFromName,
63+
InstructionGetter GetInstructionFromId);
64+
65+
/// A range of all functions that are vertices in the call-graph. The number
66+
/// of vertex functions can be retrieved by getNumVertexFunctions().
67+
[[nodiscard]] auto getAllVertexFunctions() const noexcept {
68+
return llvm::make_first_range(CallersOf);
69+
}
70+
71+
/// A range of all call-sites that are vertices in the call-graph. The number
72+
/// of vertex-callsites can be retrived by getNumVertexCallSites().
73+
[[nodiscard]] auto getAllVertexCallSites() const noexcept {
74+
return llvm::make_first_range(CalleesAt);
75+
}
76+
77+
[[nodiscard]] size_t getNumVertexFunctions() const noexcept {
78+
return CallersOf.size();
79+
}
80+
[[nodiscard]] size_t getNumVertexCallSites() const noexcept {
81+
return CalleesAt.size();
82+
}
83+
84+
/// The number of functions within this call-graph
85+
[[nodiscard]] size_t size() const noexcept { return getNumVertexFunctions(); }
86+
87+
[[nodiscard]] bool empty() const noexcept { return CallersOf.empty(); }
88+
89+
/// Creates a JSON representation of this call-graph suitable for presistent
90+
/// storage.
91+
/// Use the ctor taking a json object for deserialization
92+
template <typename FunctionIdGetter, typename InstIdGetter>
93+
[[nodiscard]] nlohmann::json getAsJson(FunctionIdGetter GetFunctionId,
94+
InstIdGetter GetInstructionId) const {
95+
nlohmann::json J;
96+
97+
for (const auto &[Fun, Callers] : CallersOf) {
98+
auto &JCallers = J[std::invoke(GetFunctionId, Fun)];
99+
100+
for (const auto &CS : *Callers) {
101+
JCallers.push_back(std::invoke(GetInstructionId, CS));
102+
}
103+
}
104+
105+
return J;
106+
}
107+
108+
template <typename FunctionLabelGetter, typename InstParentGetter,
109+
typename InstLabelGetter>
110+
void printAsDot(llvm::raw_ostream &OS, FunctionLabelGetter GetFunctionLabel,
111+
InstParentGetter GetFunctionFromInst,
112+
InstLabelGetter GetInstLabel) const {
113+
OS << "digraph CallGraph{\n";
114+
scope_exit CloseBrace = [&OS] { OS << "}\n"; };
115+
116+
llvm::DenseMap<f_t, size_t> Fun2Id;
117+
Fun2Id.reserve(CallersOf.size());
118+
119+
size_t CurrId = 0;
120+
for (const auto &Fun : getAllVertexFunctions()) {
121+
OS << CurrId << "[label=\"";
122+
OS.write_escaped(std::invoke(GetFunctionLabel, Fun)) << "\"];\n";
123+
Fun2Id[Fun] = CurrId++;
124+
}
125+
126+
for (const auto &[CS, Callees] : CalleesAt) {
127+
const auto &Fun = std::invoke(GetFunctionFromInst, CS);
128+
129+
for (const auto &Succ : *Callees) {
130+
OS << Fun2Id.lookup(Fun) << "->" << Fun2Id.lookup(Succ) << "[label=\"";
131+
OS.write_escaped(std::invoke(GetInstLabel, CS)) << "\"];\n";
132+
}
133+
}
134+
}
135+
136+
private:
137+
[[nodiscard]] llvm::ArrayRef<f_t>
138+
getCalleesOfCallAtImpl(ByConstRef<n_t> Inst) const noexcept {
139+
if (const auto *CalleesPtr = CalleesAt.lookup(Inst)) {
140+
return *CalleesPtr;
141+
}
142+
return {};
143+
}
144+
145+
[[nodiscard]] llvm::ArrayRef<n_t>
146+
getCallersOfImpl(ByConstRef<f_t> Fun) const noexcept {
147+
if (const auto *CallersPtr = CallersOf.lookup(Fun)) {
148+
return *CallersPtr;
149+
}
150+
return {};
151+
}
152+
153+
// ---
154+
155+
StableVector<InstructionVertexTy> InstVertexOwner;
156+
std::vector<FunctionVertexTy> FunVertexOwner;
157+
158+
llvm::DenseMap<N, InstructionVertexTy *> CalleesAt{};
159+
llvm::DenseMap<F, FunctionVertexTy *> CallersOf{};
160+
};
161+
162+
/// A mutable wrapper over a CallGraph. Use this to build a call-graph from
163+
/// within your call-graph ananlysis.
164+
template <typename N, typename F> class CallGraphBuilder {
165+
public:
166+
using n_t = typename CallGraph<N, F>::n_t;
167+
using f_t = typename CallGraph<N, F>::f_t;
168+
using FunctionVertexTy = typename CallGraph<n_t, f_t>::FunctionVertexTy;
169+
using InstructionVertexTy = typename CallGraph<n_t, f_t>::InstructionVertexTy;
170+
171+
void reserve(size_t MaxNumFunctions) {
172+
CG.FunVertexOwner.reserve(MaxNumFunctions);
173+
CG.CalleesAt.reserve(MaxNumFunctions);
174+
CG.CallersOf.reserve(MaxNumFunctions);
175+
}
176+
177+
/// Registeres a new function in the call-graph. Returns a list of all
178+
/// call-sites that are known so far to potentially call this function.
179+
/// Do not manually add elements to this vector -- use addCallEdge instead.
180+
[[nodiscard]] FunctionVertexTy *addFunctionVertex(f_t Fun) {
181+
auto [It, Inserted] = CG.CallersOf.try_emplace(std::move(Fun), nullptr);
182+
if (Inserted) {
183+
auto Cap = CG.FunVertexOwner.capacity();
184+
assert(CG.FunVertexOwner.size() < Cap &&
185+
"Trying to add more than MaxNumFunctions Function Vertices");
186+
It->second = &CG.FunVertexOwner.emplace_back();
187+
}
188+
return It->second;
189+
}
190+
191+
/// Registeres a new call-site in the call-graph. Returns a list of all
192+
/// callee functions that are known so far to potentially be called by this
193+
/// function.
194+
/// Do not manually add elements to this vector -- use addCallEdge instead.
195+
[[nodiscard]] InstructionVertexTy *addInstructionVertex(n_t Inst) {
196+
auto [It, Inserted] = CG.CalleesAt.try_emplace(std::move(Inst), nullptr);
197+
if (Inserted) {
198+
It->second = &CG.InstVertexOwner.emplace_back();
199+
}
200+
return It->second;
201+
}
202+
203+
/// Tries to lookup the InstructionVertex for the given call-site. Returns
204+
/// nullptr on failure.
205+
[[nodiscard]] InstructionVertexTy *
206+
getInstVertexOrNull(ByConstRef<n_t> Inst) const noexcept {
207+
return CG.CalleesAt.lookup(Inst);
208+
}
209+
210+
/// Adds a new directional edge to the call-graph indicating that CS may call
211+
/// Callee
212+
void addCallEdge(n_t CS, f_t Callee) {
213+
auto IVtx = addInstructionVertex(CS);
214+
auto FVtx = addFunctionVertex(Callee);
215+
addCallEdge(std::move(CS), IVtx, std::move(Callee), FVtx);
216+
}
217+
218+
/// Same as addCallEdge(n_t, f_t), but uses an already known
219+
/// InstructionVertexTy to save a lookup
220+
void addCallEdge(n_t CS, InstructionVertexTy *Callees, f_t Callee) {
221+
auto *Callers = addFunctionVertex(Callee);
222+
addCallEdge(std::move(CS), Callees, std::move(Callee), Callers);
223+
}
224+
225+
/// Same as addCallEdge(n_t, f_t), but uses an already known
226+
/// FunctionVertexTy to save a lookup
227+
void addCallEdge(n_t CS, f_t Callee, FunctionVertexTy *Callers) {
228+
auto *Callees = addInstructionVertex(CS);
229+
addCallEdge(std::move(CS), Callees, std::move(Callee), Callers);
230+
}
231+
232+
/// Moves the completely built call-graph out of this builder for further
233+
/// use. Do not use the builder after it anymore.
234+
[[nodiscard]] CallGraph<n_t, f_t> consumeCallGraph() noexcept {
235+
return std::move(CG);
236+
}
237+
238+
/// Returns a view on the current (partial) call-graph that has already been
239+
/// constructed
240+
[[nodiscard]] const CallGraph<n_t, f_t> &viewCallGraph() const noexcept {
241+
return CG;
242+
}
243+
244+
private:
245+
void addCallEdge(n_t CS, InstructionVertexTy *Callees, f_t Callee,
246+
FunctionVertexTy *Callers) {
247+
Callees->push_back(std::move(Callee));
248+
Callers->push_back(std::move(CS));
249+
}
250+
251+
CallGraph<n_t, f_t> CG{};
252+
};
253+
254+
template <typename N, typename F>
255+
template <typename FunctionGetter, typename InstructionGetter>
256+
[[nodiscard]] CallGraph<N, F>
257+
CallGraph<N, F>::deserialize(const nlohmann::json &PrecomputedCG,
258+
FunctionGetter GetFunctionFromName,
259+
InstructionGetter GetInstructionFromId) {
260+
if (!PrecomputedCG.is_object()) {
261+
PHASAR_LOG_LEVEL_CAT(ERROR, "CallGraph", "Invalid Json. Expected object");
262+
return {};
263+
}
264+
265+
CallGraphBuilder<N, F> CGBuilder;
266+
CGBuilder.reserve(PrecomputedCG.size());
267+
268+
for (const auto &[FunName, CallerIDs] : PrecomputedCG.items()) {
269+
const auto &Fun = std::invoke(GetFunctionFromName, FunName);
270+
if (!Fun) {
271+
PHASAR_LOG_LEVEL_CAT(WARNING, "CallGraph",
272+
"Invalid function name: " << FunName);
273+
continue;
274+
}
275+
276+
auto *CEdges = CGBuilder.addFunctionVertex(Fun);
277+
CEdges->reserve(CallerIDs.size());
278+
279+
for (const auto &JId : CallerIDs) {
280+
auto Id = JId.get<size_t>();
281+
const auto &CS = std::invoke(GetInstructionFromId, Id);
282+
if (!CS) {
283+
PHASAR_LOG_LEVEL_CAT(WARNING, "CallGraph",
284+
"Invalid CAll-Instruction Id: " << Id);
285+
}
286+
287+
CGBuilder.addCallEdge(CS, Fun);
288+
}
289+
}
290+
return CGBuilder.consumeCallGraph();
291+
}
292+
} // namespace psr
293+
294+
namespace llvm {
295+
class Function;
296+
class Instruction;
297+
} // namespace llvm
298+
299+
extern template class psr::CallGraph<const llvm::Instruction *,
300+
const llvm::Function *>;
301+
extern template class psr::CallGraphBuilder<const llvm::Instruction *,
302+
const llvm::Function *>;
303+
304+
#endif // PHASAR_PHASARLLVM_CONTROLFLOW_CALLGRAPH_H
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/******************************************************************************
2+
* Copyright (c) 2023 Fabian Schiebel.
3+
* All rights reserved. This program and the accompanying materials are made
4+
* available under the terms of LICENSE.txt.
5+
*
6+
* Contributors:
7+
* Fabian Schiebel and others
8+
*****************************************************************************/
9+
10+
#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_CALLGRAPHBASE_H
11+
#define PHASAR_PHASARLLVM_CONTROLFLOW_CALLGRAPHBASE_H
12+
13+
#include "phasar/Utils/ByRef.h"
14+
#include "phasar/Utils/TypeTraits.h"
15+
16+
#include "nlohmann/json.hpp"
17+
18+
namespace psr {
19+
/// Traits of a call-graph implementation T. The primary template is
/// intentionally empty; a specialization is expected to provide the member
/// types
///   n_t -- the call-site (instruction) type
///   f_t -- the function type
template <typename T> struct CGTraits {};
23+
24+
/// Base class of all CallGraph implementations within phasar (currently only
25+
/// CallGraph<N, F>).
26+
/// Only represents the data, not how to create it.
27+
template <typename Derived> class CallGraphBase {
28+
public:
29+
using n_t = typename CGTraits<Derived>::n_t;
30+
using f_t = typename CGTraits<Derived>::f_t;
31+
32+
/// Returns an iterable range of all possible callee candidates at the given
33+
/// call-site induced by the used call-graph.
34+
///
35+
/// NOTE: This function is typically called in a hot part of the analysis and
36+
/// should therefore be very fast
37+
[[nodiscard]] decltype(auto) getCalleesOfCallAt(ByConstRef<n_t> Inst) const
38+
noexcept(noexcept(self().getCalleesOfCallAtImpl(Inst))) {
39+
static_assert(
40+
is_iterable_over_v<decltype(self().getCalleesOfCallAtImpl(Inst)), f_t>);
41+
return self().getCalleesOfCallAtImpl(Inst);
42+
}
43+
44+
/// Returns an iterable range of all possible call-site candidates that may
45+
/// call the given function induced by the used call-graph.
46+
[[nodiscard]] decltype(auto) getCallersOf(ByConstRef<f_t> Fun) const {
47+
static_assert(
48+
is_iterable_over_v<decltype(self().getCallersOfImpl(Fun)), n_t>);
49+
return self().getCallersOfImpl(Fun);
50+
}
51+
52+
private:
53+
const Derived &self() const noexcept {
54+
return static_cast<const Derived &>(*this);
55+
}
56+
};
57+
} // namespace psr
58+
59+
#endif // PHASAR_PHASARLLVM_CONTROLFLOW_CALLGRAPHBASE_H

0 commit comments

Comments
 (0)