Skip to content

Commit 9ed9e68

Browse files
committed
Initial prototype improving malloc invalidation efficiency
1 parent d3992bb commit 9ed9e68

9 files changed

+190
-3
lines changed

.github/workflows/test-smoketests.yml

+3
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ jobs:
5151
- name: decorator smoke test
5252
run: python test/smoketest_profile_decorator.py
5353

54+
- name: line invalidation test
55+
run: python test/test_line_invalidation.py
56+
5457
# Note: This test doesn't need to read an output,
5558
# it is meant to determine if there is an ImportError
5659
# or anything related if relative imports are used.

scalene/scalene_profiler.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -546,7 +546,7 @@ def malloc_signal_handler(
546546
ByteCodeIndex(f.f_lasti),
547547
]
548548
Scalene.__alloc_sigq.put([0])
549-
pywhere.enable_settrace()
549+
pywhere.enable_settrace(this_frame)
550550
del this_frame
551551

552552
@staticmethod

src/source/pywhere.cpp

+17-2
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,15 @@ static void allocate_newline() {
268268

269269
static int trace_func(PyObject* obj, PyFrameObject* frame, int what,
270270
PyObject* arg) {
271+
if (what == PyTrace_CALL || what == PyTrace_C_CALL) {
272+
PyThreadState* tstate = PyThreadState_Get();
273+
frame->f_trace_lines = 0;
274+
frame->f_trace = NULL;
275+
#if PY_VERSION_HEX < 0x030c0000
276+
tstate->cframe->use_tracing = 0;
277+
#endif
278+
279+
}
271280
if (what != PyTrace_LINE) {
272281
return 0;
273282
}
@@ -297,7 +306,6 @@ static int trace_func(PyObject* obj, PyFrameObject* frame, int what,
297306
// Needed because decref will be called in on_stack
298307
Py_INCREF(frame);
299308
if (on_stack(last_fname_s, lineno_l, static_cast<PyFrameObject*>(frame))) {
300-
frame->f_trace_lines = 0;
301309
return 0;
302310
}
303311

@@ -370,7 +378,14 @@ static PyObject* depopulate_struct(PyObject* self, PyObject* args) {
370378
}
371379

372380
// Python-callable entry point: enable_settrace(frame).
//
// Installs trace_func as the tracing callback via PyEval_SetTrace and sets
// f_trace_lines on the frame that was passed in.
//
// Returns None on success, or NULL with a Python exception set when the
// argument is missing or is not a frame object.
static PyObject* enable_settrace(PyObject* self, PyObject* args) {
  PyObject* frame = NULL;
  // "O!" makes the argument parser type-check against PyFrame_Type. With a
  // plain "O", any Python object would be accepted and then written to
  // through a PyFrameObject* below, corrupting unrelated memory.
  if (!PyArg_ParseTuple(args, "O!", &PyFrame_Type, &frame)) {
    return NULL;
  }
  PyFrameObject* frame_obj = reinterpret_cast<PyFrameObject*>(frame);
  PyEval_SetTrace(trace_func, NULL);
  frame_obj->f_trace_lines = 1;
  Py_RETURN_NONE;
}
376391

@@ -389,7 +404,7 @@ static PyMethodDef EmbMethods[] = {
389404
{"print_files_to_profile", print_files_to_profile, METH_NOARGS,
390405
"printing for debug"},
391406
// {"return_buffer", return_buffer, METH_NOARGS, ""},
392-
{"enable_settrace", enable_settrace, METH_NOARGS, ""},
407+
{"enable_settrace", enable_settrace, METH_VARARGS, ""},
393408
{"disable_settrace", disable_settrace, METH_NOARGS, ""},
394409
{"populate_struct", populate_struct, METH_NOARGS, ""},
395410
{"depopulate_struct", depopulate_struct, METH_NOARGS, ""},
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
2+
def main():
3+
accum = bytes()
4+
for i in range(31):
5+
accum += bytes(10485767 * 2)
6+
7+
print(len(accum))
8+
9+
asdf = bytes(2 * 10485767)
10+
some_dead_line = None
11+
12+
13+
if __name__ == '__main__':
14+
main()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
2+
def main():
3+
accum = bytes()
4+
for i in range(31):
5+
accum += bytes(10485767 // 4) # far below the allocation sampling window
6+
7+
print(len(accum))
8+
9+
asdf = bytes(2 * 10485767)
10+
11+
12+
if __name__ == '__main__':
13+
main()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
2+
def main():
3+
accum = bytes()
4+
for i in range(31):
5+
accum += bytes(2 * 10485767) # 2x the allocation sampling window
6+
bogus = None
7+
print(len(accum))
8+
9+
asdf = bytes(2 * 10485767)
10+
11+
12+
if __name__ == '__main__':
13+
main()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
2+
def main():
3+
accum = bytes()
4+
for i in range(31):
5+
accum += bytes(2 * 10485767) # 2x the allocation sampling window
6+
7+
print(len(accum))
8+
9+
asdf = bytes(2 * 10485767)
10+
11+
12+
if __name__ == '__main__':
13+
main()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
2+
def main():
3+
accum = bytes()
4+
for i in range(31):
5+
accum += bytes(2 * 10485767) + bytes(2 * 10485767) # 2x the allocation sampling window
6+
7+
print(len(accum))
8+
9+
asdf = bytes(2 * 10485767)
10+
11+
if __name__ == '__main__':
12+
main()

test/test_line_invalidation.py

+104
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
"""
2+
This is bound very closely to the current implementation of
3+
the tests in `test/line_attribution_tests`.
4+
5+
The two things that matter are the number of loops, the size
6+
of the allocations, and the exact line numbers.
7+
8+
9+
"""
10+
11+
expected_md5_sums = {
12+
"line_attribution_tests/loop_below_threshold.py": "7664a7dcc0f4ab94a44e431448b5f348",
13+
"line_attribution_tests/loop_with_one_alloc.py": "da9ff0aa223123c956049e940c3ef093",
14+
"line_attribution_tests/loop_with_multiple_lines.py": "48ce0e8693fe43b1ebb7eb75a0fd5832",
15+
"line_attribution_tests/loop_with_two_allocs.py": "71f337140aa25383525e56a6167cabf8",
16+
"line_attribution_tests/line_after_final_alloc.py": "ca8cdd44ea6e4a9c286c05facae6a721"
17+
}
18+
19+
import subprocess
20+
import tempfile
21+
import sys
22+
from typing import List
23+
from pathlib import Path
24+
from hashlib import md5
25+
from scalene.scalene_json import ScaleneJSONSchema
26+
27+
N_LOOPS = 31
28+
LOOP_ALLOC_LINENO = 5 #
29+
OUT_OF_LOOP_ALLOC_LINENO = 9
30+
31+
def check_for_changes():
    """Verify that the line-attribution fixture files have not been modified.

    Every key of ``expected_md5_sums`` is resolved relative to the directory
    that contains this test file (not the current working directory, so the
    check works no matter where the test is launched from), hashed with MD5
    and compared against the recorded digest.

    Raises:
        AssertionError: if at least one fixture's digest differs from the
            recorded one; the message lists the offending files.
        OSError: if a fixture file cannot be read.
    """
    base_dir = Path(__file__).parent
    changed = [
        fname
        for fname, expected_sum in expected_md5_sums.items()
        if md5((base_dir / fname).read_bytes()).hexdigest() != expected_sum
    ]
    # Join outside the f-string: reusing the enclosing quote character inside
    # a replacement field is a SyntaxError on Python versions before 3.12.
    changed_list = ",".join(changed)
    assert not changed, f"Detected change in file(s) {changed_list}"
39+
40+
def get_line(scalene_profile: "ScaleneJSONSchema", lineno: int):
    """Return the profile entry for line ``lineno`` of the only profiled file.

    Args:
        scalene_profile: parsed profile whose ``files`` mapping must contain
            exactly one entry.
        lineno: 1-based line number within that file.

    Returns:
        The element of the file's ``lines`` sequence at index ``lineno - 1``.

    Raises:
        AssertionError: if the profile does not contain exactly one file.
        IndexError: if ``lineno`` is outside ``1..len(lines)``.
    """
    files = list(scalene_profile.files)
    if len(files) != 1:
        raise AssertionError(
            f"Expected exactly one profiled file, got {len(files)}: {files}"
        )
    lines = scalene_profile.files[files[0]].lines
    # Reject non-positive line numbers explicitly: ``lines[lineno - 1]`` would
    # otherwise wrap around and silently return a line from the end.
    if not 1 <= lineno <= len(lines):
        raise IndexError(
            f"Line {lineno} is out of range for {files[0]} "
            f"({len(lines)} lines in profile)"
        )
    return lines[lineno - 1]
45+
46+
47+
48+
49+
def get_profile(test_stem, outdir_p, test_dir):
    """Profile one fixture script with scalene and load the JSON result.

    Runs ``python -m scalene --cli --json --outfile <out> <script>`` using the
    current interpreter, where ``<script>`` is ``test_dir / f"{test_stem}.py"``
    and ``<out>`` is ``outdir_p / f"{test_stem}.json"``.

    Args:
        test_stem: fixture file name without the ``.py`` suffix.
        outdir_p: ``Path`` of the directory that receives the JSON profile.
        test_dir: ``Path`` of the directory that contains the fixture script.

    Returns:
        A ``(test_stem, profile)`` tuple, where ``profile`` is the result of
        ``ScaleneJSONSchema.model_validate_json`` on the output file.

    Raises:
        subprocess.CalledProcessError: if scalene exits with a non-zero
            status; the captured output is echoed to stderr first.
    """
    out_path = outdir_p / f"{test_stem}.json"
    command = [
        sys.executable,
        "-m",
        "scalene",
        "--cli",
        "--json",
        "--outfile",
        str(out_path),
        str(test_dir / f"{test_stem}.py"),
    ]
    try:
        subprocess.run(command, capture_output=True, check=True)
    except subprocess.CalledProcessError as err:
        # The output is captured, and the exception message does not include
        # it, so surface it here; otherwise a CI failure gives no clue why.
        for stream in (err.stdout, err.stderr):
            if stream:
                sys.stderr.write(stream.decode(errors="replace"))
        raise
    with open(out_path, "r") as f:
        profile = ScaleneJSONSchema.model_validate_json(f.read())
    return (test_stem, profile)
67+
68+
69+
def main():
    """Profile the line-attribution fixtures and check per-line malloc counts.

    Checks performed:
      * fixtures with above-threshold allocations in the loop must report
        exactly ``N_LOOPS`` mallocs on ``LOOP_ALLOC_LINENO``;
      * the below-threshold fixture must report fewer than ``N_LOOPS`` there;
      * every fixture must report exactly one malloc on
        ``OUT_OF_LOOP_ALLOC_LINENO``.

    Prints each failure prefixed with ``ERROR:`` and exits with status 1, or
    prints ``PASS`` and exits with status 0.
    """
    test_dir = Path(__file__).parent / "line_attribution_tests"
    with tempfile.TemporaryDirectory() as outdir:
        outdir_p = Path(outdir)
        one_alloc = get_profile("loop_with_one_alloc", outdir_p, test_dir)
        two_on_one_line = get_profile("loop_with_two_allocs", outdir_p, test_dir)
        below_threshold = get_profile("loop_below_threshold", outdir_p, test_dir)
        line_after_final_alloc = get_profile(
            "line_after_final_alloc", outdir_p, test_dir
        )

    # The profiles are fully loaded into memory, so the checks do not need
    # the temporary directory any more.
    errors = []
    for stem, profile in [one_alloc, two_on_one_line, line_after_final_alloc]:
        line = get_line(profile, LOOP_ALLOC_LINENO)
        if line.n_mallocs != N_LOOPS:
            # The value compared is a malloc count, so say so in the message.
            errors.append(
                f"Expected {N_LOOPS} mallocs on line {LOOP_ALLOC_LINENO} "
                f"of {stem}, got {line.n_mallocs}"
            )

    bt_stem, bt_prof = below_threshold
    bt_line = get_line(bt_prof, LOOP_ALLOC_LINENO)
    if not bt_line.n_mallocs < N_LOOPS:
        errors.append(f"{bt_stem} makes smaller allocations than the allocation sampling window, so fewer than {N_LOOPS} allocations on {LOOP_ALLOC_LINENO} should be reported. Got {bt_line.n_mallocs}")

    for stem, profile in [one_alloc, two_on_one_line, below_threshold, line_after_final_alloc]:
        line = get_line(profile, OUT_OF_LOOP_ALLOC_LINENO)
        if line.n_mallocs != 1:
            errors.append(f'Line {OUT_OF_LOOP_ALLOC_LINENO} in {stem} makes a large allocation, so it should be reported.')

    if errors:
        for error in errors:
            print(f'ERROR: {error}')
        # sys.exit rather than the builtin exit(): the latter is injected by
        # the site module and is absent under ``python -S`` or when frozen.
        sys.exit(1)
    print("PASS")
    sys.exit(0)
102+
103+
if __name__ == '__main__':
104+
main()

0 commit comments

Comments
 (0)