From d7505b140a91391a33127cbf2c1aef8b92e018ea Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Wed, 13 Dec 2023 12:00:21 +0000 Subject: [PATCH] gh-112962: in dis module, put cache information in the Instruction instead of creating fake Instructions to represent it (#113016) --- Doc/library/dis.rst | 18 ++++- Lib/dis.py | 73 +++++++++++-------- Lib/test/support/bytecode_helper.py | 12 +++ Lib/test/test_code.py | 8 +- Lib/test/test_compile.py | 5 +- Lib/test/test_dis.py | 41 +++++++++-- ...-12-12-16-32-55.gh-issue-112962.ZZWXZn.rst | 3 + 7 files changed, 114 insertions(+), 46 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-12-12-16-32-55.gh-issue-112962.ZZWXZn.rst diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index 0d93bc9f5da774c..5647021d6a9ba6b 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -328,13 +328,17 @@ operation is being performed, so the intermediate analysis object isn't useful: source line information (if any) is taken directly from the disassembled code object. - The *show_caches* and *adaptive* parameters work as they do in :func:`dis`. + The *adaptive* parameter works as it does in :func:`dis`. .. versionadded:: 3.4 .. versionchanged:: 3.11 Added the *show_caches* and *adaptive* parameters. + .. versionchanged:: 3.13 + The *show_caches* parameter is deprecated and has no effect. The *cache_info* + field of each instruction is populated regardless of its value. + .. function:: findlinestarts(code) @@ -482,6 +486,14 @@ details of bytecode instructions as :class:`Instruction` instances: :class:`dis.Positions` object holding the start and end locations that are covered by this instruction. + .. data::cache_info + + Information about the cache entries of this instruction, as + triplets of the form ``(name, size, data)``, where the ``name`` + and ``size`` describe the cache format and data is the contents + of the cache. ``cache_info`` is ``None`` if the instruction does not have + caches. + .. versionadded:: 3.4 .. versionchanged:: 3.11 @@ -493,8 +505,8 @@ details of bytecode instructions as :class:`Instruction` instances: Changed field ``starts_line``. Added fields ``start_offset``, ``cache_offset``, ``end_offset``, - ``baseopname``, ``baseopcode``, ``jump_target``, ``oparg``, and - ``line_number``. + ``baseopname``, ``baseopcode``, ``jump_target``, ``oparg``, + ``line_number`` and ``cache_info``. .. class:: Positions diff --git a/Lib/dis.py b/Lib/dis.py index efa935c5a6a0b6f..183091cb0d60988 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -267,9 +267,10 @@ def show_code(co, *, file=None): 'starts_line', 'line_number', 'label', - 'positions' + 'positions', + 'cache_info', ], - defaults=[None, None] + defaults=[None, None, None] ) _Instruction.opname.__doc__ = "Human readable name for operation" @@ -286,6 +287,7 @@ def show_code(co, *, file=None): _Instruction.line_number.__doc__ = "source line number associated with this opcode (if any), otherwise None" _Instruction.label.__doc__ = "A label (int > 0) if this instruction is a jump target, otherwise None" _Instruction.positions.__doc__ = "dis.Positions object holding the span of source code covered by this instruction" +_Instruction.cache_info.__doc__ = "list of (name, size, data), one for each cache entry of the instruction" _ExceptionTableEntryBase = collections.namedtuple("_ExceptionTableEntryBase", "start end target depth lasti") @@ -334,6 +336,8 @@ class Instruction(_Instruction): label - A label if this instruction is a jump target, otherwise None positions - Optional dis.Positions object holding the span of source code covered by this instruction + cache_info - information about the format and content of the instruction's cache + entries (if any) """ @property @@ -570,7 +574,6 @@ def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False): linestarts=linestarts, line_offset=line_offset, co_positions=co.co_positions(), - show_caches=show_caches, original_code=original_code, arg_resolver=arg_resolver) @@ -645,8 +648,7 @@ def _is_backward_jump(op): 'ENTER_EXECUTOR') def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=None, - show_caches=False, original_code=None, labels_map=None, - arg_resolver=None): + original_code=None, labels_map=None, arg_resolver=None): """Iterate over the instructions in a bytecode string. Generates a sequence of Instruction namedtuples giving the details of each @@ -682,32 +684,28 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N else: argval, argrepr = arg, repr(arg) + instr = Instruction(_all_opname[op], op, arg, argval, argrepr, + offset, start_offset, starts_line, line_number, + labels_map.get(offset, None), positions) + + caches = _get_cache_size(_all_opname[deop]) + # Advance the co_positions iterator: + for _ in range(caches): + next(co_positions, ()) + + if caches: + cache_info = [] + for name, size in _cache_format[opname[deop]].items(): + data = code[offset + 2: offset + 2 + 2 * size] + cache_info.append((name, size, data)) + else: + cache_info = None + yield Instruction(_all_opname[op], op, arg, argval, argrepr, offset, start_offset, starts_line, line_number, - labels_map.get(offset, None), positions) + labels_map.get(offset, None), positions, cache_info) + - caches = _get_cache_size(_all_opname[deop]) - if not caches: - continue - if not show_caches: - # We still need to advance the co_positions iterator: - for _ in range(caches): - next(co_positions, ()) - continue - for name, size in _cache_format[opname[deop]].items(): - for i in range(size): - offset += 2 - # Only show the fancy argrepr for a CACHE instruction when it's - # the first entry for a particular cache value: - if i == 0: - data = code[offset: offset + 2 * size] - argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}" - else: - argrepr = "" - yield Instruction( - "CACHE", CACHE, 0, None, argrepr, offset, offset, False, None, None, - Positions(*next(co_positions, ())) - ) def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False, show_offsets=False): @@ -787,7 +785,6 @@ def _disassemble_bytes(code, lasti=-1, varname_from_oparg=None, instrs = _get_instructions_bytes(code, linestarts=linestarts, line_offset=line_offset, co_positions=co_positions, - show_caches=show_caches, original_code=original_code, labels_map=labels_map, arg_resolver=arg_resolver) @@ -805,6 +802,23 @@ def print_instructions(instrs, exception_entries, formatter, show_caches=False, is_current_instr = instr.offset <= lasti \ <= instr.offset + 2 * _get_cache_size(_all_opname[_deoptop(instr.opcode)]) formatter.print_instruction(instr, is_current_instr) + deop = _deoptop(instr.opcode) + if show_caches and instr.cache_info: + offset = instr.offset + for name, size, data in instr.cache_info: + for i in range(size): + offset += 2 + # Only show the fancy argrepr for a CACHE instruction when it's + # the first entry for a particular cache value: + if i == 0: + argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}" + else: + argrepr = "" + formatter.print_instruction( + Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset, + False, None, None, instr.positions), + is_current_instr) + formatter.print_exception_table(exception_entries) def _disassemble_str(source, **kwargs): @@ -952,7 +966,6 @@ def __iter__(self): linestarts=self._linestarts, line_offset=self._line_offset, co_positions=co.co_positions(), - show_caches=self.show_caches, original_code=original_code, labels_map=labels_map, arg_resolver=arg_resolver) diff --git a/Lib/test/support/bytecode_helper.py b/Lib/test/support/bytecode_helper.py index 388d1266773c8a5..a4845065a5322e3 100644 --- a/Lib/test/support/bytecode_helper.py +++ b/Lib/test/support/bytecode_helper.py @@ -7,6 +7,18 @@ _UNSPECIFIED = object() +def instructions_with_positions(instrs, co_positions): + # Return (instr, positions) pairs from the instrs list and co_positions + # iterator. The latter contains items for cache lines and the former + # doesn't, so those need to be skipped. + + co_positions = co_positions or iter(()) + for instr in instrs: + yield instr, next(co_positions, ()) + for _, size, _ in (instr.cache_info or ()): + for i in range(size): + next(co_positions, ()) + class BytecodeTestCase(unittest.TestCase): """Custom assertion methods for inspecting bytecode.""" diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index a961ddbe17a3d32..d8fb826edeb6816 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -144,6 +144,8 @@ gc_collect) from test.support.script_helper import assert_python_ok from test.support import threading_helper +from test.support.bytecode_helper import (BytecodeTestCase, + instructions_with_positions) from opcode import opmap, opname COPY_FREE_VARS = opmap['COPY_FREE_VARS'] @@ -384,10 +386,8 @@ def test_co_positions_artificial_instructions(self): code = traceback.tb_frame.f_code artificial_instructions = [] - for instr, positions in zip( - dis.get_instructions(code, show_caches=True), - code.co_positions(), - strict=True + for instr, positions in instructions_with_positions( + dis.get_instructions(code), code.co_positions() ): # If any of the positions is None, then all have to # be None as well for the case above. There are still diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index df6e5e4b55f7281..f681d125db7d7a1 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -12,6 +12,7 @@ from test import support from test.support import (script_helper, requires_debug_ranges, requires_specialization, Py_C_RECURSION_LIMIT) +from test.support.bytecode_helper import instructions_with_positions from test.support.os_helper import FakePath class TestSpecifics(unittest.TestCase): @@ -1346,8 +1347,8 @@ def generic_visit(self, node): def assertOpcodeSourcePositionIs(self, code, opcode, line, end_line, column, end_column, occurrence=1): - for instr, position in zip( - dis.Bytecode(code, show_caches=True), code.co_positions(), strict=True + for instr, position in instructions_with_positions( + dis.Bytecode(code), code.co_positions() ): if instr.opname == opcode: occurrence -= 1 diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 0ea4dc4566a4a45..12e2c57e50b0ba5 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -13,6 +13,7 @@ import opcode +CACHE = dis.opmap["CACHE"] def get_tb(): def _error(): @@ -1227,9 +1228,9 @@ def f(): else: # "copy" the code to un-quicken it: f.__code__ = f.__code__.replace() - for instruction in dis.get_instructions( + for instruction in _unroll_caches_as_Instructions(dis.get_instructions( f, show_caches=True, adaptive=adaptive - ): + ), show_caches=True): if instruction.opname == "CACHE": yield instruction.argrepr @@ -1262,7 +1263,8 @@ def f(): # However, this might change in the future. So we explicitly try to find # a CACHE entry in the instructions. If we can't do that, fail the test - for inst in dis.get_instructions(f, show_caches=True): + for inst in _unroll_caches_as_Instructions( + dis.get_instructions(f, show_caches=True), show_caches=True): if inst.opname == "CACHE": op_offset = inst.offset - 2 cache_offset = inst.offset @@ -1775,8 +1777,8 @@ def simple(): pass class InstructionTestCase(BytecodeTestCase): def assertInstructionsEqual(self, instrs_1, instrs_2, /): - instrs_1 = [instr_1._replace(positions=None) for instr_1 in instrs_1] - instrs_2 = [instr_2._replace(positions=None) for instr_2 in instrs_2] + instrs_1 = [instr_1._replace(positions=None, cache_info=None) for instr_1 in instrs_1] + instrs_2 = [instr_2._replace(positions=None, cache_info=None) for instr_2 in instrs_2] self.assertEqual(instrs_1, instrs_2) class InstructionTests(InstructionTestCase): @@ -1890,9 +1892,9 @@ def roots(a, b, c): instruction.positions.col_offset, instruction.positions.end_col_offset, ) - for instruction in dis.get_instructions( + for instruction in _unroll_caches_as_Instructions(dis.get_instructions( code, adaptive=adaptive, show_caches=show_caches - ) + ), show_caches=show_caches) ] self.assertEqual(co_positions, dis_positions) @@ -2233,6 +2235,31 @@ def get_disassembly(self, tb): dis.distb(tb, file=output) return output.getvalue() +def _unroll_caches_as_Instructions(instrs, show_caches=False): + # Cache entries are no longer reported by dis as fake instructions, + # but some tests assume that do. We should rewrite the tests to assume + # the new API, but it will be clearer to keep the tests working as + # before and do that in a separate PR. + + for instr in instrs: + yield instr + if not show_caches: + continue + + offset = instr.offset + for name, size, data in (instr.cache_info or ()): + for i in range(size): + offset += 2 + # Only show the fancy argrepr for a CACHE instruction when it's + # the first entry for a particular cache value: + if i == 0: + argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}" + else: + argrepr = "" + + yield Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset, + False, None, None, instr.positions) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2023-12-12-16-32-55.gh-issue-112962.ZZWXZn.rst b/Misc/NEWS.d/next/Library/2023-12-12-16-32-55.gh-issue-112962.ZZWXZn.rst new file mode 100644 index 000000000000000..b99e6bc90ae7912 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-12-12-16-32-55.gh-issue-112962.ZZWXZn.rst @@ -0,0 +1,3 @@ +:mod:`dis` module functions add cache information to the +:class:`~dis.Instruction` instance rather than creating fake +:class:`~dis.Instruction` instances to represent the cache entries.