From 0d07182821fad7b95a043d006f1ce13a2d22edcb Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Fri, 31 May 2024 00:49:03 -0700 Subject: [PATCH 001/373] gh-111201: Support pyrepl on Windows (#119559) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Anthony Shaw Co-authored-by: Łukasz Langa --- Doc/whatsnew/3.13.rst | 13 +- Lib/_pyrepl/__main__.py | 6 +- Lib/_pyrepl/console.py | 30 +- Lib/_pyrepl/reader.py | 16 +- Lib/_pyrepl/readline.py | 11 +- Lib/_pyrepl/simple_interact.py | 6 +- Lib/_pyrepl/unix_console.py | 28 +- Lib/_pyrepl/windows_console.py | 587 ++++++++++++++++++ Lib/test/test_pyrepl/__init__.py | 12 +- Lib/test/test_pyrepl/support.py | 2 +- Lib/test/test_pyrepl/test_pyrepl.py | 5 +- Lib/test/test_pyrepl/test_unix_console.py | 11 +- Lib/test/test_pyrepl/test_unix_eventqueue.py | 10 +- Lib/test/test_pyrepl/test_windows_console.py | 331 ++++++++++ ...-05-25-18-43-10.gh-issue-111201.SLPJIx.rst | 1 + 15 files changed, 1020 insertions(+), 49 deletions(-) create mode 100644 Lib/_pyrepl/windows_console.py create mode 100644 Lib/test/test_pyrepl/test_windows_console.py create mode 100644 Misc/NEWS.d/next/Windows/2024-05-25-18-43-10.gh-issue-111201.SLPJIx.rst diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 241c07e781af1f..29bb3b81f6323c 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -154,10 +154,10 @@ New Features A Better Interactive Interpreter -------------------------------- -On Unix-like systems like Linux or macOS, Python now uses a new -:term:`interactive` shell. When the user starts the :term:`REPL` from an -interactive terminal, and both :mod:`curses` and :mod:`readline` are -available, the interactive shell now supports the following new features: +On Unix-like systems like Linux or macOS as well as Windows, Python now +uses a new :term:`interactive` shell. 
When the user starts the +:term:`REPL` from an interactive terminal the interactive shell now +supports the following new features: * Colorized prompts. * Multiline editing with history preservation. @@ -174,10 +174,13 @@ available, the interactive shell now supports the following new features: If the new interactive shell is not desired, it can be disabled via the :envvar:`PYTHON_BASIC_REPL` environment variable. +The new shell requires :mod:`curses` on Unix-like systems. + For more on interactive mode, see :ref:`tut-interac`. (Contributed by Pablo Galindo Salgado, Łukasz Langa, and -Lysandros Nikolaou in :gh:`111201` based on code from the PyPy project.) +Lysandros Nikolaou in :gh:`111201` based on code from the PyPy project. +Windows support contributed by Dino Viehland and Anthony Shaw.) .. _whatsnew313-improved-error-messages: diff --git a/Lib/_pyrepl/__main__.py b/Lib/_pyrepl/__main__.py index c598019e7cd4ad..dae4ba6e178b9a 100644 --- a/Lib/_pyrepl/__main__.py +++ b/Lib/_pyrepl/__main__.py @@ -1,7 +1,11 @@ import os import sys -CAN_USE_PYREPL = sys.platform != "win32" +CAN_USE_PYREPL: bool +if sys.platform != "win32": + CAN_USE_PYREPL = True +else: + CAN_USE_PYREPL = sys.getwindowsversion().build >= 10586 # Windows 10 TH2 def interactive_console(mainmodule=None, quiet=False, pythonstartup=False): diff --git a/Lib/_pyrepl/console.py b/Lib/_pyrepl/console.py index d7e86e768671dc..fcabf785069ecb 100644 --- a/Lib/_pyrepl/console.py +++ b/Lib/_pyrepl/console.py @@ -19,10 +19,18 @@ from __future__ import annotations +import sys + from abc import ABC, abstractmethod from dataclasses import dataclass, field +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import IO + + @dataclass class Event: evt: str @@ -36,6 +44,25 @@ class Console(ABC): height: int = 25 width: int = 80 + def __init__( + self, + f_in: IO[bytes] | int = 0, + f_out: IO[bytes] | int = 1, + term: str = "", + encoding: str = "", + ): + self.encoding = encoding or sys.getdefaultencoding() + + if 
isinstance(f_in, int): + self.input_fd = f_in + else: + self.input_fd = f_in.fileno() + + if isinstance(f_out, int): + self.output_fd = f_out + else: + self.output_fd = f_out.fileno() + @abstractmethod def refresh(self, screen: list[str], xy: tuple[int, int]) -> None: ... @@ -108,5 +135,4 @@ def wait(self) -> None: ... @abstractmethod - def repaint(self) -> None: - ... + def repaint(self) -> None: ... diff --git a/Lib/_pyrepl/reader.py b/Lib/_pyrepl/reader.py index d2960bbb6121b3..0045425cdddb79 100644 --- a/Lib/_pyrepl/reader.py +++ b/Lib/_pyrepl/reader.py @@ -442,14 +442,13 @@ def get_arg(self, default: int = 1) -> int: """ if self.arg is None: return default - else: - return self.arg + return self.arg def get_prompt(self, lineno: int, cursor_on_line: bool) -> str: """Return what should be in the left-hand margin for line 'lineno'.""" if self.arg is not None and cursor_on_line: - prompt = "(arg: %s) " % self.arg + prompt = f"(arg: {self.arg}) " elif self.paste_mode: prompt = "(paste) " elif "\n" in self.buffer: @@ -515,12 +514,12 @@ def pos2xy(self) -> tuple[int, int]: offset = l - 1 if in_wrapped_line else l # need to remove backslash if offset >= pos: break + + if p + sum(l2) >= self.console.width: + pos -= l - 1 # -1 cause backslash is not in buffer else: - if p + sum(l2) >= self.console.width: - pos -= l - 1 # -1 cause backslash is not in buffer - else: - pos -= l + 1 # +1 cause newline is in buffer - y += 1 + pos -= l + 1 # +1 cause newline is in buffer + y += 1 return p + sum(l2[:pos]), y def insert(self, text: str | list[str]) -> None: @@ -582,7 +581,6 @@ def suspend(self) -> SimpleContextManager: for arg in ("msg", "ps1", "ps2", "ps3", "ps4", "paste_mode"): setattr(self, arg, prev_state[arg]) self.prepare() - pass def finish(self) -> None: """Called when a command signals that we're finished.""" diff --git a/Lib/_pyrepl/readline.py b/Lib/_pyrepl/readline.py index ffa14a9ce31a8f..248f3854a29689 100644 --- a/Lib/_pyrepl/readline.py +++ 
b/Lib/_pyrepl/readline.py @@ -38,7 +38,14 @@ from . import commands, historical_reader from .completing_reader import CompletingReader -from .unix_console import UnixConsole, _error +from .console import Console as ConsoleType + +Console: type[ConsoleType] +_error: tuple[type[Exception], ...] | type[Exception] +try: + from .unix_console import UnixConsole as Console, _error +except ImportError: + from .windows_console import WindowsConsole as Console, _error ENCODING = sys.getdefaultencoding() or "latin1" @@ -328,7 +335,7 @@ def __post_init__(self) -> None: def get_reader(self) -> ReadlineAlikeReader: if self.reader is None: - console = UnixConsole(self.f_in, self.f_out, encoding=ENCODING) + console = Console(self.f_in, self.f_out, encoding=ENCODING) self.reader = ReadlineAlikeReader(console=console, config=self.config) return self.reader diff --git a/Lib/_pyrepl/simple_interact.py b/Lib/_pyrepl/simple_interact.py index 11e831c1d6c5d4..c624f6e12a7094 100644 --- a/Lib/_pyrepl/simple_interact.py +++ b/Lib/_pyrepl/simple_interact.py @@ -34,8 +34,12 @@ from types import ModuleType from .readline import _get_reader, multiline_input -from .unix_console import _error +_error: tuple[type[Exception], ...] | type[Exception] +try: + from .unix_console import _error +except ModuleNotFoundError: + from .windows_console import _error def check() -> str: """Returns the error message if there is a problem initializing the state.""" diff --git a/Lib/_pyrepl/unix_console.py b/Lib/_pyrepl/unix_console.py index ec7d0636b9aeb3..4bdb02261982c3 100644 --- a/Lib/_pyrepl/unix_console.py +++ b/Lib/_pyrepl/unix_console.py @@ -143,18 +143,7 @@ def __init__( - term (str): Terminal name. - encoding (str): Encoding to use for I/O operations. 
""" - - self.encoding = encoding or sys.getdefaultencoding() - - if isinstance(f_in, int): - self.input_fd = f_in - else: - self.input_fd = f_in.fileno() - - if isinstance(f_out, int): - self.output_fd = f_out - else: - self.output_fd = f_out.fileno() + super().__init__(f_in, f_out, term, encoding) self.pollob = poll() self.pollob.register(self.input_fd, select.POLLIN) @@ -592,14 +581,19 @@ def __write_changed_line(self, y, oldline, newline, px_coord): px_pos = 0 j = 0 for c in oldline: - if j >= px_coord: break + if j >= px_coord: + break j += wlen(c) px_pos += 1 # reuse the oldline as much as possible, but stop as soon as we # encounter an ESCAPE, because it might be the start of an escape # sequene - while x_coord < minlen and oldline[x_pos] == newline[x_pos] and newline[x_pos] != "\x1b": + while ( + x_coord < minlen + and oldline[x_pos] == newline[x_pos] + and newline[x_pos] != "\x1b" + ): x_coord += wlen(newline[x_pos]) x_pos += 1 @@ -619,7 +613,11 @@ def __write_changed_line(self, y, oldline, newline, px_coord): self.__posxy = x_coord + character_width, y # if it's a single character change in the middle of the line - elif x_coord < minlen and oldline[x_pos + 1 :] == newline[x_pos + 1 :] and wlen(oldline[x_pos]) == wlen(newline[x_pos]): + elif ( + x_coord < minlen + and oldline[x_pos + 1 :] == newline[x_pos + 1 :] + and wlen(oldline[x_pos]) == wlen(newline[x_pos]) + ): character_width = wlen(newline[x_pos]) self.__move(x_coord, y) self.__write(newline[x_pos]) diff --git a/Lib/_pyrepl/windows_console.py b/Lib/_pyrepl/windows_console.py new file mode 100644 index 00000000000000..2277865e3262fc --- /dev/null +++ b/Lib/_pyrepl/windows_console.py @@ -0,0 +1,587 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that 
copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +import io +from multiprocessing import Value +import os +import sys + +from abc import ABC, abstractmethod +from collections import deque +from dataclasses import dataclass, field +import ctypes +from ctypes.wintypes import ( + _COORD, + WORD, + SMALL_RECT, + BOOL, + HANDLE, + CHAR, + DWORD, + WCHAR, + SHORT, +) +from ctypes import Structure, POINTER, Union +from .console import Event, Console +from .trace import trace +from .utils import wlen + +try: + from ctypes import GetLastError, WinDLL, windll, WinError # type: ignore[attr-defined] +except ImportError: + # Keep MyPy happy off Windows + from ctypes import CDLL as WinDLL, cdll as windll + + def GetLastError() -> int: + return 42 + + class WinError(OSError): # type: ignore[no-redef] + def __init__(self, err: int | None, descr: str | None = None) -> None: + self.err = err + self.descr = descr + + +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import IO + +VK_MAP: dict[int, str] = { + 0x23: "end", # VK_END + 0x24: "home", # VK_HOME + 0x25: "left", # VK_LEFT + 0x26: "up", # VK_UP + 0x27: "right", # VK_RIGHT + 0x28: "down", # VK_DOWN + 0x2E: "delete", # VK_DELETE + 0x70: "f1", # VK_F1 + 0x71: "f2", # VK_F2 + 0x72: "f3", # VK_F3 + 0x73: "f4", # VK_F4 + 0x74: "f5", # VK_F5 + 0x75: "f6", # VK_F6 + 0x76: "f7", # VK_F7 + 0x77: "f8", # VK_F8 + 0x78: "f9", # VK_F9 + 0x79: "f10", # VK_F10 + 0x7A: "f11", # VK_F11 + 0x7B: "f12", # 
VK_F12 + 0x7C: "f13", # VK_F13 + 0x7D: "f14", # VK_F14 + 0x7E: "f15", # VK_F15 + 0x7F: "f16", # VK_F16 + 0x80: "f17", # VK_F17 + 0x81: "f18", # VK_F18 + 0x82: "f19", # VK_F19 + 0x83: "f20", # VK_F20 +} + +# Console escape codes: https://learn.microsoft.com/en-us/windows/console/console-virtual-terminal-sequences +ERASE_IN_LINE = "\x1b[K" +MOVE_LEFT = "\x1b[{}D" +MOVE_RIGHT = "\x1b[{}C" +MOVE_UP = "\x1b[{}A" +MOVE_DOWN = "\x1b[{}B" +CLEAR = "\x1b[H\x1b[J" + + +class _error(Exception): + pass + + +class WindowsConsole(Console): + def __init__( + self, + f_in: IO[bytes] | int = 0, + f_out: IO[bytes] | int = 1, + term: str = "", + encoding: str = "", + ): + super().__init__(f_in, f_out, term, encoding) + + SetConsoleMode( + OutHandle, + ENABLE_WRAP_AT_EOL_OUTPUT + | ENABLE_PROCESSED_OUTPUT + | ENABLE_VIRTUAL_TERMINAL_PROCESSING, + ) + self.screen: list[str] = [] + self.width = 80 + self.height = 25 + self.__offset = 0 + self.event_queue: deque[Event] = deque() + try: + self.out = io._WindowsConsoleIO(self.output_fd, "w") # type: ignore[attr-defined] + except ValueError: + # Console I/O is redirected, fallback... + self.out = None + + def refresh(self, screen: list[str], c_xy: tuple[int, int]) -> None: + """ + Refresh the console screen. + + Parameters: + - screen (list): List of strings representing the screen contents. + - c_xy (tuple): Cursor position (x, y) on the screen. 
+ """ + cx, cy = c_xy + + while len(self.screen) < min(len(screen), self.height): + self._hide_cursor() + self._move_relative(0, len(self.screen) - 1) + self.__write("\n") + self.__posxy = 0, len(self.screen) + self.screen.append("") + + px, py = self.__posxy + old_offset = offset = self.__offset + height = self.height + + # we make sure the cursor is on the screen, and that we're + # using all of the screen if we can + if cy < offset: + offset = cy + elif cy >= offset + height: + offset = cy - height + 1 + scroll_lines = offset - old_offset + + # Scrolling the buffer as the current input is greater than the visible + # portion of the window. We need to scroll the visible portion and the + # entire history + self._scroll(scroll_lines, self._getscrollbacksize()) + self.__posxy = self.__posxy[0], self.__posxy[1] + scroll_lines + self.__offset += scroll_lines + + for i in range(scroll_lines): + self.screen.append("") + elif offset > 0 and len(screen) < offset + height: + offset = max(len(screen) - height, 0) + screen.append("") + + oldscr = self.screen[old_offset : old_offset + height] + newscr = screen[offset : offset + height] + + self.__offset = offset + + self._hide_cursor() + for ( + y, + oldline, + newline, + ) in zip(range(offset, offset + height), oldscr, newscr): + if oldline != newline: + self.__write_changed_line(y, oldline, newline, px) + + y = len(newscr) + while y < len(oldscr): + self._move_relative(0, y) + self.__posxy = 0, y + self._erase_to_end() + y += 1 + + self._show_cursor() + + self.screen = screen + self.move_cursor(cx, cy) + + def __write_changed_line( + self, y: int, oldline: str, newline: str, px_coord: int + ) -> None: + # this is frustrating; there's no reason to test (say) + # self.dch1 inside the loop -- but alternative ways of + # structuring this function are equally painful (I'm trying to + # avoid writing code generators these days...) 
+ minlen = min(wlen(oldline), wlen(newline)) + x_pos = 0 + x_coord = 0 + + px_pos = 0 + j = 0 + for c in oldline: + if j >= px_coord: + break + j += wlen(c) + px_pos += 1 + + # reuse the oldline as much as possible, but stop as soon as we + # encounter an ESCAPE, because it might be the start of an escape + # sequence + while ( + x_coord < minlen + and oldline[x_pos] == newline[x_pos] + and newline[x_pos] != "\x1b" + ): + x_coord += wlen(newline[x_pos]) + x_pos += 1 + + self._hide_cursor() + self._move_relative(x_coord, y) + if wlen(oldline) > wlen(newline): + self._erase_to_end() + + self.__write(newline[x_pos:]) + if wlen(newline) == self.width: + # If we wrapped we want to start at the next line + self._move_relative(0, y + 1) + self.__posxy = 0, y + 1 + else: + self.__posxy = wlen(newline), y + + if "\x1b" in newline or y != self.__posxy[1]: + # ANSI escape characters are present, so we can't assume + # anything about the position of the cursor. Moving the cursor + # to the left margin should work to get to a known position. 
+ self.move_cursor(0, y) + + def _scroll( + self, top: int, bottom: int, left: int | None = None, right: int | None = None + ) -> None: + scroll_rect = SMALL_RECT() + scroll_rect.Top = SHORT(top) + scroll_rect.Bottom = SHORT(bottom) + scroll_rect.Left = SHORT(0 if left is None else left) + scroll_rect.Right = SHORT( + self.getheightwidth()[1] - 1 if right is None else right + ) + destination_origin = _COORD() + fill_info = CHAR_INFO() + fill_info.UnicodeChar = " " + + if not ScrollConsoleScreenBuffer( + OutHandle, scroll_rect, None, destination_origin, fill_info + ): + raise WinError(GetLastError()) + + def _hide_cursor(self): + self.__write("\x1b[?25l") + + def _show_cursor(self): + self.__write("\x1b[?25h") + + def _enable_blinking(self): + self.__write("\x1b[?12h") + + def _disable_blinking(self): + self.__write("\x1b[?12l") + + def __write(self, text: str) -> None: + if self.out is not None: + self.out.write(text.encode(self.encoding, "replace")) + self.out.flush() + else: + os.write(self.output_fd, text.encode(self.encoding, "replace")) + + @property + def screen_xy(self) -> tuple[int, int]: + info = CONSOLE_SCREEN_BUFFER_INFO() + if not GetConsoleScreenBufferInfo(OutHandle, info): + raise WinError(GetLastError()) + return info.dwCursorPosition.X, info.dwCursorPosition.Y + + def _erase_to_end(self) -> None: + self.__write(ERASE_IN_LINE) + + def prepare(self) -> None: + trace("prepare") + self.screen = [] + self.height, self.width = self.getheightwidth() + + self.__posxy = 0, 0 + self.__gone_tall = 0 + self.__offset = 0 + + def restore(self) -> None: + pass + + def _move_relative(self, x: int, y: int) -> None: + """Moves relative to the current __posxy""" + dx = x - self.__posxy[0] + dy = y - self.__posxy[1] + if dx < 0: + self.__write(MOVE_LEFT.format(-dx)) + elif dx > 0: + self.__write(MOVE_RIGHT.format(dx)) + + if dy < 0: + self.__write(MOVE_UP.format(-dy)) + elif dy > 0: + self.__write(MOVE_DOWN.format(dy)) + + def move_cursor(self, x: int, y: int) -> None: 
+ if x < 0 or y < 0: + raise ValueError(f"Bad cursor position {x}, {y}") + + if y < self.__offset or y >= self.__offset + self.height: + self.event_queue.insert(0, Event("scroll", "")) + else: + self._move_relative(x, y) + self.__posxy = x, y + + def set_cursor_vis(self, visible: bool) -> None: + if visible: + self._show_cursor() + else: + self._hide_cursor() + + def getheightwidth(self) -> tuple[int, int]: + """Return (height, width) where height and width are the height + and width of the terminal window in characters.""" + info = CONSOLE_SCREEN_BUFFER_INFO() + if not GetConsoleScreenBufferInfo(OutHandle, info): + raise WinError(GetLastError()) + return ( + info.srWindow.Bottom - info.srWindow.Top + 1, + info.srWindow.Right - info.srWindow.Left + 1, + ) + + def _getscrollbacksize(self) -> int: + info = CONSOLE_SCREEN_BUFFER_INFO() + if not GetConsoleScreenBufferInfo(OutHandle, info): + raise WinError(GetLastError()) + + return info.srWindow.Bottom # type: ignore[no-any-return] + + def _read_input(self) -> INPUT_RECORD | None: + rec = INPUT_RECORD() + read = DWORD() + if not ReadConsoleInput(InHandle, rec, 1, read): + raise WinError(GetLastError()) + + if read.value == 0: + return None + + return rec + + def get_event(self, block: bool = True) -> Event | None: + """Return an Event instance. 
Returns None if |block| is false + and there is no event pending, otherwise waits for the + completion of an event.""" + if self.event_queue: + return self.event_queue.pop() + + while True: + rec = self._read_input() + if rec is None: + if block: + continue + return None + + if rec.EventType == WINDOW_BUFFER_SIZE_EVENT: + return Event("resize", "") + + if rec.EventType != KEY_EVENT or not rec.Event.KeyEvent.bKeyDown: + # Only process keys and keydown events + if block: + continue + return None + + key = rec.Event.KeyEvent.uChar.UnicodeChar + + if rec.Event.KeyEvent.uChar.UnicodeChar == "\r": + # Make enter make unix-like + return Event(evt="key", data="\n", raw=b"\n") + elif rec.Event.KeyEvent.wVirtualKeyCode == 8: + # Turn backspace directly into the command + return Event( + evt="key", + data="backspace", + raw=rec.Event.KeyEvent.uChar.UnicodeChar, + ) + elif rec.Event.KeyEvent.uChar.UnicodeChar == "\x00": + # Handle special keys like arrow keys and translate them into the appropriate command + code = VK_MAP.get(rec.Event.KeyEvent.wVirtualKeyCode) + if code: + return Event( + evt="key", data=code, raw=rec.Event.KeyEvent.uChar.UnicodeChar + ) + if block: + continue + + return None + + return Event(evt="key", data=key, raw=rec.Event.KeyEvent.uChar.UnicodeChar) + + def push_char(self, char: int | bytes) -> None: + """ + Push a character to the console event queue. + """ + raise NotImplementedError("push_char not supported on Windows") + + def beep(self) -> None: + self.__write("\x07") + + def clear(self) -> None: + """Wipe the screen""" + self.__write(CLEAR) + self.__posxy = 0, 0 + self.screen = [""] + + def finish(self) -> None: + """Move the cursor to the end of the display and otherwise get + ready for end. XXX could be merged with restore? 
Hmm.""" + y = len(self.screen) - 1 + while y >= 0 and not self.screen[y]: + y -= 1 + self._move_relative(0, min(y, self.height + self.__offset - 1)) + self.__write("\r\n") + + def flushoutput(self) -> None: + """Flush all output to the screen (assuming there's some + buffering going on somewhere). + + All output on Windows is unbuffered so this is a nop""" + pass + + def forgetinput(self) -> None: + """Forget all pending, but not yet processed input.""" + while self._read_input() is not None: + pass + + def getpending(self) -> Event: + """Return the characters that have been typed but not yet + processed.""" + return Event("key", "", b"") + + def wait(self) -> None: + """Wait for an event.""" + raise NotImplementedError("No wait support") + + def repaint(self) -> None: + raise NotImplementedError("No repaint support") + + +# Windows interop +class CONSOLE_SCREEN_BUFFER_INFO(Structure): + _fields_ = [ + ("dwSize", _COORD), + ("dwCursorPosition", _COORD), + ("wAttributes", WORD), + ("srWindow", SMALL_RECT), + ("dwMaximumWindowSize", _COORD), + ] + + +class CONSOLE_CURSOR_INFO(Structure): + _fields_ = [ + ("dwSize", DWORD), + ("bVisible", BOOL), + ] + + +class CHAR_INFO(Structure): + _fields_ = [ + ("UnicodeChar", WCHAR), + ("Attributes", WORD), + ] + + +class Char(Union): + _fields_ = [ + ("UnicodeChar", WCHAR), + ("Char", CHAR), + ] + + +class KeyEvent(ctypes.Structure): + _fields_ = [ + ("bKeyDown", BOOL), + ("wRepeatCount", WORD), + ("wVirtualKeyCode", WORD), + ("wVirtualScanCode", WORD), + ("uChar", Char), + ("dwControlKeyState", DWORD), + ] + + +class WindowsBufferSizeEvent(ctypes.Structure): + _fields_ = [("dwSize", _COORD)] + + +class ConsoleEvent(ctypes.Union): + _fields_ = [ + ("KeyEvent", KeyEvent), + ("WindowsBufferSizeEvent", WindowsBufferSizeEvent), + ] + + +class INPUT_RECORD(Structure): + _fields_ = [("EventType", WORD), ("Event", ConsoleEvent)] + + +KEY_EVENT = 0x01 +FOCUS_EVENT = 0x10 +MENU_EVENT = 0x08 +MOUSE_EVENT = 0x02 +WINDOW_BUFFER_SIZE_EVENT = 
0x04 + +ENABLE_PROCESSED_OUTPUT = 0x01 +ENABLE_WRAP_AT_EOL_OUTPUT = 0x02 +ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x04 + +STD_INPUT_HANDLE = -10 +STD_OUTPUT_HANDLE = -11 + +if sys.platform == "win32": + _KERNEL32 = WinDLL("kernel32", use_last_error=True) + + GetStdHandle = windll.kernel32.GetStdHandle + GetStdHandle.argtypes = [DWORD] + GetStdHandle.restype = HANDLE + + GetConsoleScreenBufferInfo = _KERNEL32.GetConsoleScreenBufferInfo + GetConsoleScreenBufferInfo.argtypes = [ + HANDLE, + ctypes.POINTER(CONSOLE_SCREEN_BUFFER_INFO), + ] + GetConsoleScreenBufferInfo.restype = BOOL + + ScrollConsoleScreenBuffer = _KERNEL32.ScrollConsoleScreenBufferW + ScrollConsoleScreenBuffer.argtypes = [ + HANDLE, + POINTER(SMALL_RECT), + POINTER(SMALL_RECT), + _COORD, + POINTER(CHAR_INFO), + ] + ScrollConsoleScreenBuffer.restype = BOOL + + SetConsoleMode = _KERNEL32.SetConsoleMode + SetConsoleMode.argtypes = [HANDLE, DWORD] + SetConsoleMode.restype = BOOL + + ReadConsoleInput = _KERNEL32.ReadConsoleInputW + ReadConsoleInput.argtypes = [HANDLE, POINTER(INPUT_RECORD), DWORD, POINTER(DWORD)] + ReadConsoleInput.restype = BOOL + + OutHandle = GetStdHandle(STD_OUTPUT_HANDLE) + InHandle = GetStdHandle(STD_INPUT_HANDLE) +else: + + def _win_only(*args, **kwargs): + raise NotImplementedError("Windows only") + + GetStdHandle = _win_only + GetConsoleScreenBufferInfo = _win_only + ScrollConsoleScreenBuffer = _win_only + SetConsoleMode = _win_only + ReadConsoleInput = _win_only + OutHandle = 0 + InHandle = 0 diff --git a/Lib/test/test_pyrepl/__init__.py b/Lib/test/test_pyrepl/__init__.py index fa38b86b847dd9..8359d9844623c2 100644 --- a/Lib/test/test_pyrepl/__init__.py +++ b/Lib/test/test_pyrepl/__init__.py @@ -1,12 +1,14 @@ import os +import sys from test.support import requires, load_package_tests from test.support.import_helper import import_module -# Optionally test pyrepl. This currently requires that the -# 'curses' resource be given on the regrtest command line using the -u -# option. 
Additionally, we need to attempt to import curses and readline. -requires("curses") -curses = import_module("curses") +if sys.platform != "win32": + # On non-Windows platforms, testing pyrepl currently requires that the + # 'curses' resource be given on the regrtest command line using the -u + # option. Additionally, we need to attempt to import curses and readline. + requires("curses") + curses = import_module("curses") def load_tests(*args): diff --git a/Lib/test/test_pyrepl/support.py b/Lib/test/test_pyrepl/support.py index 75539049d43c2a..d2f5429aea7a11 100644 --- a/Lib/test/test_pyrepl/support.py +++ b/Lib/test/test_pyrepl/support.py @@ -55,7 +55,7 @@ def get_prompt(lineno, cursor_on_line) -> str: return reader -def prepare_console(events: Iterable[Event], **kwargs): +def prepare_console(events: Iterable[Event], **kwargs) -> MagicMock | Console: console = MagicMock() console.get_event.side_effect = events console.height = 100 diff --git a/Lib/test/test_pyrepl/test_pyrepl.py b/Lib/test/test_pyrepl/test_pyrepl.py index bdcabf9be05b9e..aa2722095794c9 100644 --- a/Lib/test/test_pyrepl/test_pyrepl.py +++ b/Lib/test/test_pyrepl/test_pyrepl.py @@ -508,14 +508,15 @@ def prepare_reader(self, events, namespace): reader = ReadlineAlikeReader(console=console, config=config) return reader + @patch("rlcompleter._readline_available", False) def test_simple_completion(self): - events = code_to_events("os.geten\t\n") + events = code_to_events("os.getpid\t\n") namespace = {"os": os} reader = self.prepare_reader(events, namespace) output = multiline_input(reader, namespace) - self.assertEqual(output, "os.getenv") + self.assertEqual(output, "os.getpid()") def test_completion_with_many_options(self): # Test with something that initially displays many options diff --git a/Lib/test/test_pyrepl/test_unix_console.py b/Lib/test/test_pyrepl/test_unix_console.py index e1faa00caafc27..d0b98f17ade094 100644 --- a/Lib/test/test_pyrepl/test_unix_console.py +++ 
b/Lib/test/test_pyrepl/test_unix_console.py @@ -1,12 +1,16 @@ import itertools +import sys +import unittest from functools import partial from unittest import TestCase from unittest.mock import MagicMock, call, patch, ANY from .support import handle_all_events, code_to_events -from _pyrepl.console import Event -from _pyrepl.unix_console import UnixConsole - +try: + from _pyrepl.console import Event + from _pyrepl.unix_console import UnixConsole +except ImportError: + pass def unix_console(events, **kwargs): console = UnixConsole() @@ -67,6 +71,7 @@ def unix_console(events, **kwargs): } +@unittest.skipIf(sys.platform == "win32", "No Unix event queue on Windows") @patch("_pyrepl.curses.tigetstr", lambda s: TERM_CAPABILITIES.get(s)) @patch( "_pyrepl.curses.tparm", diff --git a/Lib/test/test_pyrepl/test_unix_eventqueue.py b/Lib/test/test_pyrepl/test_unix_eventqueue.py index c06536b4a86a04..301f79927a741f 100644 --- a/Lib/test/test_pyrepl/test_unix_eventqueue.py +++ b/Lib/test/test_pyrepl/test_unix_eventqueue.py @@ -1,11 +1,15 @@ import tempfile import unittest +import sys from unittest.mock import patch -from _pyrepl.console import Event -from _pyrepl.unix_eventqueue import EventQueue - +try: + from _pyrepl.console import Event + from _pyrepl.unix_eventqueue import EventQueue +except ImportError: + pass +@unittest.skipIf(sys.platform == "win32", "No Unix event queue on Windows") @patch("_pyrepl.curses.tigetstr", lambda x: b"") class TestUnixEventQueue(unittest.TestCase): def setUp(self): diff --git a/Lib/test/test_pyrepl/test_windows_console.py b/Lib/test/test_pyrepl/test_windows_console.py new file mode 100644 index 00000000000000..e87dfe99b1a17d --- /dev/null +++ b/Lib/test/test_pyrepl/test_windows_console.py @@ -0,0 +1,331 @@ +import itertools +import sys +import unittest +from _pyrepl.console import Event, Console +from _pyrepl.windows_console import ( + MOVE_LEFT, + MOVE_RIGHT, + MOVE_UP, + MOVE_DOWN, + ERASE_IN_LINE, +) +from functools import partial +from typing 
import Iterable +from unittest import TestCase, main +from unittest.mock import MagicMock, call, patch, ANY + +from .support import handle_all_events, code_to_events + +try: + from _pyrepl.console import Event + from _pyrepl.windows_console import WindowsConsole +except ImportError: + pass + + +@unittest.skipIf(sys.platform != "win32", "Test class specifically for Windows") +class WindowsConsoleTests(TestCase): + def console(self, events, **kwargs) -> Console: + console = WindowsConsole() + console.get_event = MagicMock(side_effect=events) + console._scroll = MagicMock() + console._hide_cursor = MagicMock() + console._show_cursor = MagicMock() + console._getscrollbacksize = MagicMock(42) + console.out = MagicMock() + + height = kwargs.get("height", 25) + width = kwargs.get("width", 80) + console.getheightwidth = MagicMock(side_effect=lambda: (height, width)) + + console.prepare() + for key, val in kwargs.items(): + setattr(console, key, val) + return console + + def handle_events(self, events: Iterable[Event], **kwargs): + return handle_all_events(events, partial(self.console, **kwargs)) + + def handle_events_narrow(self, events): + return self.handle_events(events, width=5) + + def handle_events_short(self, events): + return self.handle_events(events, height=1) + + def handle_events_height_3(self, events): + return self.handle_events(events, height=3) + + def test_simple_addition(self): + code = "12+34" + events = code_to_events(code) + _, con = self.handle_events(events) + con.out.write.assert_any_call(b"1") + con.out.write.assert_any_call(b"2") + con.out.write.assert_any_call(b"+") + con.out.write.assert_any_call(b"3") + con.out.write.assert_any_call(b"4") + con.restore() + + def test_wrap(self): + code = "12+34" + events = code_to_events(code) + _, con = self.handle_events_narrow(events) + con.out.write.assert_any_call(b"1") + con.out.write.assert_any_call(b"2") + con.out.write.assert_any_call(b"+") + con.out.write.assert_any_call(b"3") + 
con.out.write.assert_any_call(b"\\") + con.out.write.assert_any_call(b"\n") + con.out.write.assert_any_call(b"4") + con.restore() + + def test_resize_wider(self): + code = "1234567890" + events = code_to_events(code) + reader, console = self.handle_events_narrow(events) + + console.height = 20 + console.width = 80 + console.getheightwidth = MagicMock(lambda _: (20, 80)) + + def same_reader(_): + return reader + + def same_console(events): + console.get_event = MagicMock(side_effect=events) + return console + + _, con = handle_all_events( + [Event(evt="resize", data=None)], + prepare_reader=same_reader, + prepare_console=same_console, + ) + + con.out.write.assert_any_call(self.move_right(2)) + con.out.write.assert_any_call(self.move_up(2)) + con.out.write.assert_any_call(b"567890") + + con.restore() + + def test_resize_narrower(self): + code = "1234567890" + events = code_to_events(code) + reader, console = self.handle_events(events) + + console.height = 20 + console.width = 4 + console.getheightwidth = MagicMock(lambda _: (20, 4)) + + def same_reader(_): + return reader + + def same_console(events): + console.get_event = MagicMock(side_effect=events) + return console + + _, con = handle_all_events( + [Event(evt="resize", data=None)], + prepare_reader=same_reader, + prepare_console=same_console, + ) + + con.out.write.assert_any_call(b"456\\") + con.out.write.assert_any_call(b"789\\") + + con.restore() + + def test_cursor_left(self): + code = "1" + events = itertools.chain( + code_to_events(code), + [Event(evt="key", data="left", raw=bytearray(b"\x1bOD"))], + ) + _, con = self.handle_events(events) + con.out.write.assert_any_call(self.move_left()) + con.restore() + + def test_cursor_left_right(self): + code = "1" + events = itertools.chain( + code_to_events(code), + [ + Event(evt="key", data="left", raw=bytearray(b"\x1bOD")), + Event(evt="key", data="right", raw=bytearray(b"\x1bOC")), + ], + ) + _, con = self.handle_events(events) + 
con.out.write.assert_any_call(self.move_left()) + con.out.write.assert_any_call(self.move_right()) + con.restore() + + def test_cursor_up(self): + code = "1\n2+3" + events = itertools.chain( + code_to_events(code), + [Event(evt="key", data="up", raw=bytearray(b"\x1bOA"))], + ) + _, con = self.handle_events(events) + con.out.write.assert_any_call(self.move_up()) + con.restore() + + def test_cursor_up_down(self): + code = "1\n2+3" + events = itertools.chain( + code_to_events(code), + [ + Event(evt="key", data="up", raw=bytearray(b"\x1bOA")), + Event(evt="key", data="down", raw=bytearray(b"\x1bOB")), + ], + ) + _, con = self.handle_events(events) + con.out.write.assert_any_call(self.move_up()) + con.out.write.assert_any_call(self.move_down()) + con.restore() + + def test_cursor_back_write(self): + events = itertools.chain( + code_to_events("1"), + [Event(evt="key", data="left", raw=bytearray(b"\x1bOD"))], + code_to_events("2"), + ) + _, con = self.handle_events(events) + con.out.write.assert_any_call(b"1") + con.out.write.assert_any_call(self.move_left()) + con.out.write.assert_any_call(b"21") + con.restore() + + def test_multiline_function_move_up_short_terminal(self): + # fmt: off + code = ( + "def f():\n" + " foo" + ) + # fmt: on + + events = itertools.chain( + code_to_events(code), + [ + Event(evt="key", data="up", raw=bytearray(b"\x1bOA")), + Event(evt="scroll", data=None), + ], + ) + _, con = self.handle_events_short(events) + con.out.write.assert_any_call(self.move_left(5)) + con.out.write.assert_any_call(self.move_up()) + con.restore() + + def test_multiline_function_move_up_down_short_terminal(self): + # fmt: off + code = ( + "def f():\n" + " foo" + ) + # fmt: on + + events = itertools.chain( + code_to_events(code), + [ + Event(evt="key", data="up", raw=bytearray(b"\x1bOA")), + Event(evt="scroll", data=None), + Event(evt="key", data="down", raw=bytearray(b"\x1bOB")), + Event(evt="scroll", data=None), + ], + ) + _, con = self.handle_events_short(events) + 
con.out.write.assert_any_call(self.move_left(8)) + con.out.write.assert_any_call(self.erase_in_line()) + con.restore() + + def test_resize_bigger_on_multiline_function(self): + # fmt: off + code = ( + "def f():\n" + " foo" + ) + # fmt: on + + events = itertools.chain(code_to_events(code)) + reader, console = self.handle_events_short(events) + + console.height = 2 + console.getheightwidth = MagicMock(lambda _: (2, 80)) + + def same_reader(_): + return reader + + def same_console(events): + console.get_event = MagicMock(side_effect=events) + return console + + _, con = handle_all_events( + [Event(evt="resize", data=None)], + prepare_reader=same_reader, + prepare_console=same_console, + ) + con.out.write.assert_has_calls( + [ + call(self.move_left(5)), + call(self.move_up()), + call(b"def f():"), + call(self.move_left(3)), + call(self.move_down()), + ] + ) + console.restore() + con.restore() + + def test_resize_smaller_on_multiline_function(self): + # fmt: off + code = ( + "def f():\n" + " foo" + ) + # fmt: on + + events = itertools.chain(code_to_events(code)) + reader, console = self.handle_events_height_3(events) + + console.height = 1 + console.getheightwidth = MagicMock(lambda _: (1, 80)) + + def same_reader(_): + return reader + + def same_console(events): + console.get_event = MagicMock(side_effect=events) + return console + + _, con = handle_all_events( + [Event(evt="resize", data=None)], + prepare_reader=same_reader, + prepare_console=same_console, + ) + con.out.write.assert_has_calls( + [ + call(self.move_left(5)), + call(self.move_up()), + call(self.erase_in_line()), + call(b" foo"), + ] + ) + console.restore() + con.restore() + + def move_up(self, lines=1): + return MOVE_UP.format(lines).encode("utf8") + + def move_down(self, lines=1): + return MOVE_DOWN.format(lines).encode("utf8") + + def move_left(self, cols=1): + return MOVE_LEFT.format(cols).encode("utf8") + + def move_right(self, cols=1): + return MOVE_RIGHT.format(cols).encode("utf8") + + def 
erase_in_line(self): + return ERASE_IN_LINE.encode("utf8") + + +if __name__ == "__main__": + unittest.main() diff --git a/Misc/NEWS.d/next/Windows/2024-05-25-18-43-10.gh-issue-111201.SLPJIx.rst b/Misc/NEWS.d/next/Windows/2024-05-25-18-43-10.gh-issue-111201.SLPJIx.rst new file mode 100644 index 00000000000000..f3918ed633d78c --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2024-05-25-18-43-10.gh-issue-111201.SLPJIx.rst @@ -0,0 +1 @@ +Add support for new pyrepl on Windows From 010aaa32fb93c5033a698d7213469af02d76fef3 Mon Sep 17 00:00:00 2001 From: Katie Bell Date: Fri, 31 May 2024 17:58:46 +1000 Subject: [PATCH 002/373] gh-97747: Improvements to WASM browser REPL. (#97665) Improvements to WASM browser REPL. Adds a text box to write and run code outside the REPL, a stop button, and handling of Ctrl-D for EOF. --- Tools/wasm/python.html | 67 ++++++++++++++++++++++++++++++++++--- Tools/wasm/python.worker.js | 10 ++++-- 2 files changed, 69 insertions(+), 8 deletions(-) diff --git a/Tools/wasm/python.html b/Tools/wasm/python.html index 17ffa0ea8bfeff..81a035a5c4cd93 100644 --- a/Tools/wasm/python.html +++ b/Tools/wasm/python.html @@ -35,11 +35,12 @@

Simple REPL for Python WASM

-
+
+ +
+
The simple REPL provides a limited Python experience in the browser. diff --git a/Tools/wasm/python.worker.js b/Tools/wasm/python.worker.js index 1b794608fffe7b..4ce4e16fc0fa19 100644 --- a/Tools/wasm/python.worker.js +++ b/Tools/wasm/python.worker.js @@ -19,18 +19,18 @@ class StdinBuffer { } stdin = () => { - if (this.numberOfCharacters + 1 === this.readIndex) { + while (this.numberOfCharacters + 1 === this.readIndex) { if (!this.sentNull) { // Must return null once to indicate we're done for now. this.sentNull = true return null } this.sentNull = false + // Prompt will reset this.readIndex to 1 this.prompt() } const char = this.buffer[this.readIndex] this.readIndex += 1 - // How do I send an EOF?? return char } } @@ -71,7 +71,11 @@ var Module = { onmessage = (event) => { if (event.data.type === 'run') { - // TODO: Set up files from event.data.files + if (event.data.files) { + for (const [filename, contents] of Object.entries(event.data.files)) { + Module.FS.writeFile(filename, contents) + } + } const ret = callMain(event.data.args) postMessage({ type: 'finished', From b278c723d79a238b14e99908e83f4b1b6a39ed3d Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Fri, 31 May 2024 11:07:16 +0300 Subject: [PATCH 003/373] gh-119780: Adjust exception messages in Lib/test/test_format.py (GH-119781) Mismatches were only printed to stdout, without making the test fail.
--- Lib/test/test_format.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index 8cef621bd716ac..d2026152d8e747 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -304,9 +304,9 @@ def test_str_format(self): test_exc('%c', sys.maxunicode+1, OverflowError, "%c arg not in range(0x110000)") #test_exc('%c', 2**128, OverflowError, "%c arg not in range(0x110000)") - test_exc('%c', 3.14, TypeError, "%c requires int or char") - test_exc('%c', 'ab', TypeError, "%c requires int or char") - test_exc('%c', b'x', TypeError, "%c requires int or char") + test_exc('%c', 3.14, TypeError, "%c requires an int or a unicode character, not float") + test_exc('%c', 'ab', TypeError, "%c requires an int or a unicode character, not a string of length 2") + test_exc('%c', b'x', TypeError, "%c requires an int or a unicode character, not bytes") if maxsize == 2**31-1: # crashes 2.2.1 and earlier: @@ -370,11 +370,11 @@ def __bytes__(self): test_exc(b"%c", 2**128, OverflowError, "%c arg not in range(256)") test_exc(b"%c", b"Za", TypeError, - "%c requires an integer in range(256) or a single byte") + "%c requires an integer in range(256) or a single byte, not a bytes object of length 2") test_exc(b"%c", "Y", TypeError, - "%c requires an integer in range(256) or a single byte") + "%c requires an integer in range(256) or a single byte, not str") test_exc(b"%c", 3.14, TypeError, - "%c requires an integer in range(256) or a single byte") + "%c requires an integer in range(256) or a single byte, not float") test_exc(b"%b", "Xc", TypeError, "%b requires a bytes-like object, " "or an object that implements __bytes__, not 'str'") From 94e9585e99abc2d060cedc77b3c03e06b4a0a9c4 Mon Sep 17 00:00:00 2001 From: Christopher Chavez Date: Fri, 31 May 2024 03:23:53 -0500 Subject: [PATCH 004/373] =?UTF-8?q?gh-103194:=20Fix=20Tkinter=E2=80=99s=20?= 
=?UTF-8?q?Tcl=20value=20type=20handling=20for=20Tcl=208.7/9.0=20(GH-10384?= =?UTF-8?q?6)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some of standard Tcl types were renamed, removed, or no longer registered in Tcl 8.7/9.0. This change fixes automatic conversion of Tcl values to Python values to avoid returning a Tcl_Obj where the primary Python types (int, bool, str, bytes) were returned in older Tcl. --- ...-04-24-05-34-23.gh-issue-103194.GwBwWL.rst | 4 ++ Modules/_tkinter.c | 50 ++++++++++++------- 2 files changed, 36 insertions(+), 18 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-04-24-05-34-23.gh-issue-103194.GwBwWL.rst diff --git a/Misc/NEWS.d/next/Library/2023-04-24-05-34-23.gh-issue-103194.GwBwWL.rst b/Misc/NEWS.d/next/Library/2023-04-24-05-34-23.gh-issue-103194.GwBwWL.rst new file mode 100644 index 00000000000000..3f70168b81069e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-04-24-05-34-23.gh-issue-103194.GwBwWL.rst @@ -0,0 +1,4 @@ +Prepare Tkinter for C API changes in Tcl 8.7/9.0 to avoid +:class:`_tkinter.Tcl_Obj` being unexpectedly returned +instead of :class:`bool`, :class:`str`, +:class:`bytearray`, or :class:`int`. diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c index c7e271faa4cf34..0cff36dd307c39 100644 --- a/Modules/_tkinter.c +++ b/Modules/_tkinter.c @@ -318,6 +318,7 @@ typedef struct { const Tcl_ObjType *BignumType; const Tcl_ObjType *ListType; const Tcl_ObjType *StringType; + const Tcl_ObjType *UTF32StringType; } TkappObject; #define Tkapp_Interp(v) (((TkappObject *) (v))->interp) @@ -588,14 +589,40 @@ Tkapp_New(const char *screenName, const char *className, } v->OldBooleanType = Tcl_GetObjType("boolean"); - v->BooleanType = Tcl_GetObjType("booleanString"); - v->ByteArrayType = Tcl_GetObjType("bytearray"); + { + Tcl_Obj *value; + int boolValue; + + /* Tcl 8.5 "booleanString" type is not registered + and is renamed to "boolean" in Tcl 9.0. 
+ Based on approach suggested at + https://core.tcl-lang.org/tcl/info/3bb3bcf2da5b */ + value = Tcl_NewStringObj("true", -1); + Tcl_GetBooleanFromObj(NULL, value, &boolValue); + v->BooleanType = value->typePtr; + Tcl_DecrRefCount(value); + + // "bytearray" type is not registered in Tcl 9.0 + value = Tcl_NewByteArrayObj(NULL, 0); + v->ByteArrayType = value->typePtr; + Tcl_DecrRefCount(value); + } v->DoubleType = Tcl_GetObjType("double"); + /* TIP 484 suggests retrieving the "int" type without Tcl_GetObjType("int") + since it is no longer registered in Tcl 9.0. But even though Tcl 8.7 + only uses the "wideInt" type on platforms with 32-bit long, it still has + a registered "int" type, which FromObj() should recognize just in case. */ v->IntType = Tcl_GetObjType("int"); + if (v->IntType == NULL) { + Tcl_Obj *value = Tcl_NewIntObj(0); + v->IntType = value->typePtr; + Tcl_DecrRefCount(value); + } v->WideIntType = Tcl_GetObjType("wideInt"); v->BignumType = Tcl_GetObjType("bignum"); v->ListType = Tcl_GetObjType("list"); v->StringType = Tcl_GetObjType("string"); + v->UTF32StringType = Tcl_GetObjType("utf32string"); /* Delete the 'exit' command, which can screw things up */ Tcl_DeleteCommand(v->interp, "exit"); @@ -1124,14 +1151,6 @@ FromObj(TkappObject *tkapp, Tcl_Obj *value) return PyFloat_FromDouble(value->internalRep.doubleValue); } - if (value->typePtr == tkapp->IntType) { - long longValue; - if (Tcl_GetLongFromObj(interp, value, &longValue) == TCL_OK) - return PyLong_FromLong(longValue); - /* If there is an error in the long conversion, - fall through to wideInt handling. 
*/ - } - if (value->typePtr == tkapp->IntType || value->typePtr == tkapp->WideIntType) { result = fromWideIntObj(tkapp, value); @@ -1176,17 +1195,12 @@ FromObj(TkappObject *tkapp, Tcl_Obj *value) return result; } - if (value->typePtr == tkapp->StringType) { + if (value->typePtr == tkapp->StringType || + value->typePtr == tkapp->UTF32StringType) + { return unicodeFromTclObj(value); } - if (tkapp->BooleanType == NULL && - strcmp(value->typePtr->name, "booleanString") == 0) { - /* booleanString type is not registered in Tcl */ - tkapp->BooleanType = value->typePtr; - return fromBoolean(tkapp, value); - } - if (tkapp->BignumType == NULL && strcmp(value->typePtr->name, "bignum") == 0) { /* bignum type is not registered in Tcl */ From dae0375bd97f3821c5db1602a0653a3c5dc53c5b Mon Sep 17 00:00:00 2001 From: Arnon Yaari Date: Fri, 31 May 2024 12:02:54 +0300 Subject: [PATCH 005/373] gh-111201: Improve pyrepl auto indentation (#119606) - auto-indent when editing multi-line block - ignore comments --- Lib/_pyrepl/readline.py | 27 +++++++--- Lib/test/test_pyrepl/test_pyrepl.py | 81 ++++++++++++++++++++++++++++- Lib/test/test_pyrepl/test_reader.py | 4 +- 3 files changed, 101 insertions(+), 11 deletions(-) diff --git a/Lib/_pyrepl/readline.py b/Lib/_pyrepl/readline.py index 248f3854a29689..7d811bf41773fe 100644 --- a/Lib/_pyrepl/readline.py +++ b/Lib/_pyrepl/readline.py @@ -237,13 +237,24 @@ def _get_first_indentation(buffer: list[str]) -> str | None: return None -def _is_last_char_colon(buffer: list[str]) -> bool: - i = len(buffer) - while i > 0: - i -= 1 - if buffer[i] not in " \t\n": # ignore whitespaces - return buffer[i] == ":" - return False +def _should_auto_indent(buffer: list[str], pos: int) -> bool: + # check if last character before "pos" is a colon, ignoring + # whitespaces and comments. 
+ last_char = None + while pos > 0: + pos -= 1 + if last_char is None: + if buffer[pos] not in " \t\n": # ignore whitespaces + last_char = buffer[pos] + else: + # even if we found a non-whitespace character before + # original pos, we keep going back until newline is reached + # to make sure we ignore comments + if buffer[pos] == "\n": + break + if buffer[pos] == "#": + last_char = None + return last_char == ":" class maybe_accept(commands.Command): @@ -280,7 +291,7 @@ def _newline_before_pos(): for i in range(prevlinestart, prevlinestart + indent): r.insert(r.buffer[i]) r.update_last_used_indentation() - if _is_last_char_colon(r.buffer): + if _should_auto_indent(r.buffer, r.pos): if r.last_used_indentation is not None: indentation = r.last_used_indentation else: diff --git a/Lib/test/test_pyrepl/test_pyrepl.py b/Lib/test/test_pyrepl/test_pyrepl.py index aa2722095794c9..45114e7315749f 100644 --- a/Lib/test/test_pyrepl/test_pyrepl.py +++ b/Lib/test/test_pyrepl/test_pyrepl.py @@ -312,6 +312,14 @@ def test_cursor_position_after_wrap_and_move_up(self): self.assertEqual(reader.pos, 10) self.assertEqual(reader.cxy, (1, 1)) + +class TestPyReplAutoindent(TestCase): + def prepare_reader(self, events): + console = FakeConsole(events) + config = ReadlineConfig(readline_completer=None) + reader = ReadlineAlikeReader(console=console, config=config) + return reader + def test_auto_indent_default(self): # fmt: off input_code = ( @@ -372,7 +380,6 @@ def test_auto_indent_prev_block(self): ), ) - output_code = ( "def g():\n" " pass\n" @@ -385,6 +392,78 @@ def test_auto_indent_prev_block(self): output2 = multiline_input(reader) self.assertEqual(output2, output_code) + def test_auto_indent_multiline(self): + # fmt: off + events = itertools.chain( + code_to_events( + "def f():\n" + "pass" + ), + [ + # go to the end of the first line + Event(evt="key", data="up", raw=bytearray(b"\x1bOA")), + Event(evt="key", data="\x05", raw=bytearray(b"\x1bO5")), + # new line should be autoindented + 
Event(evt="key", data="\n", raw=bytearray(b"\n")), + ], + code_to_events( + "pass" + ), + [ + # go to end of last line + Event(evt="key", data="down", raw=bytearray(b"\x1bOB")), + Event(evt="key", data="\x05", raw=bytearray(b"\x1bO5")), + # double newline to terminate the block + Event(evt="key", data="\n", raw=bytearray(b"\n")), + Event(evt="key", data="\n", raw=bytearray(b"\n")), + ], + ) + + output_code = ( + "def f():\n" + " pass\n" + " pass\n" + " " + ) + # fmt: on + + reader = self.prepare_reader(events) + output = multiline_input(reader) + self.assertEqual(output, output_code) + + def test_auto_indent_with_comment(self): + # fmt: off + events = code_to_events( + "def f(): # foo\n" + "pass\n\n" + ) + + output_code = ( + "def f(): # foo\n" + " pass\n" + " " + ) + # fmt: on + + reader = self.prepare_reader(events) + output = multiline_input(reader) + self.assertEqual(output, output_code) + + def test_auto_indent_ignore_comments(self): + # fmt: off + events = code_to_events( + "pass #:\n" + ) + + output_code = ( + "pass #:" + ) + # fmt: on + + reader = self.prepare_reader(events) + output = multiline_input(reader) + self.assertEqual(output, output_code) + class TestPyReplOutput(TestCase): def prepare_reader(self, events): diff --git a/Lib/test/test_pyrepl/test_reader.py b/Lib/test/test_pyrepl/test_reader.py index 7bf7a36d8d7bb9..c9b03d5e711539 100644 --- a/Lib/test/test_pyrepl/test_reader.py +++ b/Lib/test/test_pyrepl/test_reader.py @@ -168,8 +168,8 @@ def test_newline_within_block_trailing_whitespace(self): expected = ( "def foo():\n" - "\n" - "\n" + " \n" + " \n" " a = 1\n" " \n" " " # HistoricalReader will trim trailing whitespace From b9965ef282d6662145d2e05b080c811132ce6fde Mon Sep 17 00:00:00 2001 From: Joshua Herman <30265+zitterbewegung@users.noreply.github.com> Date: Fri, 31 May 2024 05:05:09 -0500 Subject: [PATCH 006/373] gh-119189: Fix the power operator for Fraction (GH-119242) When using the ** operator or pow() with Fraction as the base and an 
exponent that is not rational, a float, or a complex, the fraction is no longer converted to a float. --- Lib/fractions.py | 4 +++- Lib/test/test_fractions.py | 10 +++++----- Misc/ACKS | 1 + .../2024-05-20-13-48-37.gh-issue-119189.dhJVs5.rst | 3 +++ 4 files changed, 12 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-20-13-48-37.gh-issue-119189.dhJVs5.rst diff --git a/Lib/fractions.py b/Lib/fractions.py index f91b4f35eff370..95adccd86e33a0 100644 --- a/Lib/fractions.py +++ b/Lib/fractions.py @@ -877,8 +877,10 @@ def __pow__(a, b, modulo=None): # A fractional power will generally produce an # irrational number. return float(a) ** float(b) - else: + elif isinstance(b, (float, complex)): return float(a) ** b + else: + return NotImplemented def __rpow__(b, a): """a ** b""" diff --git a/Lib/test/test_fractions.py b/Lib/test/test_fractions.py index 28607ee37000f9..3c7780e40db096 100644 --- a/Lib/test/test_fractions.py +++ b/Lib/test/test_fractions.py @@ -925,21 +925,21 @@ def testMixedPower(self): self.assertTypedEquals(Root(4) ** F(2, 1), Root(4, F(1))) self.assertTypedEquals(Root(4) ** F(-2, 1), Root(4, -F(1))) self.assertTypedEquals(Root(4) ** F(-2, 3), Root(4, -3.0)) - self.assertEqual(F(3, 2) ** SymbolicReal('X'), SymbolicReal('1.5 ** X')) + self.assertEqual(F(3, 2) ** SymbolicReal('X'), SymbolicReal('3/2 ** X')) self.assertEqual(SymbolicReal('X') ** F(3, 2), SymbolicReal('X ** 1.5')) - self.assertTypedEquals(F(3, 2) ** Rect(2, 0), Polar(2.25, 0.0)) - self.assertTypedEquals(F(1, 1) ** Rect(2, 3), Polar(1.0, 0.0)) + self.assertTypedEquals(F(3, 2) ** Rect(2, 0), Polar(F(9,4), 0.0)) + self.assertTypedEquals(F(1, 1) ** Rect(2, 3), Polar(F(1), 0.0)) self.assertTypedEquals(F(3, 2) ** RectComplex(2, 0), Polar(2.25, 0.0)) self.assertTypedEquals(F(1, 1) ** RectComplex(2, 3), Polar(1.0, 0.0)) self.assertTypedEquals(Polar(4, 2) ** F(3, 2), Polar(8.0, 3.0)) self.assertTypedEquals(Polar(4, 2) ** F(3, 1), Polar(64, 6)) 
self.assertTypedEquals(Polar(4, 2) ** F(-3, 1), Polar(0.015625, -6)) self.assertTypedEquals(Polar(4, 2) ** F(-3, 2), Polar(0.125, -3.0)) - self.assertEqual(F(3, 2) ** SymbolicComplex('X'), SymbolicComplex('1.5 ** X')) + self.assertEqual(F(3, 2) ** SymbolicComplex('X'), SymbolicComplex('3/2 ** X')) self.assertEqual(SymbolicComplex('X') ** F(3, 2), SymbolicComplex('X ** 1.5')) - self.assertEqual(F(3, 2) ** Symbolic('X'), Symbolic('1.5 ** X')) + self.assertEqual(F(3, 2) ** Symbolic('X'), Symbolic('3/2 ** X')) self.assertEqual(Symbolic('X') ** F(3, 2), Symbolic('X ** 1.5')) def testMixingWithDecimal(self): diff --git a/Misc/ACKS b/Misc/ACKS index 9c10a76f1df624..2e7e12481bacd7 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -751,6 +751,7 @@ Kasun Herath Chris Herborth Ivan Herman Jürgen Hermann +Joshua Jay Herman Gary Herron Ernie Hershey Thomas Herve diff --git a/Misc/NEWS.d/next/Library/2024-05-20-13-48-37.gh-issue-119189.dhJVs5.rst b/Misc/NEWS.d/next/Library/2024-05-20-13-48-37.gh-issue-119189.dhJVs5.rst new file mode 100644 index 00000000000000..e5cfbcf95a0b81 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-20-13-48-37.gh-issue-119189.dhJVs5.rst @@ -0,0 +1,3 @@ +When using the ``**`` operator or :func:`pow` with :class:`~fractions.Fraction` +as the base and an exponent that is not rational, a float, or a complex, the +fraction is no longer converted to a float. 
From 0e8d35b931f41210483cc51c4169e9a943c7f166 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 31 May 2024 13:07:19 +0100 Subject: [PATCH 007/373] gh-116145: Updated bundled Tcl/Tk on Windows to 8.6.14 (GH-117030) --- ...024-03-19-19-04-56.gh-issue-116145.srVT3d.rst | 1 + Misc/externals.spdx.json | 16 ++++++++-------- PCbuild/get_externals.bat | 6 +++--- PCbuild/tcltk.props | 2 +- 4 files changed, 13 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2024-03-19-19-04-56.gh-issue-116145.srVT3d.rst diff --git a/Misc/NEWS.d/next/Windows/2024-03-19-19-04-56.gh-issue-116145.srVT3d.rst b/Misc/NEWS.d/next/Windows/2024-03-19-19-04-56.gh-issue-116145.srVT3d.rst new file mode 100644 index 00000000000000..7f840b0556048a --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2024-03-19-19-04-56.gh-issue-116145.srVT3d.rst @@ -0,0 +1 @@ +Updated bundled Tcl/Tk to 8.6.14. diff --git a/Misc/externals.spdx.json b/Misc/externals.spdx.json index 58f8e0afd71f1b..758d41910054ce 100644 --- a/Misc/externals.spdx.json +++ b/Misc/externals.spdx.json @@ -112,42 +112,42 @@ "checksums": [ { "algorithm": "SHA256", - "checksumValue": "1d3f2015e49e269cf681373d433cd54d88d5ef7443fe87f5f50f5fcfe9003e73" + "checksumValue": "ad7623a44e1b6e42df47ba8f16b2b0435ac605650b5054077c4355a30473074c" } ], - "downloadLocation": "https://github.com/python/cpython-source-deps/archive/refs/tags/tcl-core-8.6.13.1.tar.gz", + "downloadLocation": "https://github.com/python/cpython-source-deps/archive/refs/tags/tcl-core-8.6.14.0.tar.gz", "externalRefs": [ { "referenceCategory": "SECURITY", - "referenceLocator": "cpe:2.3:a:tcl_tk:tcl_tk:8.6.13.1:*:*:*:*:*:*:*", + "referenceLocator": "cpe:2.3:a:tcl_tk:tcl_tk:8.6.14.0:*:*:*:*:*:*:*", "referenceType": "cpe23Type" } ], "licenseConcluded": "NOASSERTION", "name": "tcl-core", "primaryPackagePurpose": "SOURCE", - "versionInfo": "8.6.13.1" + "versionInfo": "8.6.14.0" }, { "SPDXID": "SPDXRef-PACKAGE-tk", "checksums": [ { "algorithm": "SHA256", - "checksumValue": 
"6056203b8a6aaf6ea89d90a7b55dc7f407e55c093f731a98fd830a712a3c81d3" + "checksumValue": "e8d5cbe97952037962518b69aba85e324d80aa189054c163ab0ee764a448e802" } ], - "downloadLocation": "https://github.com/python/cpython-source-deps/archive/refs/tags/tk-8.6.13.1.tar.gz", + "downloadLocation": "https://github.com/python/cpython-source-deps/archive/refs/tags/tk-8.6.14.0.tar.gz", "externalRefs": [ { "referenceCategory": "SECURITY", - "referenceLocator": "cpe:2.3:a:tcl_tk:tcl_tk:8.6.13.1:*:*:*:*:*:*:*", + "referenceLocator": "cpe:2.3:a:tcl_tk:tcl_tk:8.6.14.0:*:*:*:*:*:*:*", "referenceType": "cpe23Type" } ], "licenseConcluded": "NOASSERTION", "name": "tk", "primaryPackagePurpose": "SOURCE", - "versionInfo": "8.6.13.1" + "versionInfo": "8.6.14.0" }, { "SPDXID": "SPDXRef-PACKAGE-xz", diff --git a/PCbuild/get_externals.bat b/PCbuild/get_externals.bat index 761d3de93b777d..1927938ef0821c 100644 --- a/PCbuild/get_externals.bat +++ b/PCbuild/get_externals.bat @@ -56,8 +56,8 @@ if NOT "%IncludeLibffiSrc%"=="false" set libraries=%libraries% libffi-3.4.4 if NOT "%IncludeSSLSrc%"=="false" set libraries=%libraries% openssl-3.0.13 set libraries=%libraries% mpdecimal-4.0.0 set libraries=%libraries% sqlite-3.45.3.0 -if NOT "%IncludeTkinterSrc%"=="false" set libraries=%libraries% tcl-core-8.6.13.1 -if NOT "%IncludeTkinterSrc%"=="false" set libraries=%libraries% tk-8.6.13.1 +if NOT "%IncludeTkinterSrc%"=="false" set libraries=%libraries% tcl-core-8.6.14.0 +if NOT "%IncludeTkinterSrc%"=="false" set libraries=%libraries% tk-8.6.14.0 set libraries=%libraries% xz-5.2.5 set libraries=%libraries% zlib-1.3.1 @@ -78,7 +78,7 @@ echo.Fetching external binaries... 
set binaries= if NOT "%IncludeLibffi%"=="false" set binaries=%binaries% libffi-3.4.4 if NOT "%IncludeSSL%"=="false" set binaries=%binaries% openssl-bin-3.0.13 -if NOT "%IncludeTkinter%"=="false" set binaries=%binaries% tcltk-8.6.13.1 +if NOT "%IncludeTkinter%"=="false" set binaries=%binaries% tcltk-8.6.14.0 if NOT "%IncludeSSLSrc%"=="false" set binaries=%binaries% nasm-2.11.06 for %%b in (%binaries%) do ( diff --git a/PCbuild/tcltk.props b/PCbuild/tcltk.props index 8ddf01d5dd1dca..95b699b4cac0aa 100644 --- a/PCbuild/tcltk.props +++ b/PCbuild/tcltk.props @@ -2,7 +2,7 @@ - 8.6.13.1 + 8.6.14.0 $(TclVersion) $([System.Version]::Parse($(TclVersion)).Major) $([System.Version]::Parse($(TclVersion)).Minor) From 4c387a76f3ac8509c29634f4bbda6c37a67550d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Fri, 31 May 2024 08:41:26 -0400 Subject: [PATCH 008/373] gh-111201: [pyrepl] Ensure optional platform-specific imports are optional (GH-119834) --- Lib/test/test_pyrepl/test_windows_console.py | 24 ++++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Lib/test/test_pyrepl/test_windows_console.py b/Lib/test/test_pyrepl/test_windows_console.py index e87dfe99b1a17d..60392e231508b6 100644 --- a/Lib/test/test_pyrepl/test_windows_console.py +++ b/Lib/test/test_pyrepl/test_windows_console.py @@ -1,24 +1,24 @@ import itertools import sys import unittest -from _pyrepl.console import Event, Console -from _pyrepl.windows_console import ( - MOVE_LEFT, - MOVE_RIGHT, - MOVE_UP, - MOVE_DOWN, - ERASE_IN_LINE, -) + from functools import partial from typing import Iterable -from unittest import TestCase, main -from unittest.mock import MagicMock, call, patch, ANY +from unittest import TestCase +from unittest.mock import MagicMock, call from .support import handle_all_events, code_to_events try: - from _pyrepl.console import Event - from _pyrepl.windows_console import WindowsConsole + from _pyrepl.console import Event, Console + from 
_pyrepl.windows_console import ( + WindowsConsole, + MOVE_LEFT, + MOVE_RIGHT, + MOVE_UP, + MOVE_DOWN, + ERASE_IN_LINE, + ) except ImportError: pass From 91601a55964fdb3c02b21fa3c8dc629daff2390f Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 31 May 2024 16:06:10 +0200 Subject: [PATCH 009/373] gh-111201: Skip pyrepl Windows tests earlier (#119848) Don't attempt to load the pyrepl Windows console on platforms other than Windows. For example, the import can fail if ctypes is missing. --- Lib/test/test_pyrepl/test_windows_console.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_pyrepl/test_windows_console.py b/Lib/test/test_pyrepl/test_windows_console.py index 60392e231508b6..e52a54d31fb5d8 100644 --- a/Lib/test/test_pyrepl/test_windows_console.py +++ b/Lib/test/test_pyrepl/test_windows_console.py @@ -1,7 +1,11 @@ -import itertools import sys import unittest +if sys.platform != 'win32': + raise unittest.SkipTest("test only relevant on win32") + + +import itertools from functools import partial from typing import Iterable from unittest import TestCase @@ -23,7 +27,6 @@ pass -@unittest.skipIf(sys.platform != "win32", "Test class specifically for Windows") class WindowsConsoleTests(TestCase): def console(self, events, **kwargs) -> Console: console = WindowsConsole() From 891c1e36f4e08da107443772a4eb50c72a83836d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 31 May 2024 16:49:26 +0200 Subject: [PATCH 010/373] gh-119853: Add Include/refcount.h file (#119854) --- Include/Python.h | 1 + Include/object.h | 482 ------------------------------------------- Include/refcount.h | 500 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 501 insertions(+), 482 deletions(-) create mode 100644 Include/refcount.h diff --git a/Include/Python.h b/Include/Python.h index e05901b9e52b5a..502c5ec5aeaa3c 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -61,6 +61,7 @@ #include "pystats.h" #include "pyatomic.h" #include
"object.h" +#include "refcount.h" #include "objimpl.h" #include "typeslots.h" #include "pyhash.h" diff --git a/Include/object.h b/Include/object.h index 9132784628a501..c8c63b9b2b1450 100644 --- a/Include/object.h +++ b/Include/object.h @@ -59,59 +59,6 @@ whose size is determined when the object is allocated. /* PyObject_HEAD defines the initial segment of every PyObject. */ #define PyObject_HEAD PyObject ob_base; -/* -Immortalization: - -The following indicates the immortalization strategy depending on the amount -of available bits in the reference count field. All strategies are backwards -compatible but the specific reference count value or immortalization check -might change depending on the specializations for the underlying system. - -Proper deallocation of immortal instances requires distinguishing between -statically allocated immortal instances vs those promoted by the runtime to be -immortal. The latter should be the only instances that require -cleanup during runtime finalization. -*/ - -#if SIZEOF_VOID_P > 4 -/* -In 64+ bit systems, an object will be marked as immortal by setting all of the -lower 32 bits of the reference count field, which is equal to: 0xFFFFFFFF - -Using the lower 32 bits makes the value backwards compatible by allowing -C-Extensions without the updated checks in Py_INCREF and Py_DECREF to safely -increase and decrease the objects reference count. The object would lose its -immortality, but the execution would still be correct. - -Reference count increases will use saturated arithmetic, taking advantage of -having all the lower 32 bits set, which will avoid the reference count to go -beyond the refcount limit. Immortality checks for reference count decreases will -be done by checking the bit sign flag in the lower 32 bits. 
-*/ -#define _Py_IMMORTAL_REFCNT _Py_CAST(Py_ssize_t, UINT_MAX) - -#else -/* -In 32 bit systems, an object will be marked as immortal by setting all of the -lower 30 bits of the reference count field, which is equal to: 0x3FFFFFFF - -Using the lower 30 bits makes the value backwards compatible by allowing -C-Extensions without the updated checks in Py_INCREF and Py_DECREF to safely -increase and decrease the objects reference count. The object would lose its -immortality, but the execution would still be correct. - -Reference count increases and decreases will first go through an immortality -check by comparing the reference count field to the immortality reference count. -*/ -#define _Py_IMMORTAL_REFCNT _Py_CAST(Py_ssize_t, UINT_MAX >> 2) -#endif - -// Py_GIL_DISABLED builds indicate immortal objects using `ob_ref_local`, which is -// always 32-bits. -#ifdef Py_GIL_DISABLED -#define _Py_IMMORTAL_REFCNT_LOCAL UINT32_MAX -#endif - // Kept for backward compatibility. It was needed by Py_TRACE_REFS build. #define _PyObject_EXTRA_INIT @@ -190,20 +137,6 @@ struct _object { // fields have been merged. #define _Py_UNOWNED_TID 0 -// The shared reference count uses the two least-significant bits to store -// flags. The remaining bits are used to store the reference count. -#define _Py_REF_SHARED_SHIFT 2 -#define _Py_REF_SHARED_FLAG_MASK 0x3 - -// The shared flags are initialized to zero. -#define _Py_REF_SHARED_INIT 0x0 -#define _Py_REF_MAYBE_WEAKREF 0x1 -#define _Py_REF_QUEUED 0x2 -#define _Py_REF_MERGED 0x3 - -// Create a shared field from a refcnt and desired flags -#define _Py_REF_SHARED(refcnt, flags) (((refcnt) << _Py_REF_SHARED_SHIFT) + (flags)) - // NOTE: In non-free-threaded builds, `struct _PyMutex` is defined in // pycore_lock.h. See pycore_lock.h for more details. 
struct _PyMutex { uint8_t v; }; @@ -311,24 +244,6 @@ _Py_IsOwnedByCurrentThread(PyObject *ob) } #endif -static inline Py_ssize_t Py_REFCNT(PyObject *ob) { -#if !defined(Py_GIL_DISABLED) - return ob->ob_refcnt; -#else - uint32_t local = _Py_atomic_load_uint32_relaxed(&ob->ob_ref_local); - if (local == _Py_IMMORTAL_REFCNT_LOCAL) { - return _Py_IMMORTAL_REFCNT; - } - Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&ob->ob_ref_shared); - return _Py_STATIC_CAST(Py_ssize_t, local) + - Py_ARITHMETIC_RIGHT_SHIFT(Py_ssize_t, shared, _Py_REF_SHARED_SHIFT); -#endif -} -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 -# define Py_REFCNT(ob) Py_REFCNT(_PyObject_CAST(ob)) -#endif - - // bpo-39573: The Py_SET_TYPE() function must be used to set an object type. static inline PyTypeObject* Py_TYPE(PyObject *ob) { return ob->ob_type; @@ -350,19 +265,6 @@ static inline Py_ssize_t Py_SIZE(PyObject *ob) { # define Py_SIZE(ob) Py_SIZE(_PyObject_CAST(ob)) #endif -static inline Py_ALWAYS_INLINE int _Py_IsImmortal(PyObject *op) -{ -#if defined(Py_GIL_DISABLED) - return (_Py_atomic_load_uint32_relaxed(&op->ob_ref_local) == - _Py_IMMORTAL_REFCNT_LOCAL); -#elif SIZEOF_VOID_P > 4 - return (_Py_CAST(PY_INT32_T, op->ob_refcnt) < 0); -#else - return (op->ob_refcnt == _Py_IMMORTAL_REFCNT); -#endif -} -#define _Py_IsImmortal(op) _Py_IsImmortal(_PyObject_CAST(op)) - static inline int Py_IS_TYPE(PyObject *ob, PyTypeObject *type) { return Py_TYPE(ob) == type; } @@ -371,55 +273,6 @@ static inline int Py_IS_TYPE(PyObject *ob, PyTypeObject *type) { #endif -// Py_SET_REFCNT() implementation for stable ABI -PyAPI_FUNC(void) _Py_SetRefcnt(PyObject *ob, Py_ssize_t refcnt); - -static inline void Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt) { -#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030d0000 - // Stable ABI implements Py_SET_REFCNT() as a function call - // on limited C API version 3.13 and newer. 
- _Py_SetRefcnt(ob, refcnt); -#else - // This immortal check is for code that is unaware of immortal objects. - // The runtime tracks these objects and we should avoid as much - // as possible having extensions inadvertently change the refcnt - // of an immortalized object. - if (_Py_IsImmortal(ob)) { - return; - } - -#ifndef Py_GIL_DISABLED - ob->ob_refcnt = refcnt; -#else - if (_Py_IsOwnedByCurrentThread(ob)) { - if ((size_t)refcnt > (size_t)UINT32_MAX) { - // On overflow, make the object immortal - ob->ob_tid = _Py_UNOWNED_TID; - ob->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL; - ob->ob_ref_shared = 0; - } - else { - // Set local refcount to desired refcount and shared refcount - // to zero, but preserve the shared refcount flags. - ob->ob_ref_local = _Py_STATIC_CAST(uint32_t, refcnt); - ob->ob_ref_shared &= _Py_REF_SHARED_FLAG_MASK; - } - } - else { - // Set local refcount to zero and shared refcount to desired refcount. - // Mark the object as merged. - ob->ob_tid = _Py_UNOWNED_TID; - ob->ob_ref_local = 0; - ob->ob_ref_shared = _Py_REF_SHARED(refcnt, _Py_REF_MERGED); - } -#endif // Py_GIL_DISABLED -#endif // Py_LIMITED_API+0 < 0x030d0000 -} -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 -# define Py_SET_REFCNT(ob, refcnt) Py_SET_REFCNT(_PyObject_CAST(ob), (refcnt)) -#endif - - static inline void Py_SET_TYPE(PyObject *ob, PyTypeObject *type) { ob->ob_type = type; } @@ -740,341 +593,6 @@ given type object has a specified feature. #define Py_TPFLAGS_HAVE_VERSION_TAG (1UL << 18) -/* -The macros Py_INCREF(op) and Py_DECREF(op) are used to increment or decrement -reference counts. Py_DECREF calls the object's deallocator function when -the refcount falls to 0; for -objects that don't contain references to other objects or heap memory -this can be the standard function free(). Both macros can be used -wherever a void expression is allowed. The argument must not be a -NULL pointer. If it may be NULL, use Py_XINCREF/Py_XDECREF instead. 
-The macro _Py_NewReference(op) initialize reference counts to 1, and -in special builds (Py_REF_DEBUG, Py_TRACE_REFS) performs additional -bookkeeping appropriate to the special build. - -We assume that the reference count field can never overflow; this can -be proven when the size of the field is the same as the pointer size, so -we ignore the possibility. Provided a C int is at least 32 bits (which -is implicitly assumed in many parts of this code), that's enough for -about 2**31 references to an object. - -XXX The following became out of date in Python 2.2, but I'm not sure -XXX what the full truth is now. Certainly, heap-allocated type objects -XXX can and should be deallocated. -Type objects should never be deallocated; the type pointer in an object -is not considered to be a reference to the type object, to save -complications in the deallocation function. (This is actually a -decision that's up to the implementer of each new type so if you want, -you can count such references to the type object.) -*/ - -#if defined(Py_REF_DEBUG) && !defined(Py_LIMITED_API) -PyAPI_FUNC(void) _Py_NegativeRefcount(const char *filename, int lineno, - PyObject *op); -PyAPI_FUNC(void) _Py_INCREF_IncRefTotal(void); -PyAPI_FUNC(void) _Py_DECREF_DecRefTotal(void); -#endif // Py_REF_DEBUG && !Py_LIMITED_API - -PyAPI_FUNC(void) _Py_Dealloc(PyObject *); - -/* -These are provided as conveniences to Python runtime embedders, so that -they can have object code that is not dependent on Python compilation flags. -*/ -PyAPI_FUNC(void) Py_IncRef(PyObject *); -PyAPI_FUNC(void) Py_DecRef(PyObject *); - -// Similar to Py_IncRef() and Py_DecRef() but the argument must be non-NULL. -// Private functions used by Py_INCREF() and Py_DECREF(). 
-PyAPI_FUNC(void) _Py_IncRef(PyObject *); -PyAPI_FUNC(void) _Py_DecRef(PyObject *); - -static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op) -{ -#if defined(Py_LIMITED_API) && (Py_LIMITED_API+0 >= 0x030c0000 || defined(Py_REF_DEBUG)) - // Stable ABI implements Py_INCREF() as a function call on limited C API - // version 3.12 and newer, and on Python built in debug mode. _Py_IncRef() - // was added to Python 3.10.0a7, use Py_IncRef() on older Python versions. - // Py_IncRef() accepts NULL whereas _Py_IncRef() doesn't. -# if Py_LIMITED_API+0 >= 0x030a00A7 - _Py_IncRef(op); -# else - Py_IncRef(op); -# endif -#else - // Non-limited C API and limited C API for Python 3.9 and older access - // directly PyObject.ob_refcnt. -#if defined(Py_GIL_DISABLED) - uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); - uint32_t new_local = local + 1; - if (new_local == 0) { - // local is equal to _Py_IMMORTAL_REFCNT: do nothing - return; - } - if (_Py_IsOwnedByCurrentThread(op)) { - _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, new_local); - } - else { - _Py_atomic_add_ssize(&op->ob_ref_shared, (1 << _Py_REF_SHARED_SHIFT)); - } -#elif SIZEOF_VOID_P > 4 - // Portable saturated add, branching on the carry flag and set low bits - PY_UINT32_T cur_refcnt = op->ob_refcnt_split[PY_BIG_ENDIAN]; - PY_UINT32_T new_refcnt = cur_refcnt + 1; - if (new_refcnt == 0) { - // cur_refcnt is equal to _Py_IMMORTAL_REFCNT: the object is immortal, - // do nothing - return; - } - op->ob_refcnt_split[PY_BIG_ENDIAN] = new_refcnt; -#else - // Explicitly check immortality against the immortal value - if (_Py_IsImmortal(op)) { - return; - } - op->ob_refcnt++; -#endif - _Py_INCREF_STAT_INC(); -#ifdef Py_REF_DEBUG - _Py_INCREF_IncRefTotal(); -#endif -#endif -} -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 -# define Py_INCREF(op) Py_INCREF(_PyObject_CAST(op)) -#endif - - -#if !defined(Py_LIMITED_API) && defined(Py_GIL_DISABLED) -// Implements Py_DECREF on objects not 
owned by the current thread. -PyAPI_FUNC(void) _Py_DecRefShared(PyObject *); -PyAPI_FUNC(void) _Py_DecRefSharedDebug(PyObject *, const char *, int); - -// Called from Py_DECREF by the owning thread when the local refcount reaches -// zero. The call will deallocate the object if the shared refcount is also -// zero. Otherwise, the thread gives up ownership and merges the reference -// count fields. -PyAPI_FUNC(void) _Py_MergeZeroLocalRefcount(PyObject *); -#endif - -#if defined(Py_LIMITED_API) && (Py_LIMITED_API+0 >= 0x030c0000 || defined(Py_REF_DEBUG)) -// Stable ABI implements Py_DECREF() as a function call on limited C API -// version 3.12 and newer, and on Python built in debug mode. _Py_DecRef() was -// added to Python 3.10.0a7, use Py_DecRef() on older Python versions. -// Py_DecRef() accepts NULL whereas _Py_IncRef() doesn't. -static inline void Py_DECREF(PyObject *op) { -# if Py_LIMITED_API+0 >= 0x030a00A7 - _Py_DecRef(op); -# else - Py_DecRef(op); -# endif -} -#define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op)) - -#elif defined(Py_GIL_DISABLED) && defined(Py_REF_DEBUG) -static inline void Py_DECREF(const char *filename, int lineno, PyObject *op) -{ - uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); - if (local == _Py_IMMORTAL_REFCNT_LOCAL) { - return; - } - _Py_DECREF_STAT_INC(); - _Py_DECREF_DecRefTotal(); - if (_Py_IsOwnedByCurrentThread(op)) { - if (local == 0) { - _Py_NegativeRefcount(filename, lineno, op); - } - local--; - _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local); - if (local == 0) { - _Py_MergeZeroLocalRefcount(op); - } - } - else { - _Py_DecRefSharedDebug(op, filename, lineno); - } -} -#define Py_DECREF(op) Py_DECREF(__FILE__, __LINE__, _PyObject_CAST(op)) - -#elif defined(Py_GIL_DISABLED) -static inline void Py_DECREF(PyObject *op) -{ - uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); - if (local == _Py_IMMORTAL_REFCNT_LOCAL) { - return; - } - _Py_DECREF_STAT_INC(); - if 
(_Py_IsOwnedByCurrentThread(op)) { - local--; - _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local); - if (local == 0) { - _Py_MergeZeroLocalRefcount(op); - } - } - else { - _Py_DecRefShared(op); - } -} -#define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op)) - -#elif defined(Py_REF_DEBUG) -static inline void Py_DECREF(const char *filename, int lineno, PyObject *op) -{ - if (op->ob_refcnt <= 0) { - _Py_NegativeRefcount(filename, lineno, op); - } - if (_Py_IsImmortal(op)) { - return; - } - _Py_DECREF_STAT_INC(); - _Py_DECREF_DecRefTotal(); - if (--op->ob_refcnt == 0) { - _Py_Dealloc(op); - } -} -#define Py_DECREF(op) Py_DECREF(__FILE__, __LINE__, _PyObject_CAST(op)) - -#else -static inline Py_ALWAYS_INLINE void Py_DECREF(PyObject *op) -{ - // Non-limited C API and limited C API for Python 3.9 and older access - // directly PyObject.ob_refcnt. - if (_Py_IsImmortal(op)) { - return; - } - _Py_DECREF_STAT_INC(); - if (--op->ob_refcnt == 0) { - _Py_Dealloc(op); - } -} -#define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op)) -#endif - - -/* Safely decref `op` and set `op` to NULL, especially useful in tp_clear - * and tp_dealloc implementations. - * - * Note that "the obvious" code can be deadly: - * - * Py_XDECREF(op); - * op = NULL; - * - * Typically, `op` is something like self->containee, and `self` is done - * using its `containee` member. In the code sequence above, suppose - * `containee` is non-NULL with a refcount of 1. Its refcount falls to - * 0 on the first line, which can trigger an arbitrary amount of code, - * possibly including finalizers (like __del__ methods or weakref callbacks) - * coded in Python, which in turn can release the GIL and allow other threads - * to run, etc. Such code may even invoke methods of `self` again, or cause - * cyclic gc to trigger, but-- oops! --self->containee still points to the - * object being torn down, and it may be in an insane state while being torn - * down. 
This has in fact been a rich historic source of miserable (rare & - * hard-to-diagnose) segfaulting (and other) bugs. - * - * The safe way is: - * - * Py_CLEAR(op); - * - * That arranges to set `op` to NULL _before_ decref'ing, so that any code - * triggered as a side-effect of `op` getting torn down no longer believes - * `op` points to a valid object. - * - * There are cases where it's safe to use the naive code, but they're brittle. - * For example, if `op` points to a Python integer, you know that destroying - * one of those can't cause problems -- but in part that relies on that - * Python integers aren't currently weakly referencable. Best practice is - * to use Py_CLEAR() even if you can't think of a reason for why you need to. - * - * gh-98724: Use a temporary variable to only evaluate the macro argument once, - * to avoid the duplication of side effects if the argument has side effects. - * - * gh-99701: If the PyObject* type is used with casting arguments to PyObject*, - * the code can be miscompiled with strict aliasing because of type punning. - * With strict aliasing, a compiler considers that two pointers of different - * types cannot read or write the same memory which enables optimization - * opportunities. - * - * If available, use _Py_TYPEOF() to use the 'op' type for temporary variables, - * and so avoid type punning. Otherwise, use memcpy() which causes type erasure - * and so prevents the compiler to reuse an old cached 'op' value after - * Py_CLEAR(). 
- */ -#ifdef _Py_TYPEOF -#define Py_CLEAR(op) \ - do { \ - _Py_TYPEOF(op)* _tmp_op_ptr = &(op); \ - _Py_TYPEOF(op) _tmp_old_op = (*_tmp_op_ptr); \ - if (_tmp_old_op != NULL) { \ - *_tmp_op_ptr = _Py_NULL; \ - Py_DECREF(_tmp_old_op); \ - } \ - } while (0) -#else -#define Py_CLEAR(op) \ - do { \ - PyObject **_tmp_op_ptr = _Py_CAST(PyObject**, &(op)); \ - PyObject *_tmp_old_op = (*_tmp_op_ptr); \ - if (_tmp_old_op != NULL) { \ - PyObject *_null_ptr = _Py_NULL; \ - memcpy(_tmp_op_ptr, &_null_ptr, sizeof(PyObject*)); \ - Py_DECREF(_tmp_old_op); \ - } \ - } while (0) -#endif - - -/* Function to use in case the object pointer can be NULL: */ -static inline void Py_XINCREF(PyObject *op) -{ - if (op != _Py_NULL) { - Py_INCREF(op); - } -} -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 -# define Py_XINCREF(op) Py_XINCREF(_PyObject_CAST(op)) -#endif - -static inline void Py_XDECREF(PyObject *op) -{ - if (op != _Py_NULL) { - Py_DECREF(op); - } -} -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 -# define Py_XDECREF(op) Py_XDECREF(_PyObject_CAST(op)) -#endif - -// Create a new strong reference to an object: -// increment the reference count of the object and return the object. -PyAPI_FUNC(PyObject*) Py_NewRef(PyObject *obj); - -// Similar to Py_NewRef(), but the object can be NULL. -PyAPI_FUNC(PyObject*) Py_XNewRef(PyObject *obj); - -static inline PyObject* _Py_NewRef(PyObject *obj) -{ - Py_INCREF(obj); - return obj; -} - -static inline PyObject* _Py_XNewRef(PyObject *obj) -{ - Py_XINCREF(obj); - return obj; -} - -// Py_NewRef() and Py_XNewRef() are exported as functions for the stable ABI. -// Names overridden with macros by static inline functions for best -// performances. 
-#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 -# define Py_NewRef(obj) _Py_NewRef(_PyObject_CAST(obj)) -# define Py_XNewRef(obj) _Py_XNewRef(_PyObject_CAST(obj)) -#else -# define Py_NewRef(obj) _Py_NewRef(obj) -# define Py_XNewRef(obj) _Py_XNewRef(obj) -#endif - - #define Py_CONSTANT_NONE 0 #define Py_CONSTANT_FALSE 1 #define Py_CONSTANT_TRUE 2 diff --git a/Include/refcount.h b/Include/refcount.h new file mode 100644 index 00000000000000..a0bd2087fb1b57 --- /dev/null +++ b/Include/refcount.h @@ -0,0 +1,500 @@ +#ifndef Py_REFCOUNT_H +#define Py_REFCOUNT_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* +Immortalization: + +The following indicates the immortalization strategy depending on the amount +of available bits in the reference count field. All strategies are backwards +compatible but the specific reference count value or immortalization check +might change depending on the specializations for the underlying system. + +Proper deallocation of immortal instances requires distinguishing between +statically allocated immortal instances vs those promoted by the runtime to be +immortal. The latter should be the only instances that require +cleanup during runtime finalization. +*/ + +#if SIZEOF_VOID_P > 4 +/* +In 64+ bit systems, an object will be marked as immortal by setting all of the +lower 32 bits of the reference count field, which is equal to: 0xFFFFFFFF + +Using the lower 32 bits makes the value backwards compatible by allowing +C-Extensions without the updated checks in Py_INCREF and Py_DECREF to safely +increase and decrease the objects reference count. The object would lose its +immortality, but the execution would still be correct. + +Reference count increases will use saturated arithmetic, taking advantage of +having all the lower 32 bits set, which will avoid the reference count to go +beyond the refcount limit. Immortality checks for reference count decreases will +be done by checking the bit sign flag in the lower 32 bits. 
+*/ +#define _Py_IMMORTAL_REFCNT _Py_CAST(Py_ssize_t, UINT_MAX) + +#else +/* +In 32 bit systems, an object will be marked as immortal by setting all of the +lower 30 bits of the reference count field, which is equal to: 0x3FFFFFFF + +Using the lower 30 bits makes the value backwards compatible by allowing +C-Extensions without the updated checks in Py_INCREF and Py_DECREF to safely +increase and decrease the objects reference count. The object would lose its +immortality, but the execution would still be correct. + +Reference count increases and decreases will first go through an immortality +check by comparing the reference count field to the immortality reference count. +*/ +#define _Py_IMMORTAL_REFCNT _Py_CAST(Py_ssize_t, UINT_MAX >> 2) +#endif + +// Py_GIL_DISABLED builds indicate immortal objects using `ob_ref_local`, which is +// always 32-bits. +#ifdef Py_GIL_DISABLED +#define _Py_IMMORTAL_REFCNT_LOCAL UINT32_MAX +#endif + + +#ifdef Py_GIL_DISABLED + // The shared reference count uses the two least-significant bits to store + // flags. The remaining bits are used to store the reference count. +# define _Py_REF_SHARED_SHIFT 2 +# define _Py_REF_SHARED_FLAG_MASK 0x3 + + // The shared flags are initialized to zero. 
+# define _Py_REF_SHARED_INIT 0x0 +# define _Py_REF_MAYBE_WEAKREF 0x1 +# define _Py_REF_QUEUED 0x2 +# define _Py_REF_MERGED 0x3 + + // Create a shared field from a refcnt and desired flags +# define _Py_REF_SHARED(refcnt, flags) \ + (((refcnt) << _Py_REF_SHARED_SHIFT) + (flags)) +#endif // Py_GIL_DISABLED + + +static inline Py_ssize_t Py_REFCNT(PyObject *ob) { +#if !defined(Py_GIL_DISABLED) + return ob->ob_refcnt; +#else + uint32_t local = _Py_atomic_load_uint32_relaxed(&ob->ob_ref_local); + if (local == _Py_IMMORTAL_REFCNT_LOCAL) { + return _Py_IMMORTAL_REFCNT; + } + Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&ob->ob_ref_shared); + return _Py_STATIC_CAST(Py_ssize_t, local) + + Py_ARITHMETIC_RIGHT_SHIFT(Py_ssize_t, shared, _Py_REF_SHARED_SHIFT); +#endif +} +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define Py_REFCNT(ob) Py_REFCNT(_PyObject_CAST(ob)) +#endif + + +static inline Py_ALWAYS_INLINE int _Py_IsImmortal(PyObject *op) +{ +#if defined(Py_GIL_DISABLED) + return (_Py_atomic_load_uint32_relaxed(&op->ob_ref_local) == + _Py_IMMORTAL_REFCNT_LOCAL); +#elif SIZEOF_VOID_P > 4 + return (_Py_CAST(PY_INT32_T, op->ob_refcnt) < 0); +#else + return (op->ob_refcnt == _Py_IMMORTAL_REFCNT); +#endif +} +#define _Py_IsImmortal(op) _Py_IsImmortal(_PyObject_CAST(op)) + + +// Py_SET_REFCNT() implementation for stable ABI +PyAPI_FUNC(void) _Py_SetRefcnt(PyObject *ob, Py_ssize_t refcnt); + +static inline void Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt) { +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030d0000 + // Stable ABI implements Py_SET_REFCNT() as a function call + // on limited C API version 3.13 and newer. + _Py_SetRefcnt(ob, refcnt); +#else + // This immortal check is for code that is unaware of immortal objects. + // The runtime tracks these objects and we should avoid as much + // as possible having extensions inadvertently change the refcnt + // of an immortalized object. 
+ if (_Py_IsImmortal(ob)) { + return; + } + +#ifndef Py_GIL_DISABLED + ob->ob_refcnt = refcnt; +#else + if (_Py_IsOwnedByCurrentThread(ob)) { + if ((size_t)refcnt > (size_t)UINT32_MAX) { + // On overflow, make the object immortal + ob->ob_tid = _Py_UNOWNED_TID; + ob->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL; + ob->ob_ref_shared = 0; + } + else { + // Set local refcount to desired refcount and shared refcount + // to zero, but preserve the shared refcount flags. + ob->ob_ref_local = _Py_STATIC_CAST(uint32_t, refcnt); + ob->ob_ref_shared &= _Py_REF_SHARED_FLAG_MASK; + } + } + else { + // Set local refcount to zero and shared refcount to desired refcount. + // Mark the object as merged. + ob->ob_tid = _Py_UNOWNED_TID; + ob->ob_ref_local = 0; + ob->ob_ref_shared = _Py_REF_SHARED(refcnt, _Py_REF_MERGED); + } +#endif // Py_GIL_DISABLED +#endif // Py_LIMITED_API+0 < 0x030d0000 +} +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define Py_SET_REFCNT(ob, refcnt) Py_SET_REFCNT(_PyObject_CAST(ob), (refcnt)) +#endif + + +/* +The macros Py_INCREF(op) and Py_DECREF(op) are used to increment or decrement +reference counts. Py_DECREF calls the object's deallocator function when +the refcount falls to 0; for +objects that don't contain references to other objects or heap memory +this can be the standard function free(). Both macros can be used +wherever a void expression is allowed. The argument must not be a +NULL pointer. If it may be NULL, use Py_XINCREF/Py_XDECREF instead. +The macro _Py_NewReference(op) initialize reference counts to 1, and +in special builds (Py_REF_DEBUG, Py_TRACE_REFS) performs additional +bookkeeping appropriate to the special build. + +We assume that the reference count field can never overflow; this can +be proven when the size of the field is the same as the pointer size, so +we ignore the possibility. 
Provided a C int is at least 32 bits (which +is implicitly assumed in many parts of this code), that's enough for +about 2**31 references to an object. + +XXX The following became out of date in Python 2.2, but I'm not sure +XXX what the full truth is now. Certainly, heap-allocated type objects +XXX can and should be deallocated. +Type objects should never be deallocated; the type pointer in an object +is not considered to be a reference to the type object, to save +complications in the deallocation function. (This is actually a +decision that's up to the implementer of each new type so if you want, +you can count such references to the type object.) +*/ + +#if defined(Py_REF_DEBUG) && !defined(Py_LIMITED_API) +PyAPI_FUNC(void) _Py_NegativeRefcount(const char *filename, int lineno, + PyObject *op); +PyAPI_FUNC(void) _Py_INCREF_IncRefTotal(void); +PyAPI_FUNC(void) _Py_DECREF_DecRefTotal(void); +#endif // Py_REF_DEBUG && !Py_LIMITED_API + +PyAPI_FUNC(void) _Py_Dealloc(PyObject *); + + +/* +These are provided as conveniences to Python runtime embedders, so that +they can have object code that is not dependent on Python compilation flags. +*/ +PyAPI_FUNC(void) Py_IncRef(PyObject *); +PyAPI_FUNC(void) Py_DecRef(PyObject *); + +// Similar to Py_IncRef() and Py_DecRef() but the argument must be non-NULL. +// Private functions used by Py_INCREF() and Py_DECREF(). +PyAPI_FUNC(void) _Py_IncRef(PyObject *); +PyAPI_FUNC(void) _Py_DecRef(PyObject *); + +static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op) +{ +#if defined(Py_LIMITED_API) && (Py_LIMITED_API+0 >= 0x030c0000 || defined(Py_REF_DEBUG)) + // Stable ABI implements Py_INCREF() as a function call on limited C API + // version 3.12 and newer, and on Python built in debug mode. _Py_IncRef() + // was added to Python 3.10.0a7, use Py_IncRef() on older Python versions. + // Py_IncRef() accepts NULL whereas _Py_IncRef() doesn't. 
+# if Py_LIMITED_API+0 >= 0x030a00A7 + _Py_IncRef(op); +# else + Py_IncRef(op); +# endif +#else + // Non-limited C API and limited C API for Python 3.9 and older access + // directly PyObject.ob_refcnt. +#if defined(Py_GIL_DISABLED) + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + uint32_t new_local = local + 1; + if (new_local == 0) { + // local is equal to _Py_IMMORTAL_REFCNT: do nothing + return; + } + if (_Py_IsOwnedByCurrentThread(op)) { + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, new_local); + } + else { + _Py_atomic_add_ssize(&op->ob_ref_shared, (1 << _Py_REF_SHARED_SHIFT)); + } +#elif SIZEOF_VOID_P > 4 + // Portable saturated add, branching on the carry flag and set low bits + PY_UINT32_T cur_refcnt = op->ob_refcnt_split[PY_BIG_ENDIAN]; + PY_UINT32_T new_refcnt = cur_refcnt + 1; + if (new_refcnt == 0) { + // cur_refcnt is equal to _Py_IMMORTAL_REFCNT: the object is immortal, + // do nothing + return; + } + op->ob_refcnt_split[PY_BIG_ENDIAN] = new_refcnt; +#else + // Explicitly check immortality against the immortal value + if (_Py_IsImmortal(op)) { + return; + } + op->ob_refcnt++; +#endif + _Py_INCREF_STAT_INC(); +#ifdef Py_REF_DEBUG + _Py_INCREF_IncRefTotal(); +#endif +#endif +} +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define Py_INCREF(op) Py_INCREF(_PyObject_CAST(op)) +#endif + + +#if !defined(Py_LIMITED_API) && defined(Py_GIL_DISABLED) +// Implements Py_DECREF on objects not owned by the current thread. +PyAPI_FUNC(void) _Py_DecRefShared(PyObject *); +PyAPI_FUNC(void) _Py_DecRefSharedDebug(PyObject *, const char *, int); + +// Called from Py_DECREF by the owning thread when the local refcount reaches +// zero. The call will deallocate the object if the shared refcount is also +// zero. Otherwise, the thread gives up ownership and merges the reference +// count fields. 
+PyAPI_FUNC(void) _Py_MergeZeroLocalRefcount(PyObject *); +#endif + +#if defined(Py_LIMITED_API) && (Py_LIMITED_API+0 >= 0x030c0000 || defined(Py_REF_DEBUG)) +// Stable ABI implements Py_DECREF() as a function call on limited C API +// version 3.12 and newer, and on Python built in debug mode. _Py_DecRef() was +// added to Python 3.10.0a7, use Py_DecRef() on older Python versions. +// Py_DecRef() accepts NULL whereas _Py_IncRef() doesn't. +static inline void Py_DECREF(PyObject *op) { +# if Py_LIMITED_API+0 >= 0x030a00A7 + _Py_DecRef(op); +# else + Py_DecRef(op); +# endif +} +#define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op)) + +#elif defined(Py_GIL_DISABLED) && defined(Py_REF_DEBUG) +static inline void Py_DECREF(const char *filename, int lineno, PyObject *op) +{ + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + if (local == _Py_IMMORTAL_REFCNT_LOCAL) { + return; + } + _Py_DECREF_STAT_INC(); + _Py_DECREF_DecRefTotal(); + if (_Py_IsOwnedByCurrentThread(op)) { + if (local == 0) { + _Py_NegativeRefcount(filename, lineno, op); + } + local--; + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local); + if (local == 0) { + _Py_MergeZeroLocalRefcount(op); + } + } + else { + _Py_DecRefSharedDebug(op, filename, lineno); + } +} +#define Py_DECREF(op) Py_DECREF(__FILE__, __LINE__, _PyObject_CAST(op)) + +#elif defined(Py_GIL_DISABLED) +static inline void Py_DECREF(PyObject *op) +{ + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + if (local == _Py_IMMORTAL_REFCNT_LOCAL) { + return; + } + _Py_DECREF_STAT_INC(); + if (_Py_IsOwnedByCurrentThread(op)) { + local--; + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local); + if (local == 0) { + _Py_MergeZeroLocalRefcount(op); + } + } + else { + _Py_DecRefShared(op); + } +} +#define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op)) + +#elif defined(Py_REF_DEBUG) +static inline void Py_DECREF(const char *filename, int lineno, PyObject *op) +{ + if (op->ob_refcnt <= 0) { + 
_Py_NegativeRefcount(filename, lineno, op); + } + if (_Py_IsImmortal(op)) { + return; + } + _Py_DECREF_STAT_INC(); + _Py_DECREF_DecRefTotal(); + if (--op->ob_refcnt == 0) { + _Py_Dealloc(op); + } +} +#define Py_DECREF(op) Py_DECREF(__FILE__, __LINE__, _PyObject_CAST(op)) + +#else +static inline Py_ALWAYS_INLINE void Py_DECREF(PyObject *op) +{ + // Non-limited C API and limited C API for Python 3.9 and older access + // directly PyObject.ob_refcnt. + if (_Py_IsImmortal(op)) { + return; + } + _Py_DECREF_STAT_INC(); + if (--op->ob_refcnt == 0) { + _Py_Dealloc(op); + } +} +#define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op)) +#endif + + +/* Safely decref `op` and set `op` to NULL, especially useful in tp_clear + * and tp_dealloc implementations. + * + * Note that "the obvious" code can be deadly: + * + * Py_XDECREF(op); + * op = NULL; + * + * Typically, `op` is something like self->containee, and `self` is done + * using its `containee` member. In the code sequence above, suppose + * `containee` is non-NULL with a refcount of 1. Its refcount falls to + * 0 on the first line, which can trigger an arbitrary amount of code, + * possibly including finalizers (like __del__ methods or weakref callbacks) + * coded in Python, which in turn can release the GIL and allow other threads + * to run, etc. Such code may even invoke methods of `self` again, or cause + * cyclic gc to trigger, but-- oops! --self->containee still points to the + * object being torn down, and it may be in an insane state while being torn + * down. This has in fact been a rich historic source of miserable (rare & + * hard-to-diagnose) segfaulting (and other) bugs. + * + * The safe way is: + * + * Py_CLEAR(op); + * + * That arranges to set `op` to NULL _before_ decref'ing, so that any code + * triggered as a side-effect of `op` getting torn down no longer believes + * `op` points to a valid object. + * + * There are cases where it's safe to use the naive code, but they're brittle. 
+ * For example, if `op` points to a Python integer, you know that destroying + * one of those can't cause problems -- but in part that relies on that + * Python integers aren't currently weakly referencable. Best practice is + * to use Py_CLEAR() even if you can't think of a reason for why you need to. + * + * gh-98724: Use a temporary variable to only evaluate the macro argument once, + * to avoid the duplication of side effects if the argument has side effects. + * + * gh-99701: If the PyObject* type is used with casting arguments to PyObject*, + * the code can be miscompiled with strict aliasing because of type punning. + * With strict aliasing, a compiler considers that two pointers of different + * types cannot read or write the same memory which enables optimization + * opportunities. + * + * If available, use _Py_TYPEOF() to use the 'op' type for temporary variables, + * and so avoid type punning. Otherwise, use memcpy() which causes type erasure + * and so prevents the compiler from reusing an old cached 'op' value after + * Py_CLEAR().
+ */ +#ifdef _Py_TYPEOF +#define Py_CLEAR(op) \ + do { \ + _Py_TYPEOF(op)* _tmp_op_ptr = &(op); \ + _Py_TYPEOF(op) _tmp_old_op = (*_tmp_op_ptr); \ + if (_tmp_old_op != NULL) { \ + *_tmp_op_ptr = _Py_NULL; \ + Py_DECREF(_tmp_old_op); \ + } \ + } while (0) +#else +#define Py_CLEAR(op) \ + do { \ + PyObject **_tmp_op_ptr = _Py_CAST(PyObject**, &(op)); \ + PyObject *_tmp_old_op = (*_tmp_op_ptr); \ + if (_tmp_old_op != NULL) { \ + PyObject *_null_ptr = _Py_NULL; \ + memcpy(_tmp_op_ptr, &_null_ptr, sizeof(PyObject*)); \ + Py_DECREF(_tmp_old_op); \ + } \ + } while (0) +#endif + + +/* Function to use in case the object pointer can be NULL: */ +static inline void Py_XINCREF(PyObject *op) +{ + if (op != _Py_NULL) { + Py_INCREF(op); + } +} +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define Py_XINCREF(op) Py_XINCREF(_PyObject_CAST(op)) +#endif + +static inline void Py_XDECREF(PyObject *op) +{ + if (op != _Py_NULL) { + Py_DECREF(op); + } +} +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define Py_XDECREF(op) Py_XDECREF(_PyObject_CAST(op)) +#endif + +// Create a new strong reference to an object: +// increment the reference count of the object and return the object. +PyAPI_FUNC(PyObject*) Py_NewRef(PyObject *obj); + +// Similar to Py_NewRef(), but the object can be NULL. +PyAPI_FUNC(PyObject*) Py_XNewRef(PyObject *obj); + +static inline PyObject* _Py_NewRef(PyObject *obj) +{ + Py_INCREF(obj); + return obj; +} + +static inline PyObject* _Py_XNewRef(PyObject *obj) +{ + Py_XINCREF(obj); + return obj; +} + +// Py_NewRef() and Py_XNewRef() are exported as functions for the stable ABI. +// Names overridden with macros by static inline functions for best +// performances. 
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define Py_NewRef(obj) _Py_NewRef(_PyObject_CAST(obj)) +# define Py_XNewRef(obj) _Py_XNewRef(_PyObject_CAST(obj)) +#else +# define Py_NewRef(obj) _Py_NewRef(obj) +# define Py_XNewRef(obj) _Py_XNewRef(obj) +#endif + + +#ifdef __cplusplus +} +#endif +#endif // !Py_REFCOUNT_H From bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 31 May 2024 10:50:52 -0400 Subject: [PATCH 011/373] gh-119585: Fix crash involving `PyGILState_Release()` and `PyThreadState_Clear()` (#119753) Make sure that `gilstate_counter` is not zero when calling `PyThreadState_Clear()`. A destructor called from `PyThreadState_Clear()` may call back into `PyGILState_Ensure()` and `PyGILState_Release()`. If `gilstate_counter` is zero, it will try to create a new thread state before the current active thread state is destroyed, leading to an assertion failure or crash. --- Lib/test/test_capi/test_misc.py | 16 ++++++++++++++++ ...024-05-29-21-05-59.gh-issue-119585.Sn7JL3.rst | 5 +++++ Modules/_testcapimodule.c | 9 +++++++++ Python/pystate.c | 6 ++++++ 4 files changed, 36 insertions(+) create mode 100644 Misc/NEWS.d/next/C API/2024-05-29-21-05-59.gh-issue-119585.Sn7JL3.rst diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index ed42d7b64302f9..f3d16e4a2fc92a 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2888,6 +2888,22 @@ def callback(): t.start() t.join() + @threading_helper.reap_threads + @threading_helper.requires_working_threading() + def test_thread_gilstate_in_clear(self): + # See https://github.com/python/cpython/issues/119585 + class C: + def __del__(self): + _testcapi.gilstate_ensure_release() + + # Thread-local variables are destroyed in `PyThreadState_Clear()`.
+ local_var = threading.local() + + def callback(): + local_var.x = C() + + _testcapi._test_thread_state(callback) + @threading_helper.reap_threads @threading_helper.requires_working_threading() def test_gilstate_ensure_no_deadlock(self): diff --git a/Misc/NEWS.d/next/C API/2024-05-29-21-05-59.gh-issue-119585.Sn7JL3.rst b/Misc/NEWS.d/next/C API/2024-05-29-21-05-59.gh-issue-119585.Sn7JL3.rst new file mode 100644 index 00000000000000..038dec2dbf90d1 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-05-29-21-05-59.gh-issue-119585.Sn7JL3.rst @@ -0,0 +1,5 @@ +Fix crash when a thread state that was created by :c:func:`PyGILState_Ensure` +calls a destructor during :c:func:`PyThreadState_Clear` that +calls back into :c:func:`PyGILState_Ensure` and :c:func:`PyGILState_Release`. +This might occur in the free-threaded build or when using thread-local +variables whose destructors call :c:func:`PyGILState_Ensure`. diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index f99ebf0dde4f9e..b58c17260626c2 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -764,6 +764,14 @@ test_thread_state(PyObject *self, PyObject *args) Py_RETURN_NONE; } +static PyObject * +gilstate_ensure_release(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + PyGILState_STATE state = PyGILState_Ensure(); + PyGILState_Release(state); + Py_RETURN_NONE; +} + #ifndef MS_WINDOWS static PyThread_type_lock wait_done = NULL; @@ -3351,6 +3359,7 @@ static PyMethodDef TestMethods[] = { {"test_get_type_dict", test_get_type_dict, METH_NOARGS}, {"test_reftracer", test_reftracer, METH_NOARGS}, {"_test_thread_state", test_thread_state, METH_VARARGS}, + {"gilstate_ensure_release", gilstate_ensure_release, METH_NOARGS}, #ifndef MS_WINDOWS {"_spawn_pthread_waiter", spawn_pthread_waiter, METH_NOARGS}, {"_end_spawned_pthread", end_spawned_pthread, METH_NOARGS}, diff --git a/Python/pystate.c b/Python/pystate.c index 1ea1ad982a0ec9..ad7e082ce0d37e 100644 --- a/Python/pystate.c +++
b/Python/pystate.c @@ -2808,12 +2808,18 @@ PyGILState_Release(PyGILState_STATE oldstate) /* can't have been locked when we created it */ assert(oldstate == PyGILState_UNLOCKED); // XXX Unbind tstate here. + // gh-119585: `PyThreadState_Clear()` may call destructors that + // themselves use PyGILState_Ensure and PyGILState_Release, so make + // sure that gilstate_counter is not zero when calling it. + ++tstate->gilstate_counter; PyThreadState_Clear(tstate); + --tstate->gilstate_counter; /* Delete the thread-state. Note this releases the GIL too! * It's vital that the GIL be held here, to avoid shutdown * races; see bugs 225673 and 1061968 (that nasty bug has a * habit of coming back). */ + assert(tstate->gilstate_counter == 0); assert(current_fast_get() == tstate); _PyThreadState_DeleteCurrent(tstate); } From 64ff1e217d963b48140326e8b63c62f4b306f4a0 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 31 May 2024 17:18:40 +0200 Subject: [PATCH 012/373] gh-119770: Make termios ioctl() constants positive (#119840) --- Lib/test/test_ioctl.py | 22 ++++++------------- Lib/test/test_termios.py | 9 ++++++++ ...-05-31-12-57-31.gh-issue-119770.NCtels.rst | 1 + Modules/termios.c | 18 ++++++++++++--- 4 files changed, 32 insertions(+), 18 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-31-12-57-31.gh-issue-119770.NCtels.rst diff --git a/Lib/test/test_ioctl.py b/Lib/test/test_ioctl.py index 7b7067eb7b61d4..04934dfa16a5f0 100644 --- a/Lib/test/test_ioctl.py +++ b/Lib/test/test_ioctl.py @@ -66,23 +66,15 @@ def test_ioctl_mutate_2048(self): # Test with a larger buffer, just for the record. 
self._check_ioctl_mutate_len(2048) - def test_ioctl_signed_unsigned_code_param(self): - if not pty: - raise unittest.SkipTest('pty module required') + @unittest.skipIf(pty is None, 'pty module required') + def test_ioctl_set_window_size(self): mfd, sfd = pty.openpty() try: - if termios.TIOCSWINSZ < 0: - set_winsz_opcode_maybe_neg = termios.TIOCSWINSZ - set_winsz_opcode_pos = termios.TIOCSWINSZ & 0xffffffff - else: - set_winsz_opcode_pos = termios.TIOCSWINSZ - set_winsz_opcode_maybe_neg, = struct.unpack("i", - struct.pack("I", termios.TIOCSWINSZ)) - - our_winsz = struct.pack("HHHH",80,25,0,0) - # test both with a positive and potentially negative ioctl code - new_winsz = fcntl.ioctl(mfd, set_winsz_opcode_pos, our_winsz) - new_winsz = fcntl.ioctl(mfd, set_winsz_opcode_maybe_neg, our_winsz) + # (rows, columns, xpixel, ypixel) + our_winsz = struct.pack("HHHH", 20, 40, 0, 0) + result = fcntl.ioctl(mfd, termios.TIOCSWINSZ, our_winsz) + new_winsz = struct.unpack("HHHH", result) + self.assertEqual(new_winsz[:2], (20, 40)) finally: os.close(mfd) os.close(sfd) diff --git a/Lib/test/test_termios.py b/Lib/test/test_termios.py index 58698ffac2d981..22e397c7a409c4 100644 --- a/Lib/test/test_termios.py +++ b/Lib/test/test_termios.py @@ -211,6 +211,15 @@ def test_constants(self): self.assertLess(termios.VTIME, termios.NCCS) self.assertLess(termios.VMIN, termios.NCCS) + def test_ioctl_constants(self): + # gh-119770: ioctl() constants must be positive + for name in dir(termios): + if not name.startswith('TIO'): + continue + value = getattr(termios, name) + with self.subTest(name=name): + self.assertGreaterEqual(value, 0) + def test_exception(self): self.assertTrue(issubclass(termios.error, Exception)) self.assertFalse(issubclass(termios.error, OSError)) diff --git a/Misc/NEWS.d/next/Library/2024-05-31-12-57-31.gh-issue-119770.NCtels.rst b/Misc/NEWS.d/next/Library/2024-05-31-12-57-31.gh-issue-119770.NCtels.rst new file mode 100644 index 00000000000000..94265e442db584 --- /dev/null 
+++ b/Misc/NEWS.d/next/Library/2024-05-31-12-57-31.gh-issue-119770.NCtels.rst @@ -0,0 +1 @@ +Make :mod:`termios` ``ioctl()`` constants positive. Patch by Victor Stinner. diff --git a/Modules/termios.c b/Modules/termios.c index 0633d8f82cc7e4..efb5fcc17fa5ef 100644 --- a/Modules/termios.c +++ b/Modules/termios.c @@ -1352,9 +1352,21 @@ termios_exec(PyObject *mod) } while (constant->name != NULL) { - if (PyModule_AddIntConstant( - mod, constant->name, constant->value) < 0) { - return -1; + if (strncmp(constant->name, "TIO", 3) == 0) { + // gh-119770: Convert value to unsigned int for ioctl() constants, + // constants can be negative on macOS whereas ioctl() expects an + // unsigned long 'request'. + unsigned int value = constant->value & UINT_MAX; + if (PyModule_Add(mod, constant->name, + PyLong_FromUnsignedLong(value)) < 0) { + return -1; + } + } + else { + if (PyModule_AddIntConstant( + mod, constant->name, constant->value) < 0) { + return -1; + } } ++constant; } From 078b8c8cf2bf68f7484cc4d2e3dd74b6fab55664 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 31 May 2024 13:04:59 -0400 Subject: [PATCH 013/373] gh-119369: Fix deadlock during thread exit in free-threaded build (#119528) Release the GIL before calling `_Py_qsbr_unregister`. The deadlock could occur when the GIL was enabled at runtime. The `_Py_qsbr_unregister` call might block while holding the GIL because the thread state was not active, but the GIL was still held. 
--- ...-05-24-21-16-52.gh-issue-119369.qBThho.rst | 2 ++ Python/pystate.c | 21 +++++++++++-------- Python/qsbr.c | 5 +++++ 3 files changed, 19 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-24-21-16-52.gh-issue-119369.qBThho.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-24-21-16-52.gh-issue-119369.qBThho.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-24-21-16-52.gh-issue-119369.qBThho.rst new file mode 100644 index 00000000000000..7abdd5cd85ccd6 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-24-21-16-52.gh-issue-119369.qBThho.rst @@ -0,0 +1,2 @@ +Fix deadlock during thread deletion in free-threaded build, which could +occur when the GIL was enabled at runtime. diff --git a/Python/pystate.c b/Python/pystate.c index ad7e082ce0d37e..36e4206b4a282e 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1751,7 +1751,7 @@ decrement_stoptheworld_countdown(struct _stoptheworld_state *stw); /* Common code for PyThreadState_Delete() and PyThreadState_DeleteCurrent() */ static void -tstate_delete_common(PyThreadState *tstate) +tstate_delete_common(PyThreadState *tstate, int release_gil) { assert(tstate->_status.cleared && !tstate->_status.finalized); tstate_verify_not_active(tstate); @@ -1793,10 +1793,6 @@ tstate_delete_common(PyThreadState *tstate) HEAD_UNLOCK(runtime); -#ifdef Py_GIL_DISABLED - _Py_qsbr_unregister(tstate); -#endif - // XXX Unbind in PyThreadState_Clear(), or earlier // (and assert not-equal here)? if (tstate->_status.bound_gilstate) { @@ -1807,6 +1803,14 @@ tstate_delete_common(PyThreadState *tstate) // XXX Move to PyThreadState_Clear()? clear_datastack(tstate); + if (release_gil) { + _PyEval_ReleaseLock(tstate->interp, tstate, 1); + } + +#ifdef Py_GIL_DISABLED + _Py_qsbr_unregister(tstate); +#endif + tstate->_status.finalized = 1; } @@ -1818,7 +1822,7 @@ zapthreads(PyInterpreterState *interp) when the threads are all really dead (XXX famous last words). 
*/ while ((tstate = interp->threads.head) != NULL) { tstate_verify_not_active(tstate); - tstate_delete_common(tstate); + tstate_delete_common(tstate, 0); free_threadstate((_PyThreadStateImpl *)tstate); } } @@ -1829,7 +1833,7 @@ PyThreadState_Delete(PyThreadState *tstate) { _Py_EnsureTstateNotNULL(tstate); tstate_verify_not_active(tstate); - tstate_delete_common(tstate); + tstate_delete_common(tstate, 0); free_threadstate((_PyThreadStateImpl *)tstate); } @@ -1842,8 +1846,7 @@ _PyThreadState_DeleteCurrent(PyThreadState *tstate) _Py_qsbr_detach(((_PyThreadStateImpl *)tstate)->qsbr); #endif current_fast_clear(tstate->interp->runtime); - tstate_delete_common(tstate); - _PyEval_ReleaseLock(tstate->interp, tstate, 1); + tstate_delete_common(tstate, 1); // release GIL as part of call free_threadstate((_PyThreadStateImpl *)tstate); } diff --git a/Python/qsbr.c b/Python/qsbr.c index 1e02ff9c2e45f0..9cbce9044e2941 100644 --- a/Python/qsbr.c +++ b/Python/qsbr.c @@ -236,6 +236,11 @@ _Py_qsbr_unregister(PyThreadState *tstate) struct _qsbr_shared *shared = &tstate->interp->qsbr; struct _PyThreadStateImpl *tstate_imp = (_PyThreadStateImpl*) tstate; + // gh-119369: GIL must be released (if held) to prevent deadlocks, because + // we might not have an active tstate, which means that blocking on PyMutex + // locks will not implicitly release the GIL. + assert(!tstate->_status.holds_gil); + PyMutex_Lock(&shared->mutex); // NOTE: we must load (or reload) the thread state's qbsr inside the mutex // because the array may have been resized (changing tstate->qsbr) while From 015b1fdd0ae03f94a5dfda051b020810d1c952dd Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Fri, 31 May 2024 18:09:48 +0100 Subject: [PATCH 014/373] gh-100117: Fix inaccuracy in documentation of the CodeObject's co_positions field.
(#119364) --- Doc/reference/datamodel.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 0fe9681f93f135..134385ed2f1860 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -1243,7 +1243,7 @@ Methods on code objects The iterator returns :class:`tuple`\s containing the ``(start_line, end_line, start_column, end_column)``. The *i-th* tuple corresponds to the - position of the source code that compiled to the *i-th* instruction. + position of the source code that compiled to the *i-th* code unit. Column information is 0-indexed utf-8 byte offsets on the given source line. From f3fc800d5f17b144a752a262102b750bedcdaa14 Mon Sep 17 00:00:00 2001 From: Zachary Ware Date: Fri, 31 May 2024 12:19:54 -0500 Subject: [PATCH 015/373] contextlib docs: Clean up redundant 'up' after 'cleanup' (GH-119867) Reported by Michael Kass on docs@ --- Doc/library/contextlib.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/contextlib.rst b/Doc/library/contextlib.rst index 73e53aec9cbf1c..bad9da52d6a6ca 100644 --- a/Doc/library/contextlib.rst +++ b/Doc/library/contextlib.rst @@ -796,7 +796,7 @@ executing that callback:: if result: stack.pop_all() -This allows the intended cleanup up behaviour to be made explicit up front, +This allows the intended cleanup behaviour to be made explicit up front, rather than requiring a separate flag variable. If a particular application uses this pattern a lot, it can be simplified From 9bc6045842ebc91ec48ab163a9e1e8644231607c Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 31 May 2024 13:23:29 -0400 Subject: [PATCH 016/373] doc: Add glossary entry for "free threading" (#119865) --- Doc/glossary.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Doc/glossary.rst b/Doc/glossary.rst index 1e5bafce861e52..ae9949bc2867c4 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -438,6 +438,12 @@ Glossary division. 
Note that ``(-11) // 4`` is ``-3`` because that is ``-2.75`` rounded *downward*. See :pep:`238`. + free threading + A threading model where multiple threads can run Python bytecode + simultaneously within the same interpreter. This is in contrast to + the :term:`global interpreter lock` which allows only one thread to + execute Python bytecode at a time. See :pep:`703`. + function A series of statements which returns some value to a caller. It can also be passed zero or more :term:`arguments ` which may be used in From 879d43b705faab0c59f1a6a0042e286f39f3a4ef Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 31 May 2024 14:18:24 -0400 Subject: [PATCH 017/373] gh-119799: Add missing `_Py_IncRefTotal` to `_Py_NewRefWithLock` (#119800) The free-threaded refleak builds were reporting negative refcount deltas in some tests because of a missing `_Py_NewRefWithLock`. --- Include/internal/pycore_object.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 7602248f956405..f63e1da6fba025 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -497,6 +497,9 @@ _Py_NewRefWithLock(PyObject *op) if (_Py_TryIncrefFast(op)) { return op; } +#ifdef Py_REF_DEBUG + _Py_IncRefTotal(_PyThreadState_GET()); +#endif _Py_INCREF_STAT_INC(); for (;;) { Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared); From f9d47fed9fbbe9313404838050f6dfe1c7fe6340 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 31 May 2024 21:21:30 +0200 Subject: [PATCH 018/373] gh-119853: Add Include/refcount.h to projects (#119860) --- Makefile.pre.in | 1 + PCbuild/pythoncore.vcxproj | 1 + PCbuild/pythoncore.vcxproj.filters | 3 +++ 3 files changed, 5 insertions(+) diff --git a/Makefile.pre.in b/Makefile.pre.in index a3fca80d4448ca..9a2fc34f030662 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1055,6 +1055,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/pythread.h \ 
$(srcdir)/Include/pytypedefs.h \ $(srcdir)/Include/rangeobject.h \ + $(srcdir)/Include/refcount.h \ $(srcdir)/Include/setobject.h \ $(srcdir)/Include/sliceobject.h \ $(srcdir)/Include/structmember.h \ diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 16fb424b11c6a8..96960f0579a936 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -349,6 +349,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index cf9bc0f4bc1c70..2e4bd786be47e9 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -207,6 +207,9 @@ Include + + Include + Include From 2237946af0981c46dc7d3886477e425ccfb37f28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Fri, 31 May 2024 16:26:02 -0400 Subject: [PATCH 019/373] gh-118894: Make asyncio REPL use pyrepl (GH-119433) --- Lib/_pyrepl/commands.py | 5 ++ Lib/_pyrepl/console.py | 57 +++++++++++- Lib/_pyrepl/reader.py | 1 + Lib/_pyrepl/simple_interact.py | 53 ++--------- Lib/asyncio/__main__.py | 89 +++++++++++++++---- Lib/test/test_pyrepl/test_interact.py | 2 +- ...-05-22-21-20-43.gh-issue-118894.xHdxR_.rst | 1 + 7 files changed, 143 insertions(+), 65 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-22-21-20-43.gh-issue-118894.xHdxR_.rst diff --git a/Lib/_pyrepl/commands.py b/Lib/_pyrepl/commands.py index ed977f84baac4e..2ef5dada9d9e58 100644 --- a/Lib/_pyrepl/commands.py +++ b/Lib/_pyrepl/commands.py @@ -219,6 +219,11 @@ def do(self) -> None: os.kill(os.getpid(), signal.SIGINT) +class ctrl_c(Command): + def do(self) -> None: + raise KeyboardInterrupt + + class suspend(Command): def do(self) -> None: import signal diff --git a/Lib/_pyrepl/console.py b/Lib/_pyrepl/console.py index fcabf785069ecb..aa0bde865825c9 100644 --- a/Lib/_pyrepl/console.py +++ b/Lib/_pyrepl/console.py @@ -19,10 +19,14 @@ from __future__ import annotations -import sys +import _colorize # type: ignore[import-not-found] from 
abc import ABC, abstractmethod +import ast +import code from dataclasses import dataclass, field +import os.path +import sys TYPE_CHECKING = False @@ -136,3 +140,54 @@ def wait(self) -> None: @abstractmethod def repaint(self) -> None: ... + + +class InteractiveColoredConsole(code.InteractiveConsole): + def __init__( + self, + locals: dict[str, object] | None = None, + filename: str = "", + *, + local_exit: bool = False, + ) -> None: + super().__init__(locals=locals, filename=filename, local_exit=local_exit) # type: ignore[call-arg] + self.can_colorize = _colorize.can_colorize() + + def showsyntaxerror(self, filename=None): + super().showsyntaxerror(colorize=self.can_colorize) + + def showtraceback(self): + super().showtraceback(colorize=self.can_colorize) + + def runsource(self, source, filename="", symbol="single"): + try: + tree = ast.parse(source) + except (SyntaxError, OverflowError, ValueError): + self.showsyntaxerror(filename) + return False + if tree.body: + *_, last_stmt = tree.body + for stmt in tree.body: + wrapper = ast.Interactive if stmt is last_stmt else ast.Module + the_symbol = symbol if stmt is last_stmt else "exec" + item = wrapper([stmt]) + try: + code = self.compile.compiler(item, filename, the_symbol, dont_inherit=True) + except SyntaxError as e: + if e.args[0] == "'await' outside function": + python = os.path.basename(sys.executable) + e.add_note( + f"Try the asyncio REPL ({python} -m asyncio) to use" + f" top-level 'await' and run background asyncio tasks." 
+ ) + self.showsyntaxerror(filename) + return False + except (OverflowError, ValueError): + self.showsyntaxerror(filename) + return False + + if code is None: + return True + + self.runcode(code) + return False diff --git a/Lib/_pyrepl/reader.py b/Lib/_pyrepl/reader.py index 0045425cdddb79..5401ae7b0ae32d 100644 --- a/Lib/_pyrepl/reader.py +++ b/Lib/_pyrepl/reader.py @@ -131,6 +131,7 @@ def make_default_commands() -> dict[CommandName, type[Command]]: ("\\\\", "self-insert"), (r"\x1b[200~", "enable_bracketed_paste"), (r"\x1b[201~", "disable_bracketed_paste"), + (r"\x03", "ctrl-c"), ] + [(c, "self-insert") for c in map(chr, range(32, 127)) if c != "\\"] + [(c, "self-insert") for c in map(chr, range(128, 256)) if c.isalpha()] diff --git a/Lib/_pyrepl/simple_interact.py b/Lib/_pyrepl/simple_interact.py index c624f6e12a7094..256bbc7c6d7626 100644 --- a/Lib/_pyrepl/simple_interact.py +++ b/Lib/_pyrepl/simple_interact.py @@ -25,14 +25,13 @@ from __future__ import annotations -import _colorize # type: ignore[import-not-found] import _sitebuiltins import linecache import sys import code -import ast from types import ModuleType +from .console import InteractiveColoredConsole from .readline import _get_reader, multiline_input _error: tuple[type[Exception], ...] 
| type[Exception] @@ -74,57 +73,21 @@ def _clear_screen(): "clear": _clear_screen, } -class InteractiveColoredConsole(code.InteractiveConsole): - def __init__( - self, - locals: dict[str, object] | None = None, - filename: str = "", - *, - local_exit: bool = False, - ) -> None: - super().__init__(locals=locals, filename=filename, local_exit=local_exit) # type: ignore[call-arg] - self.can_colorize = _colorize.can_colorize() - - def showsyntaxerror(self, filename=None): - super().showsyntaxerror(colorize=self.can_colorize) - - def showtraceback(self): - super().showtraceback(colorize=self.can_colorize) - - def runsource(self, source, filename="", symbol="single"): - try: - tree = ast.parse(source) - except (OverflowError, SyntaxError, ValueError): - self.showsyntaxerror(filename) - return False - if tree.body: - *_, last_stmt = tree.body - for stmt in tree.body: - wrapper = ast.Interactive if stmt is last_stmt else ast.Module - the_symbol = symbol if stmt is last_stmt else "exec" - item = wrapper([stmt]) - try: - code = compile(item, filename, the_symbol, dont_inherit=True) - except (OverflowError, ValueError, SyntaxError): - self.showsyntaxerror(filename) - return False - - if code is None: - return True - - self.runcode(code) - return False - def run_multiline_interactive_console( - mainmodule: ModuleType | None= None, future_flags: int = 0 + mainmodule: ModuleType | None = None, + future_flags: int = 0, + console: code.InteractiveConsole | None = None, ) -> None: import __main__ from .readline import _setup _setup() mainmodule = mainmodule or __main__ - console = InteractiveColoredConsole(mainmodule.__dict__, filename="") + if console is None: + console = InteractiveColoredConsole( + mainmodule.__dict__, filename="" + ) if future_flags: console.compile.compiler.flags |= future_flags diff --git a/Lib/asyncio/__main__.py b/Lib/asyncio/__main__.py index 9041b8b8316c1e..91fff9aaee337b 100644 --- a/Lib/asyncio/__main__.py +++ b/Lib/asyncio/__main__.py @@ -1,42 +1,49 @@ 
import ast import asyncio -import code import concurrent.futures import inspect +import os import site import sys import threading import types import warnings +from _colorize import can_colorize, ANSIColors # type: ignore[import-not-found] +from _pyrepl.console import InteractiveColoredConsole + from . import futures -class AsyncIOInteractiveConsole(code.InteractiveConsole): +class AsyncIOInteractiveConsole(InteractiveColoredConsole): def __init__(self, locals, loop): - super().__init__(locals) + super().__init__(locals, filename="") self.compile.compiler.flags |= ast.PyCF_ALLOW_TOP_LEVEL_AWAIT self.loop = loop def runcode(self, code): + global return_code future = concurrent.futures.Future() def callback(): + global return_code global repl_future - global repl_future_interrupted + global keyboard_interrupted repl_future = None - repl_future_interrupted = False + keyboard_interrupted = False func = types.FunctionType(code, self.locals) try: coro = func() - except SystemExit: - raise + except SystemExit as se: + return_code = se.code + self.loop.stop() + return except KeyboardInterrupt as ex: - repl_future_interrupted = True + keyboard_interrupted = True future.set_exception(ex) return except BaseException as ex: @@ -57,10 +64,12 @@ def callback(): try: return future.result() - except SystemExit: - raise + except SystemExit as se: + return_code = se.code + self.loop.stop() + return except BaseException: - if repl_future_interrupted: + if keyboard_interrupted: self.write("\nKeyboardInterrupt\n") else: self.showtraceback() @@ -69,18 +78,56 @@ def callback(): class REPLThread(threading.Thread): def run(self): + global return_code + try: banner = ( f'asyncio REPL {sys.version} on {sys.platform}\n' f'Use "await" directly instead of "asyncio.run()".\n' f'Type "help", "copyright", "credits" or "license" ' f'for more information.\n' - f'{getattr(sys, "ps1", ">>> ")}import asyncio' ) - console.interact( - banner=banner, - exitmsg='exiting asyncio REPL...') + 
console.write(banner) + + if startup_path := os.getenv("PYTHONSTARTUP"): + import tokenize + with tokenize.open(startup_path) as f: + startup_code = compile(f.read(), startup_path, "exec") + exec(startup_code, console.locals) + + ps1 = getattr(sys, "ps1", ">>> ") + if can_colorize(): + ps1 = f"{ANSIColors.BOLD_MAGENTA}{ps1}{ANSIColors.RESET}" + console.write(f"{ps1}import asyncio\n") + + try: + import errno + if os.getenv("PYTHON_BASIC_REPL"): + raise RuntimeError("user environment requested basic REPL") + if not os.isatty(sys.stdin.fileno()): + raise OSError(errno.ENOTTY, "tty required", "stdin") + + # This import will fail on operating systems with no termios. + from _pyrepl.simple_interact import ( + check, + run_multiline_interactive_console, + ) + if err := check(): + raise RuntimeError(err) + except Exception as e: + console.interact(banner="", exitmsg=exit_message) + else: + try: + run_multiline_interactive_console(console=console) + except SystemExit: + # expected via the `exit` and `quit` commands + pass + except BaseException: + # unexpected issue + console.showtraceback() + console.write("Internal error, ") + return_code = 1 finally: warnings.filterwarnings( 'ignore', @@ -91,6 +138,9 @@ def run(self): if __name__ == '__main__': + CAN_USE_PYREPL = True + + return_code = 0 loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) @@ -103,7 +153,7 @@ def run(self): console = AsyncIOInteractiveConsole(repl_locals, loop) repl_future = None - repl_future_interrupted = False + keyboard_interrupted = False try: import readline # NoQA @@ -126,7 +176,7 @@ def run(self): completer = rlcompleter.Completer(console.locals) readline.set_completer(completer.complete) - repl_thread = REPLThread() + repl_thread = REPLThread(name="Interactive thread") repl_thread.daemon = True repl_thread.start() @@ -134,9 +184,12 @@ def run(self): try: loop.run_forever() except KeyboardInterrupt: + keyboard_interrupted = True if repl_future and not repl_future.done(): 
repl_future.cancel() - repl_future_interrupted = True continue else: break + + console.write('exiting asyncio REPL...\n') + sys.exit(return_code) diff --git a/Lib/test/test_pyrepl/test_interact.py b/Lib/test/test_pyrepl/test_interact.py index 4d01ea7620109d..df97b1354a168e 100644 --- a/Lib/test/test_pyrepl/test_interact.py +++ b/Lib/test/test_pyrepl/test_interact.py @@ -6,7 +6,7 @@ from test.support import force_not_colorized -from _pyrepl.simple_interact import InteractiveColoredConsole +from _pyrepl.console import InteractiveColoredConsole class TestSimpleInteract(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2024-05-22-21-20-43.gh-issue-118894.xHdxR_.rst b/Misc/NEWS.d/next/Library/2024-05-22-21-20-43.gh-issue-118894.xHdxR_.rst new file mode 100644 index 00000000000000..ffc4ae336dc54f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-22-21-20-43.gh-issue-118894.xHdxR_.rst @@ -0,0 +1 @@ +:mod:`asyncio` REPL now has the same capabilities as PyREPL. From 80a4e3899420faaa012c82b4e82cdb6675a6a944 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Fri, 31 May 2024 14:05:24 -0700 Subject: [PATCH 020/373] gh-119821: Support non-dict globals in LOAD_FROM_DICT_OR_GLOBALS (#119822) Support non-dict globals in LOAD_FROM_DICT_OR_GLOBALS The implementation basically copies LOAD_GLOBAL. Possibly it could be deduplicated, but that seems like it may get hairy since the two operations have different operands. This is important to fix in 3.14 for PEP 649, but it's a bug in earlier versions too, and we should backport to 3.13 and 3.12 if possible. 
--- Include/internal/pycore_opcode_metadata.h | 1 - Include/internal/pycore_uop_metadata.h | 4 --- Lib/test/test_type_aliases.py | 20 +++++++++++ ...-05-30-23-01-00.gh-issue-119821.jPGfvt.rst | 2 ++ Python/bytecodes.c | 35 ++++++++++++++----- Python/executor_cases.c.h | 30 +--------------- Python/generated_cases.c.h | 35 ++++++++++++++----- Python/optimizer_cases.c.h | 7 +--- 8 files changed, 76 insertions(+), 58 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-30-23-01-00.gh-issue-119821.jPGfvt.rst diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index d3535800139a66..0b835230974e39 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1323,7 +1323,6 @@ _PyOpcode_macro_expansion[256] = { [LOAD_FAST_CHECK] = { .nuops = 1, .uops = { { _LOAD_FAST_CHECK, 0, 0 } } }, [LOAD_FAST_LOAD_FAST] = { .nuops = 2, .uops = { { _LOAD_FAST, 5, 0 }, { _LOAD_FAST, 6, 0 } } }, [LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { _LOAD_FROM_DICT_OR_DEREF, 0, 0 } } }, - [LOAD_FROM_DICT_OR_GLOBALS] = { .nuops = 1, .uops = { { _LOAD_FROM_DICT_OR_GLOBALS, 0, 0 } } }, [LOAD_GLOBAL] = { .nuops = 1, .uops = { { _LOAD_GLOBAL, 0, 0 } } }, [LOAD_GLOBAL_BUILTIN] = { .nuops = 3, .uops = { { _GUARD_GLOBALS_VERSION, 1, 1 }, { _GUARD_BUILTINS_VERSION, 1, 2 }, { _LOAD_GLOBAL_BUILTINS, 1, 3 } } }, [LOAD_GLOBAL_MODULE] = { .nuops = 2, .uops = { { _GUARD_GLOBALS_VERSION, 1, 1 }, { _LOAD_GLOBAL_MODULE, 1, 3 } } }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 78f0eafaa32042..690ae34a6eef98 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -107,7 +107,6 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_STORE_GLOBAL] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_DELETE_GLOBAL] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | 
HAS_ESCAPES_FLAG, [_LOAD_LOCALS] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_LOAD_FROM_DICT_OR_GLOBALS] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_LOAD_GLOBAL] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GUARD_GLOBALS_VERSION] = HAS_DEOPT_FLAG, [_GUARD_BUILTINS_VERSION] = HAS_DEOPT_FLAG, @@ -439,7 +438,6 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_FAST_CHECK] = "_LOAD_FAST_CHECK", [_LOAD_FAST_LOAD_FAST] = "_LOAD_FAST_LOAD_FAST", [_LOAD_FROM_DICT_OR_DEREF] = "_LOAD_FROM_DICT_OR_DEREF", - [_LOAD_FROM_DICT_OR_GLOBALS] = "_LOAD_FROM_DICT_OR_GLOBALS", [_LOAD_GLOBAL] = "_LOAD_GLOBAL", [_LOAD_GLOBAL_BUILTINS] = "_LOAD_GLOBAL_BUILTINS", [_LOAD_GLOBAL_MODULE] = "_LOAD_GLOBAL_MODULE", @@ -692,8 +690,6 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _LOAD_LOCALS: return 0; - case _LOAD_FROM_DICT_OR_GLOBALS: - return 1; case _LOAD_GLOBAL: return 0; case _GUARD_GLOBALS_VERSION: diff --git a/Lib/test/test_type_aliases.py b/Lib/test/test_type_aliases.py index 9c325bc595f585..f8b395fdc8bb1d 100644 --- a/Lib/test/test_type_aliases.py +++ b/Lib/test/test_type_aliases.py @@ -1,4 +1,5 @@ import pickle +import textwrap import types import unittest from test.support import check_syntax_error, run_code @@ -328,3 +329,22 @@ def test_pickling_local(self): with self.subTest(thing=thing, proto=proto): with self.assertRaises(pickle.PickleError): pickle.dumps(thing, protocol=proto) + + +class TypeParamsExoticGlobalsTest(unittest.TestCase): + def test_exec_with_unusual_globals(self): + class customdict(dict): + def __missing__(self, key): + return key + + code = compile("type Alias = undefined", "test", "exec") + ns = customdict() + exec(code, ns) + Alias = ns["Alias"] + self.assertEqual(Alias.__value__, "undefined") + + code = compile("class A: type Alias = undefined", "test", "exec") + ns = customdict() + exec(code, ns) + Alias = ns["A"].Alias + self.assertEqual(Alias.__value__, 
"undefined") diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-30-23-01-00.gh-issue-119821.jPGfvt.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-30-23-01-00.gh-issue-119821.jPGfvt.rst new file mode 100644 index 00000000000000..cc25eee6dd6ae4 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-30-23-01-00.gh-issue-119821.jPGfvt.rst @@ -0,0 +1,2 @@ +Fix execution of :ref:`annotation scopes ` within classes +when ``globals`` is set to a non-dict. Patch by Jelle Zijlstra. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 9a8198515dea5e..1c12e1cddbbc10 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1385,18 +1385,35 @@ dummy_func( ERROR_NO_POP(); } if (v == NULL) { - if (PyDict_GetItemRef(GLOBALS(), name, &v) < 0) { - ERROR_NO_POP(); - } - if (v == NULL) { - if (PyMapping_GetOptionalItem(BUILTINS(), name, &v) < 0) { + if (PyDict_CheckExact(GLOBALS()) + && PyDict_CheckExact(BUILTINS())) + { + v = _PyDict_LoadGlobal((PyDictObject *)GLOBALS(), + (PyDictObject *)BUILTINS(), + name); + if (v == NULL) { + if (!_PyErr_Occurred(tstate)) { + /* _PyDict_LoadGlobal() returns NULL without raising + * an exception if the key doesn't exist */ + _PyEval_FormatExcCheckArg(tstate, PyExc_NameError, + NAME_ERROR_MSG, name); + } ERROR_NO_POP(); } + } + else { + /* Slow-path if globals or builtins is not a dict */ + /* namespace 1: globals */ + ERROR_IF(PyMapping_GetOptionalItem(GLOBALS(), name, &v) < 0, error); if (v == NULL) { - _PyEval_FormatExcCheckArg( - tstate, PyExc_NameError, - NAME_ERROR_MSG, name); - ERROR_NO_POP(); + /* namespace 2: builtins */ + ERROR_IF(PyMapping_GetOptionalItem(BUILTINS(), name, &v) < 0, error); + if (v == NULL) { + _PyEval_FormatExcCheckArg( + tstate, PyExc_NameError, + NAME_ERROR_MSG, name); + ERROR_IF(true, error); + } } } } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index e862364cb23e7a..0dfe490cb37047 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1405,35 +1405,7 
@@ break; } - case _LOAD_FROM_DICT_OR_GLOBALS: { - PyObject *mod_or_class_dict; - PyObject *v; - oparg = CURRENT_OPARG(); - mod_or_class_dict = stack_pointer[-1]; - PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); - if (PyMapping_GetOptionalItem(mod_or_class_dict, name, &v) < 0) { - JUMP_TO_ERROR(); - } - if (v == NULL) { - if (PyDict_GetItemRef(GLOBALS(), name, &v) < 0) { - JUMP_TO_ERROR(); - } - if (v == NULL) { - if (PyMapping_GetOptionalItem(BUILTINS(), name, &v) < 0) { - JUMP_TO_ERROR(); - } - if (v == NULL) { - _PyEval_FormatExcCheckArg( - tstate, PyExc_NameError, - NAME_ERROR_MSG, name); - JUMP_TO_ERROR(); - } - } - } - Py_DECREF(mod_or_class_dict); - stack_pointer[-1] = v; - break; - } + /* _LOAD_FROM_DICT_OR_GLOBALS is not a viable micro-op for tier 2 because it has both popping and not-popping errors */ /* _LOAD_NAME is not a viable micro-op for tier 2 because it has both popping and not-popping errors */ diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 4402787d96f12e..1a991608385405 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4401,18 +4401,35 @@ goto error; } if (v == NULL) { - if (PyDict_GetItemRef(GLOBALS(), name, &v) < 0) { - goto error; - } - if (v == NULL) { - if (PyMapping_GetOptionalItem(BUILTINS(), name, &v) < 0) { + if (PyDict_CheckExact(GLOBALS()) + && PyDict_CheckExact(BUILTINS())) + { + v = _PyDict_LoadGlobal((PyDictObject *)GLOBALS(), + (PyDictObject *)BUILTINS(), + name); + if (v == NULL) { + if (!_PyErr_Occurred(tstate)) { + /* _PyDict_LoadGlobal() returns NULL without raising + * an exception if the key doesn't exist */ + _PyEval_FormatExcCheckArg(tstate, PyExc_NameError, + NAME_ERROR_MSG, name); + } goto error; } + } + else { + /* Slow-path if globals or builtins is not a dict */ + /* namespace 1: globals */ + if (PyMapping_GetOptionalItem(GLOBALS(), name, &v) < 0) goto pop_1_error; if (v == NULL) { - _PyEval_FormatExcCheckArg( - tstate, PyExc_NameError, - NAME_ERROR_MSG, name); 
- goto error; + /* namespace 2: builtins */ + if (PyMapping_GetOptionalItem(BUILTINS(), name, &v) < 0) goto pop_1_error; + if (v == NULL) { + _PyEval_FormatExcCheckArg( + tstate, PyExc_NameError, + NAME_ERROR_MSG, name); + if (true) goto pop_1_error; + } } } } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 1b76f1480b4f11..b3787345ec6714 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -740,12 +740,7 @@ break; } - case _LOAD_FROM_DICT_OR_GLOBALS: { - _Py_UopsSymbol *v; - v = sym_new_not_null(ctx); - stack_pointer[-1] = v; - break; - } + /* _LOAD_FROM_DICT_OR_GLOBALS is not a viable micro-op for tier 2 */ /* _LOAD_NAME is not a viable micro-op for tier 2 */ From d28afd3fa064db10a2eb2a65bba33e8ea77a8fcf Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Fri, 31 May 2024 14:05:51 -0700 Subject: [PATCH 021/373] gh-119180: Lazily wrap annotations on classmethod and staticmethod (#119864) --- Lib/test/test_descr.py | 38 ++++++- ...-05-31-08-23-41.gh-issue-119180.KL4VxZ.rst | 3 + Objects/funcobject.c | 100 +++++++++++++++++- 3 files changed, 138 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-31-08-23-41.gh-issue-119180.KL4VxZ.rst diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index c3f292467a6738..7742f075285602 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -1593,8 +1593,7 @@ def f(cls, arg): self.fail("classmethod shouldn't accept keyword args") cm = classmethod(f) - cm_dict = {'__annotations__': {}, - '__doc__': ( + cm_dict = {'__doc__': ( "f docstring" if support.HAVE_DOCSTRINGS else None @@ -1610,6 +1609,41 @@ def f(cls, arg): del cm.x self.assertNotHasAttr(cm, "x") + def test_classmethod_staticmethod_annotations(self): + for deco in (classmethod, staticmethod): + @deco + def unannotated(cls): pass + @deco + def annotated(cls) -> int: pass + + for method in (annotated, unannotated): + with self.subTest(deco=deco, method=method): + 
original_annotations = dict(method.__wrapped__.__annotations__) + self.assertNotIn('__annotations__', method.__dict__) + self.assertEqual(method.__annotations__, original_annotations) + self.assertIn('__annotations__', method.__dict__) + + new_annotations = {"a": "b"} + method.__annotations__ = new_annotations + self.assertEqual(method.__annotations__, new_annotations) + self.assertEqual(method.__wrapped__.__annotations__, original_annotations) + + del method.__annotations__ + self.assertEqual(method.__annotations__, original_annotations) + + original_annotate = method.__wrapped__.__annotate__ + self.assertNotIn('__annotate__', method.__dict__) + self.assertIs(method.__annotate__, original_annotate) + self.assertIn('__annotate__', method.__dict__) + + new_annotate = lambda: {"annotations": 1} + method.__annotate__ = new_annotate + self.assertIs(method.__annotate__, new_annotate) + self.assertIs(method.__wrapped__.__annotate__, original_annotate) + + del method.__annotate__ + self.assertIs(method.__annotate__, original_annotate) + @support.refcount_test def test_refleaks_in_classmethod___init__(self): gettotalrefcount = support.get_attribute(sys, 'gettotalrefcount') diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-31-08-23-41.gh-issue-119180.KL4VxZ.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-31-08-23-41.gh-issue-119180.KL4VxZ.rst new file mode 100644 index 00000000000000..1e5ad7d08eed7c --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-31-08-23-41.gh-issue-119180.KL4VxZ.rst @@ -0,0 +1,3 @@ +:func:`classmethod` and :func:`staticmethod` now wrap the +:attr:`__annotations__` and :attr:`!__annotate__` attributes of their +underlying callable lazily. See :pep:`649`. Patch by Jelle Zijlstra. 
diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 4e78252052932c..40211297be20c0 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -1172,12 +1172,57 @@ functools_wraps(PyObject *wrapper, PyObject *wrapped) COPY_ATTR(__name__); COPY_ATTR(__qualname__); COPY_ATTR(__doc__); - COPY_ATTR(__annotations__); return 0; #undef COPY_ATTR } +// Used for wrapping __annotations__ and __annotate__ on classmethod +// and staticmethod objects. +static PyObject * +descriptor_get_wrapped_attribute(PyObject *wrapped, PyObject *dict, PyObject *name) +{ + PyObject *res; + if (PyDict_GetItemRef(dict, name, &res) < 0) { + return NULL; + } + if (res != NULL) { + return res; + } + res = PyObject_GetAttr(wrapped, name); + if (res == NULL) { + return NULL; + } + if (PyDict_SetItem(dict, name, res) < 0) { + Py_DECREF(res); + return NULL; + } + return res; +} + +static int +descriptor_set_wrapped_attribute(PyObject *dict, PyObject *name, PyObject *value, + char *type_name) +{ + if (value == NULL) { + if (PyDict_DelItem(dict, name) < 0) { + if (PyErr_ExceptionMatches(PyExc_KeyError)) { + PyErr_Clear(); + PyErr_Format(PyExc_AttributeError, + "'%.200s' object has no attribute '%U'", + type_name, name); + } + else { + return -1; + } + } + return 0; + } + else { + return PyDict_SetItem(dict, name, value); + } +} + /* Class method object */ @@ -1283,10 +1328,37 @@ cm_get___isabstractmethod__(classmethod *cm, void *closure) Py_RETURN_FALSE; } +static PyObject * +cm_get___annotations__(classmethod *cm, void *closure) +{ + return descriptor_get_wrapped_attribute(cm->cm_callable, cm->cm_dict, &_Py_ID(__annotations__)); +} + +static int +cm_set___annotations__(classmethod *cm, PyObject *value, void *closure) +{ + return descriptor_set_wrapped_attribute(cm->cm_dict, &_Py_ID(__annotations__), value, "classmethod"); +} + +static PyObject * +cm_get___annotate__(classmethod *cm, void *closure) +{ + return descriptor_get_wrapped_attribute(cm->cm_callable, cm->cm_dict, 
&_Py_ID(__annotate__)); +} + +static int +cm_set___annotate__(classmethod *cm, PyObject *value, void *closure) +{ + return descriptor_set_wrapped_attribute(cm->cm_dict, &_Py_ID(__annotate__), value, "classmethod"); +} + + static PyGetSetDef cm_getsetlist[] = { {"__isabstractmethod__", (getter)cm_get___isabstractmethod__, NULL, NULL, NULL}, {"__dict__", PyObject_GenericGetDict, PyObject_GenericSetDict, NULL, NULL}, + {"__annotations__", (getter)cm_get___annotations__, (setter)cm_set___annotations__, NULL, NULL}, + {"__annotate__", (getter)cm_get___annotate__, (setter)cm_set___annotate__, NULL, NULL}, {NULL} /* Sentinel */ }; @@ -1479,10 +1551,36 @@ sm_get___isabstractmethod__(staticmethod *sm, void *closure) Py_RETURN_FALSE; } +static PyObject * +sm_get___annotations__(staticmethod *sm, void *closure) +{ + return descriptor_get_wrapped_attribute(sm->sm_callable, sm->sm_dict, &_Py_ID(__annotations__)); +} + +static int +sm_set___annotations__(staticmethod *sm, PyObject *value, void *closure) +{ + return descriptor_set_wrapped_attribute(sm->sm_dict, &_Py_ID(__annotations__), value, "staticmethod"); +} + +static PyObject * +sm_get___annotate__(staticmethod *sm, void *closure) +{ + return descriptor_get_wrapped_attribute(sm->sm_callable, sm->sm_dict, &_Py_ID(__annotate__)); +} + +static int +sm_set___annotate__(staticmethod *sm, PyObject *value, void *closure) +{ + return descriptor_set_wrapped_attribute(sm->sm_dict, &_Py_ID(__annotate__), value, "staticmethod"); +} + static PyGetSetDef sm_getsetlist[] = { {"__isabstractmethod__", (getter)sm_get___isabstractmethod__, NULL, NULL, NULL}, {"__dict__", PyObject_GenericGetDict, PyObject_GenericSetDict, NULL, NULL}, + {"__annotations__", (getter)sm_get___annotations__, (setter)sm_set___annotations__, NULL, NULL}, + {"__annotate__", (getter)sm_get___annotate__, (setter)sm_set___annotate__, NULL, NULL}, {NULL} /* Sentinel */ }; From cc5cd4d93e3e079e897da9ceb1732ef16d79d01b Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: 
Fri, 31 May 2024 17:08:55 -0500 Subject: [PATCH 022/373] statistics.fmean(): speed-up code path for non-sizeable inputs. (gh-119876) --- Lib/statistics.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index c2f4fe8e054d3d..450edfaabe8def 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -505,13 +505,11 @@ def fmean(data, weights=None): n = len(data) except TypeError: # Handle iterators that do not define __len__(). - n = 0 - def count(iterable): - nonlocal n - for n, x in enumerate(iterable, start=1): - yield x - data = count(data) - total = fsum(data) + counter = count() + total = fsum(map(itemgetter(0), zip(data, counter))) + n = next(counter) + else: + total = fsum(data) if not n: raise StatisticsError('fmean requires at least one data point') return total / n From 3859e09e3d92d004978dd838f0511364e7edfb94 Mon Sep 17 00:00:00 2001 From: Alyssa Coghlan Date: Sat, 1 Jun 2024 13:59:35 +1000 Subject: [PATCH 023/373] gh-74929: PEP 667 C API documentation (gh-119379) * Add docs for new APIs * Add soft-deprecation notices * Add What's New porting entries * Update comments referencing `PyFrame_LocalsToFast()` to mention the proxy instead * Other related cleanups found when looking for refs to the deprecated APIs --- Doc/c-api/reflection.rst | 38 ++++++++++++++++++++++++++++++++++++++ Doc/data/refcounts.dat | 32 ++++++++++++++++++++++++++++++++ Doc/whatsnew/3.13.rst | 16 +++++++++++++++- Lib/test/test_sys.py | 13 +++++++++---- Objects/frameobject.c | 9 +++++++-- Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 2 +- Python/generated_cases.c.h | 2 +- Python/sysmodule.c | 1 - 9 files changed, 104 insertions(+), 11 deletions(-) diff --git a/Doc/c-api/reflection.rst b/Doc/c-api/reflection.rst index 4b1c4770848a30..5dcfe40c2ce92b 100644 --- a/Doc/c-api/reflection.rst +++ b/Doc/c-api/reflection.rst @@ -7,18 +7,30 @@ Reflection .. c:function:: PyObject* PyEval_GetBuiltins(void) + .. 
deprecated:: 3.13 + + Use :c:func:`PyEval_GetFrameBuiltins` instead. + Return a dictionary of the builtins in the current execution frame, or the interpreter of the thread state if no frame is currently executing. .. c:function:: PyObject* PyEval_GetLocals(void) + .. deprecated:: 3.13 + + Use :c:func:`PyEval_GetFrameLocals` instead. + Return a dictionary of the local variables in the current execution frame, or ``NULL`` if no frame is currently executing. .. c:function:: PyObject* PyEval_GetGlobals(void) + .. deprecated:: 3.13 + + Use :c:func:`PyEval_GetFrameGlobals` instead. + Return a dictionary of the global variables in the current execution frame, or ``NULL`` if no frame is currently executing. @@ -31,6 +43,32 @@ Reflection See also :c:func:`PyThreadState_GetFrame`. +.. c:function:: PyObject* PyEval_GetFrameBuiltins(void) + + Return a dictionary of the builtins in the current execution frame, + or the interpreter of the thread state if no frame is currently executing. + + .. versionadded:: 3.13 + + +.. c:function:: PyObject* PyEval_GetFrameLocals(void) + + Return a dictionary of the local variables in the current execution frame, + or ``NULL`` if no frame is currently executing. Equivalent to calling + :func:`locals` in Python code. + + .. versionadded:: 3.13 + + +.. c:function:: PyObject* PyEval_GetFrameGlobals(void) + + Return a dictionary of the global variables in the current execution frame, + or ``NULL`` if no frame is currently executing. Equivalent to calling + :func:`globals` in Python code. + + .. versionadded:: 3.13 + + .. 
c:function:: const char* PyEval_GetFuncName(PyObject *func) Return the name of *func* if it is a function, class or instance object, else the diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index 62a96146d605ff..a7d06e076a1b55 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -790,6 +790,12 @@ PyEval_GetGlobals:PyObject*::0: PyEval_GetFrame:PyObject*::0: +PyEval_GetFrameBuiltins:PyObject*::+1: + +PyEval_GetFrameLocals:PyObject*::+1: + +PyEval_GetFrameGlobals:PyObject*::+1: + PyEval_GetFuncDesc:const char*::: PyEval_GetFuncDesc:PyObject*:func:0: @@ -916,6 +922,32 @@ PyFloat_FromString:PyObject*:str:0: PyFloat_GetInfo:PyObject*::+1: PyFloat_GetInfo::void:: +PyFrame_GetBack:PyObject*::+1: +PyFrame_GetBack:PyFrameObject*:frame:0: + +PyFrame_GetBuiltins:PyObject*::+1: +PyFrame_GetBuiltins:PyFrameObject*:frame:0: + +PyFrame_GetCode:PyObject*::+1: +PyFrame_GetCode:PyFrameObject*:frame:0: + +PyFrame_GetGenerator:PyObject*::+1: +PyFrame_GetGenerator:PyFrameObject*:frame:0: + +PyFrame_GetGlobals:PyObject*::+1: +PyFrame_GetGlobals:PyFrameObject*:frame:0: + +PyFrame_GetLocals:PyObject*::+1: +PyFrame_GetLocals:PyFrameObject*:frame:0: + +PyFrame_GetVar:PyObject*::+1: +PyFrame_GetVar:PyFrameObject*:frame:0: +PyFrame_GetVar:PyObject*:name:0: + +PyFrame_GetVarString:PyObject*::+1: +PyFrame_GetVarString:PyFrameObject*:frame:0: +PyFrame_GetVarString:const char*:name:: + PyFrozenSet_Check:int::: PyFrozenSet_Check:PyObject*:p:0: diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 29bb3b81f6323c..3a52baf71310a3 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -97,7 +97,7 @@ Interpreter improvements: * :pep:`667`: The :func:`locals` builtin now has :ref:`defined semantics ` when mutating the returned mapping. Python debuggers and similar tools may now more reliably - update local variables in optimized frames even during concurrent code + update local variables in optimized scopes even during concurrent code execution. 
New typing features: @@ -2143,6 +2143,11 @@ New Features destruction the same way the :mod:`tracemalloc` module does. (Contributed by Pablo Galindo in :gh:`93502`.) +* Add :c:func:`PyEval_GetFrameBuiltins`, :c:func:`PyEval_GetFrameGlobals`, and + :c:func:`PyEval_GetFrameLocals` to the C API. These replacements for + :c:func:`PyEval_GetBuiltins`, :c:func:`PyEval_GetGlobals`, and + :c:func:`PyEval_GetLocals` return :term:`strong references ` + rather than borrowed references. (Added as part of :pep:`667`.) Build Changes ============= @@ -2318,6 +2323,15 @@ Changes in the C API to :c:func:`PyUnstable_Code_GetFirstFree`. (Contributed by Bogdan Romanyuk in :gh:`115781`.) +* :c:func:`!PyFrame_FastToLocals` and :c:func:`!PyFrame_FastToLocalsWithError` + no longer have any effect. Calling these functions has been redundant since + Python 3.11, when :c:func:`PyFrame_GetLocals` was first introduced. + (Changed as part of :pep:`667`.) + +* :c:func:`!PyFrame_LocalsToFast` no longer has any effect. Calling this function + is redundant now that :c:func:`PyFrame_GetLocals` returns a write-through proxy + for :term:`optimized scopes `. (Changed as part of :pep:`667`.) + Removed C APIs -------------- diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 8fe1d77756866a..1e5823f8883957 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -394,10 +394,15 @@ def test_dlopenflags(self): @test.support.refcount_test def test_refcount(self): - # n here must be a global in order for this test to pass while - # tracing with a python function. Tracing calls PyFrame_FastToLocals - # which will add a copy of any locals to the frame object, causing - # the reference count to increase by 2 instead of 1. + # n here originally had to be a global in order for this test to pass + # while tracing with a python function. Tracing used to call + # PyFrame_FastToLocals, which would add a copy of any locals to the + # frame object, causing the ref count to increase by 2 instead of 1. 
+ # While that no longer happens (due to PEP 667), this test case retains + # its original global-based implementation + # PEP 683's immortal objects also made this point moot, since the + # refcount for None doesn't change anyway. Maybe this test should be + # using a different constant value? (e.g. an integer) global n self.assertRaises(TypeError, sys.getrefcount) c = sys.getrefcount(None) diff --git a/Objects/frameobject.c b/Objects/frameobject.c index fc8d6c7a7aee89..5c65007dae46d2 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -1888,8 +1888,7 @@ frame_get_var(_PyInterpreterFrame *frame, PyCodeObject *co, int i, } // (likely) Otherwise it is an arg (kind & CO_FAST_LOCAL), // with the initial value set when the frame was created... - // (unlikely) ...or it was set to some initial value by - // an earlier call to PyFrame_LocalsToFast(). + // (unlikely) ...or it was set via the f_locals proxy. } } } @@ -2002,18 +2001,24 @@ PyFrame_GetVarString(PyFrameObject *frame, const char *name) int PyFrame_FastToLocalsWithError(PyFrameObject *f) { + // Nothing to do here, as f_locals is now a write-through proxy in + // optimized frames. Soft-deprecated, since there's no maintenance hassle. return 0; } void PyFrame_FastToLocals(PyFrameObject *f) { + // Nothing to do here, as f_locals is now a write-through proxy in + // optimized frames. Soft-deprecated, since there's no maintenance hassle. return; } void PyFrame_LocalsToFast(PyFrameObject *f, int clear) { + // Nothing to do here, as f_locals is now a write-through proxy in + // optimized frames. Soft-deprecated, since there's no maintenance hassle. return; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 1c12e1cddbbc10..413ad1105f9428 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1570,7 +1570,7 @@ dummy_func( inst(MAKE_CELL, (--)) { // "initial" is probably NULL but not if it's an arg (or set - // via PyFrame_LocalsToFast() before MAKE_CELL has run). 
+ // via the f_locals proxy before MAKE_CELL has run). PyObject *initial = GETLOCAL(oparg); PyObject *cell = PyCell_New(initial); if (cell == NULL) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 0dfe490cb37047..bab629684c53f6 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1541,7 +1541,7 @@ case _MAKE_CELL: { oparg = CURRENT_OPARG(); // "initial" is probably NULL but not if it's an arg (or set - // via PyFrame_LocalsToFast() before MAKE_CELL has run). + // via the f_locals proxy before MAKE_CELL has run). PyObject *initial = GETLOCAL(oparg); PyObject *cell = PyCell_New(initial); if (cell == NULL) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 1a991608385405..355be966cbb84a 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4784,7 +4784,7 @@ next_instr += 1; INSTRUCTION_STATS(MAKE_CELL); // "initial" is probably NULL but not if it's an arg (or set - // via PyFrame_LocalsToFast() before MAKE_CELL has run). + // via the f_locals proxy before MAKE_CELL has run). 
PyObject *initial = GETLOCAL(oparg); PyObject *cell = PyCell_New(initial); if (cell == NULL) { diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 4da13e4552e786..00aa95531026b5 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -35,7 +35,6 @@ Data members: #include "pycore_sysmodule.h" // export _PySys_GetSizeOf() #include "pycore_tuple.h" // _PyTuple_FromArray() -#include "frameobject.h" // PyFrame_FastToLocalsWithError() #include "pydtrace.h" // PyDTrace_AUDIT() #include "osdefs.h" // DELIM #include "stdlib_module_names.h" // _Py_stdlib_module_names From 2180991ea3d50f56595edae241cc92dd4e7de642 Mon Sep 17 00:00:00 2001 From: Alyssa Coghlan Date: Sat, 1 Jun 2024 16:21:48 +1000 Subject: [PATCH 024/373] gh-118888: Further PEP 667 docs updates (gh-119893) * Clarify impact on default behaviour of exec, eval, etc * Update documentation for changes to PyEval_GetLocals (gh-74929) Closes gh-11888 --- Doc/c-api/reflection.rst | 21 +++++++++++++++++++-- Doc/whatsnew/3.13.rst | 26 +++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/Doc/c-api/reflection.rst b/Doc/c-api/reflection.rst index 5dcfe40c2ce92b..af9a1a74ec137e 100644 --- a/Doc/c-api/reflection.rst +++ b/Doc/c-api/reflection.rst @@ -19,11 +19,24 @@ Reflection .. deprecated:: 3.13 - Use :c:func:`PyEval_GetFrameLocals` instead. + To avoid creating a reference cycle in :term:`optimized scopes `, + use either :c:func:`PyEval_GetFrameLocals` to obtain the same behaviour as calling + :func:`locals` in Python code, or else call :c:func:`PyFrame_GetLocals` on the result + of :c:func:`PyEval_GetFrame` to get the same result as this function without having to + cache the proxy instance on the underlying frame. - Return a dictionary of the local variables in the current execution frame, + Return the :attr:`~frame.f_locals` attribute of the currently executing frame, or ``NULL`` if no frame is currently executing. 
+ If the frame refers to an :term:`optimized scope`, this returns a + write-through proxy object that allows modifying the locals. + In all other cases (classes, modules, :func:`exec`, :func:`eval`) it returns + the mapping representing the frame locals directly (as described for + :func:`locals`). + + .. versionchanged:: 3.13 + As part of :pep:`667`, return a proxy object for optimized scopes. + .. c:function:: PyObject* PyEval_GetGlobals(void) @@ -57,6 +70,10 @@ Reflection or ``NULL`` if no frame is currently executing. Equivalent to calling :func:`locals` in Python code. + To access :attr:`~frame.f_locals` on the current frame without making an independent + snapshot in :term:`optimized scopes `, call :c:func:`PyFrame_GetLocals` + on the result of :c:func:`PyEval_GetFrame`. + .. versionadded:: 3.13 diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 3a52baf71310a3..ab260bf2a2d740 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -266,6 +266,21 @@ comprehensions, and generator expressions) to explicitly return independent snapshots of the currently assigned local variables, including locally referenced nonlocal variables captured in closures. +This change to the semantics of :func:`locals` in optimized scopes also affects the default +behaviour of code execution functions that implicitly target ``locals()`` if no explicit +namespace is provided (such as :func:`exec` and :func:`eval`). In previous versions, whether +or not changes could be accessed by calling ``locals()`` after calling the code execution +function was implementation dependent. In CPython specifically, such code would typically +appear to work as desired, but could sometimes fail in optimized scopes based on other code +(including debuggers and code execution tracing tools) potentially resetting the shared +snapshot in that scope. 
Now, the code will always run against an independent snapshot of the +local variables in optimized scopes, and hence the changes will never be visible in +subsequent calls to ``locals()``. To access the changes made in these cases, an explicit +namespace reference must now be passed to the relevant function. Alternatively, it may make +sense to update affected code to use a higher level code execution API that returns the +resulting code execution namespace (e.g. :func:`runpy.run_path` when executing Python +files from disk). + To ensure debuggers and similar tools can reliably update local variables in scopes affected by this change, :attr:`FrameType.f_locals ` now returns a write-through proxy to the frame's local and locally referenced @@ -2235,7 +2250,10 @@ Changes in the Python API independent snapshot on each call, and hence no longer implicitly updates previously returned references. Obtaining the legacy CPython behaviour now requires explicit calls to update the initially returned dictionary with the - results of subsequent calls to ``locals()``. (Changed as part of :pep:`667`.) + results of subsequent calls to ``locals()``. Code execution functions that + implicitly target ``locals()`` (such as ``exec`` and ``eval``) must be + passed an explicit namespace to access their results in an optimized scope. + (Changed as part of :pep:`667`.) * Calling :func:`locals` from a comprehension at module or class scope (including via ``exec`` or ``eval``) once more behaves as if the comprehension @@ -2323,6 +2341,12 @@ Changes in the C API to :c:func:`PyUnstable_Code_GetFirstFree`. (Contributed by Bogdan Romanyuk in :gh:`115781`.) +* Calling :c:func:`PyFrame_GetLocals` or :c:func:`PyEval_GetLocals` in an + :term:`optimized scope` now returns a write-through proxy rather than a + snapshot that gets updated at ill-specified times. If a snapshot is desired, + it must be created explicitly (e.g. 
with :c:func:`PyDict_Copy`) or by calling + the new :c:func:`PyEval_GetFrameLocals` API. (Changed as part of :pep:`667`.) + * :c:func:`!PyFrame_FastToLocals` and :c:func:`!PyFrame_FastToLocalsWithError` no longer have any effect. Calling these functions has been redundant since Python 3.11, when :c:func:`PyFrame_GetLocals` was first introduced. From 51191dbfdd115b6de481a1cb5c86b952d622c2d7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 1 Jun 2024 10:11:53 +0000 Subject: [PATCH 025/373] build(deps-dev): bump types-setuptools from 69.5.0.20240423 to 70.0.0.20240524 in /Tools (#119899) --- Tools/requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/requirements-dev.txt b/Tools/requirements-dev.txt index 1767727373918f..f430ddedb9f856 100644 --- a/Tools/requirements-dev.txt +++ b/Tools/requirements-dev.txt @@ -4,4 +4,4 @@ mypy==1.10.0 # needed for peg_generator: types-psutil==5.9.5.20240423 -types-setuptools==69.5.0.20240423 +types-setuptools==70.0.0.20240524 From 5152120ae746516670c77e7feed5c4a8912f2bbb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 1 Jun 2024 10:38:13 +0000 Subject: [PATCH 026/373] Bump types-psutil from 5.9.5.20240423 to 5.9.5.20240516 in /Tools (#119900) --- Tools/requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/requirements-dev.txt b/Tools/requirements-dev.txt index f430ddedb9f856..44316e3d7d8ac5 100644 --- a/Tools/requirements-dev.txt +++ b/Tools/requirements-dev.txt @@ -3,5 +3,5 @@ mypy==1.10.0 # needed for peg_generator: -types-psutil==5.9.5.20240423 +types-psutil==5.9.5.20240516 types-setuptools==70.0.0.20240524 From 60593b2052ca275559c11028d50e19f8e5dfee13 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Sat, 1 Jun 2024 10:04:05 -0400 Subject: [PATCH 027/373] gh-117657: Fix TSAN race in free-threaded GC (#119883) Only call 
`gc_restore_tid()` from stop-the-world contexts. `worklist_pop()` can be called while other threads are running, so use a relaxed atomic to modify `ob_tid`. --- Python/gc_free_threading.c | 5 ++--- Tools/tsan/suppressions_free_threading.txt | 3 --- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index ee006bb4aa12b7..e6bd012c40ee82 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -86,7 +86,7 @@ worklist_pop(struct worklist *worklist) PyObject *op = (PyObject *)worklist->head; if (op != NULL) { worklist->head = op->ob_tid; - op->ob_tid = 0; + _Py_atomic_store_uintptr_relaxed(&op->ob_tid, 0); } return op; } @@ -189,6 +189,7 @@ merge_refcount(PyObject *op, Py_ssize_t extra) static void gc_restore_tid(PyObject *op) { + assert(_PyInterpreterState_GET()->stoptheworld.world_stopped); mi_segment_t *segment = _mi_ptr_segment(op); if (_Py_REF_IS_MERGED(op->ob_ref_shared)) { op->ob_tid = 0; @@ -676,7 +677,6 @@ call_weakref_callbacks(struct collection_state *state) Py_DECREF(temp); } - gc_restore_tid(op); Py_DECREF(op); // drop worklist reference } } @@ -986,7 +986,6 @@ cleanup_worklist(struct worklist *worklist) { PyObject *op; while ((op = worklist_pop(worklist)) != NULL) { - gc_restore_tid(op); gc_clear_unreachable(op); Py_DECREF(op); } diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index cda57d78067bb3..951635e7c6533d 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -37,7 +37,6 @@ race_top:_PyImport_ReleaseLock race_top:_PyParkingLot_Park race_top:_PyType_HasFeature race_top:assign_version_tag -race_top:gc_restore_tid race_top:insertdict race_top:lookup_tp_dict race_top:mi_heap_visit_pages @@ -64,7 +63,6 @@ race_top:list_get_item_ref race_top:make_pending_calls race_top:set_add_entry race_top:should_intern_string -race_top:worklist_pop race_top:_PyEval_IsGILEnabled 
race_top:llist_insert_tail race_top:_Py_slot_tp_getattr_hook @@ -86,7 +84,6 @@ race_top:sock_close race_top:tstate_delete_common race_top:tstate_is_freed race_top:type_modified_unlocked -race_top:update_refs race_top:write_thread_id race_top:PyThreadState_Clear From 90ec19fd33e2452902b9788d4821f1fbf6542304 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Sat, 1 Jun 2024 10:04:38 -0400 Subject: [PATCH 028/373] gh-117657: Fix TSAN race in QSBR assertion (#119887) Due to a limitation in TSAN, all reads from `PyThreadState.state` must be atomic to avoid reported races. --- Python/qsbr.c | 3 ++- Tools/tsan/suppressions_free_threading.txt | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Python/qsbr.c b/Python/qsbr.c index 9cbce9044e2941..a7321154a62ffc 100644 --- a/Python/qsbr.c +++ b/Python/qsbr.c @@ -160,7 +160,8 @@ qsbr_poll_scan(struct _qsbr_shared *shared) bool _Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal) { - assert(_PyThreadState_GET()->state == _Py_THREAD_ATTACHED); + assert(_Py_atomic_load_int_relaxed(&_PyThreadState_GET()->state) == _Py_THREAD_ATTACHED); + if (_Py_qbsr_goal_reached(qsbr, goal)) { return true; } diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index 951635e7c6533d..9a53990f8b2ff8 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -75,8 +75,6 @@ race_top:_PyFrame_GetCode race_top:_PyFrame_Initialize race_top:PyInterpreterState_ThreadHead race_top:_PyObject_TryGetInstanceAttribute -race_top:_Py_qsbr_unregister -race_top:_Py_qsbr_poll race_top:PyThreadState_Next race_top:Py_TYPE race_top:PyUnstable_InterpreterFrame_GetLine From ce2ea7d629788fd051cbec099b5947ecbe50e819 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 1 Jun 2024 10:49:14 -0500 Subject: [PATCH 029/373] Minor speed/accuracy improvement for kde() (gh-119910) --- Lib/statistics.py | 17 +++++++++-------- Lib/test/test_statistics.py | 2 
+- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index 450edfaabe8def..c36145fe7f2a79 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -953,12 +953,14 @@ def kde(data, h, kernel='normal', *, cumulative=False): case 'quartic' | 'biweight': K = lambda t: 15/16 * (1.0 - t * t) ** 2 - W = lambda t: 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2 + W = lambda t: sumprod((3/16, -5/8, 15/16, 1/2), + (t**5, t**3, t, 1.0)) support = 1.0 case 'triweight': K = lambda t: 35/32 * (1.0 - t * t) ** 3 - W = lambda t: 35/32 * (-1/7*t**7 + 3/5*t**5 - t**3 + t) + 1/2 + W = lambda t: sumprod((-5/32, 21/32, -35/32, 35/32, 1/2), + (t**7, t**5, t**3, t, 1.0)) support = 1.0 case 'cosine': @@ -974,12 +976,10 @@ def kde(data, h, kernel='normal', *, cumulative=False): if support is None: def pdf(x): - n = len(data) - return sum(K((x - x_i) / h) for x_i in data) / (n * h) + return sum(K((x - x_i) / h) for x_i in data) / (len(data) * h) def cdf(x): - n = len(data) - return sum(W((x - x_i) / h) for x_i in data) / n + return sum(W((x - x_i) / h) for x_i in data) / len(data) else: @@ -1732,7 +1732,7 @@ def _quartic_invcdf_estimate(p): _quartic_invcdf = _newton_raphson( f_inv_estimate = _quartic_invcdf_estimate, - f = lambda t: 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2, + f = lambda t: sumprod((3/16, -5/8, 15/16, 1/2), (t**5, t**3, t, 1.0)), f_prime = lambda t: 15/16 * (1.0 - t * t) ** 2) def _triweight_invcdf_estimate(p): @@ -1742,7 +1742,8 @@ def _triweight_invcdf_estimate(p): _triweight_invcdf = _newton_raphson( f_inv_estimate = _triweight_invcdf_estimate, - f = lambda t: 35/32 * (-1/7*t**7 + 3/5*t**5 - t**3 + t) + 1/2, + f = lambda t: sumprod((-5/32, 21/32, -35/32, 35/32, 1/2), + (t**7, t**5, t**3, t, 1.0)), f_prime = lambda t: 35/32 * (1.0 - t * t) ** 3) _kernel_invcdfs = { diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index 6f68edd447c953..cded8aba6e8cd7 100644 --- a/Lib/test/test_statistics.py +++ 
b/Lib/test/test_statistics.py @@ -2444,7 +2444,7 @@ def test_kde_kernel_invcdfs(self): with self.subTest(kernel=kernel): cdf = kde([0.0], h=1.0, kernel=kernel, cumulative=True) for x in xarr: - self.assertAlmostEqual(invcdf(cdf(x)), x, places=5) + self.assertAlmostEqual(invcdf(cdf(x)), x, places=6) @support.requires_resource('cpu') def test_kde_random(self): From cf3bba3f0671d2c9fee099e3ab0f78b98b176131 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Sat, 1 Jun 2024 19:05:19 +0300 Subject: [PATCH 030/373] gh-113892: Add a extra check to `ProactorEventLoop.sock_connect` to ensure that the given socket is in non-blocking mode (#119519) --- Lib/asyncio/proactor_events.py | 2 ++ Lib/test/test_asyncio/test_proactor_events.py | 9 +++++++-- .../2024-05-24-21-54-55.gh-issue-113892.JKDFqq.rst | 3 +++ 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-24-21-54-55.gh-issue-113892.JKDFqq.rst diff --git a/Lib/asyncio/proactor_events.py b/Lib/asyncio/proactor_events.py index 397a8cda757895..7eb55bd63ddb73 100644 --- a/Lib/asyncio/proactor_events.py +++ b/Lib/asyncio/proactor_events.py @@ -721,6 +721,8 @@ async def sock_sendto(self, sock, data, address): return await self._proactor.sendto(sock, data, 0, address) async def sock_connect(self, sock, address): + if self._debug and sock.gettimeout() != 0: + raise ValueError("the socket must be non-blocking") return await self._proactor.connect(sock, address) async def sock_accept(self, sock): diff --git a/Lib/test/test_asyncio/test_proactor_events.py b/Lib/test/test_asyncio/test_proactor_events.py index fcaa2f6ade2b76..4b3d551dd7b3a2 100644 --- a/Lib/test/test_asyncio/test_proactor_events.py +++ b/Lib/test/test_asyncio/test_proactor_events.py @@ -1018,9 +1018,9 @@ def setUp(self): self.addCleanup(self.file.close) super().setUp() - def make_socket(self, cleanup=True): + def make_socket(self, cleanup=True, blocking=False): sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 
- sock.setblocking(False) + sock.setblocking(blocking) sock.setsockopt(socket.SOL_SOCKET, socket.SO_SNDBUF, 1024) sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 1024) if cleanup: @@ -1082,6 +1082,11 @@ def test_sock_sendfile_not_regular_file(self): 0, None)) self.assertEqual(self.file.tell(), 0) + def test_blocking_socket(self): + self.loop.set_debug(True) + sock = self.make_socket(blocking=True) + with self.assertRaisesRegex(ValueError, "must be non-blocking"): + self.run_loop(self.loop.sock_sendfile(sock, self.file)) if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Library/2024-05-24-21-54-55.gh-issue-113892.JKDFqq.rst b/Misc/NEWS.d/next/Library/2024-05-24-21-54-55.gh-issue-113892.JKDFqq.rst new file mode 100644 index 00000000000000..639d5abe878344 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-24-21-54-55.gh-issue-113892.JKDFqq.rst @@ -0,0 +1,3 @@ +Now, the method ``sock_connect`` of :class:`asyncio.ProactorEventLoop` +raises a :exc:`ValueError` if given socket is not in +non-blocking mode, as well as in other loop implementations. From 7dc745d1f5d9558047a52cad5e01df7567533269 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Sat, 1 Jun 2024 12:15:58 -0400 Subject: [PATCH 031/373] gh-117657: Add TSAN suppression for `set_discard_entry` (#119908) Seen in CI occasionally when running `test_weakref`. 
--- Tools/tsan/suppressions_free_threading.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index 9a53990f8b2ff8..f855e9ce2698a5 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -44,6 +44,8 @@ race_top:PyMember_GetOne race_top:PyMember_SetOne race_top:new_reference race_top:set_contains_key +# https://gist.github.com/colesbury/d13d033f413b4ad07929d044bed86c35 +race_top:set_discard_entry race_top:set_inheritable race_top:start_the_world race_top:tstate_set_detached From 63111bfcf021db29ce6ef9ffa4117ffb7a2cb868 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 1 Jun 2024 11:30:24 -0500 Subject: [PATCH 032/373] Add unique() recipe to itertools docs (gh-119911) --- Doc/library/itertools.rst | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index 121bfd3de343c4..3dc3f60923a0ba 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -857,7 +857,7 @@ and :term:`generators ` which incur interpreter overhead. return len(take(2, groupby(iterable, key))) <= 1 def unique_justseen(iterable, key=None): - "List unique elements, preserving order. Remember only the element just seen." + "Yield unique elements, preserving order. Remember only the element just seen." # unique_justseen('AAAABBBCCDAABBB') → A B C D A B # unique_justseen('ABBcCAD', str.casefold) → A B c A D if key is None: @@ -865,7 +865,7 @@ and :term:`generators ` which incur interpreter overhead. return map(next, map(operator.itemgetter(1), groupby(iterable, key))) def unique_everseen(iterable, key=None): - "List unique elements, preserving order. Remember all elements ever seen." + "Yield unique elements, preserving order. Remember all elements ever seen." 
# unique_everseen('AAAABBBCCDAABBB') → A B C D # unique_everseen('ABBcCAD', str.casefold) → A B c D seen = set() @@ -880,6 +880,11 @@ and :term:`generators ` which incur interpreter overhead. seen.add(k) yield element + def unique(iterable, key=None, reverse=False): + "Yield unique elements in sorted order. Supports unhashable inputs." + # unique([[1, 2], [3, 4], [1, 2]]) → [1, 2] [3, 4] + return unique_justseen(sorted(iterable, key=key, reverse=reverse), key=key) + def sliding_window(iterable, n): "Collect data into overlapping fixed-length chunks or blocks." # sliding_window('ABCDEFG', 4) → ABCD BCDE CDEF DEFG @@ -1605,6 +1610,13 @@ The following recipes have a more mathematical flavor: >>> ''.join(input_iterator) 'AAABBBCCDAABBB' + >>> list(unique([[1, 2], [3, 4], [1, 2]])) + [[1, 2], [3, 4]] + >>> list(unique('ABBcCAD', str.casefold)) + ['A', 'B', 'c', 'D'] + >>> list(unique('ABBcCAD', str.casefold, reverse=True)) + ['D', 'c', 'B', 'A'] + >>> d = dict(a=1, b=2, c=3) >>> it = iter_except(d.popitem, KeyError) >>> d['d'] = 4 From 53b1981fb0cda6c656069e992f172fc6aad7c99c Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 1 Jun 2024 19:49:12 +0100 Subject: [PATCH 033/373] GH-89727: Fix `shutil.rmtree()` recursion error on deep trees (#119808) Implement `shutil._rmtree_safe_fd()` using a list as a stack to avoid emitting recursion errors on deeply nested trees. `shutil._rmtree_unsafe()` was fixed in a150679f90. 
--- Lib/shutil.py | 162 +++++++----------- Lib/test/test_shutil.py | 1 - ...4-05-30-21-37-05.gh-issue-89727.D6S9ig.rst | 2 + 3 files changed, 68 insertions(+), 97 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-30-21-37-05.gh-issue-89727.D6S9ig.rst diff --git a/Lib/shutil.py b/Lib/shutil.py index 03a9d756030430..b0d49e98cfe5f9 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -635,81 +635,76 @@ def onerror(err): onexc(os.rmdir, path, err) # Version using fd-based APIs to protect against races -def _rmtree_safe_fd(topfd, path, onexc): +def _rmtree_safe_fd(stack, onexc): + # Each stack item has four elements: + # * func: The first operation to perform: os.lstat, os.close or os.rmdir. + # Walking a directory starts with an os.lstat() to detect symlinks; in + # this case, func is updated before subsequent operations and passed to + # onexc() if an error occurs. + # * dirfd: Open file descriptor, or None if we're processing the top-level + # directory given to rmtree() and the user didn't supply dir_fd. + # * path: Path of file to operate upon. This is passed to onexc() if an + # error occurs. + # * orig_entry: os.DirEntry, or None if we're processing the top-level + # directory given to rmtree(). We used the cached stat() of the entry to + # save a call to os.lstat() when walking subdirectories. + func, dirfd, path, orig_entry = stack.pop() + name = path if orig_entry is None else orig_entry.name try: + if func is os.close: + os.close(dirfd) + return + if func is os.rmdir: + os.rmdir(name, dir_fd=dirfd) + return + + # Note: To guard against symlink races, we use the standard + # lstat()/open()/fstat() trick. + assert func is os.lstat + if orig_entry is None: + orig_st = os.lstat(name, dir_fd=dirfd) + else: + orig_st = orig_entry.stat(follow_symlinks=False) + + func = os.open # For error reporting. + topfd = os.open(name, os.O_RDONLY | os.O_NONBLOCK, dir_fd=dirfd) + + func = os.path.islink # For error reporting. 
+ try: + if not os.path.samestat(orig_st, os.fstat(topfd)): + # Symlinks to directories are forbidden, see GH-46010. + raise OSError("Cannot call rmtree on a symbolic link") + stack.append((os.rmdir, dirfd, path, orig_entry)) + finally: + stack.append((os.close, topfd, path, orig_entry)) + + func = os.scandir # For error reporting. with os.scandir(topfd) as scandir_it: entries = list(scandir_it) - except FileNotFoundError: - return - except OSError as err: - err.filename = path - onexc(os.scandir, path, err) - return - for entry in entries: - fullname = os.path.join(path, entry.name) - try: - is_dir = entry.is_dir(follow_symlinks=False) - except FileNotFoundError: - continue - except OSError: - is_dir = False - else: - if is_dir: - try: - orig_st = entry.stat(follow_symlinks=False) - is_dir = stat.S_ISDIR(orig_st.st_mode) - except FileNotFoundError: - continue - except OSError as err: - onexc(os.lstat, fullname, err) - continue - if is_dir: + for entry in entries: + fullname = os.path.join(path, entry.name) try: - dirfd = os.open(entry.name, os.O_RDONLY | os.O_NONBLOCK, dir_fd=topfd) - dirfd_closed = False + if entry.is_dir(follow_symlinks=False): + # Traverse into sub-directory. + stack.append((os.lstat, topfd, fullname, entry)) + continue except FileNotFoundError: continue - except OSError as err: - onexc(os.open, fullname, err) - else: - try: - if os.path.samestat(orig_st, os.fstat(dirfd)): - _rmtree_safe_fd(dirfd, fullname, onexc) - try: - os.close(dirfd) - except OSError as err: - # close() should not be retried after an error. - dirfd_closed = True - onexc(os.close, fullname, err) - dirfd_closed = True - try: - os.rmdir(entry.name, dir_fd=topfd) - except FileNotFoundError: - continue - except OSError as err: - onexc(os.rmdir, fullname, err) - else: - try: - # This can only happen if someone replaces - # a directory with a symlink after the call to - # os.scandir or stat.S_ISDIR above. 
- raise OSError("Cannot call rmtree on a symbolic " - "link") - except OSError as err: - onexc(os.path.islink, fullname, err) - finally: - if not dirfd_closed: - try: - os.close(dirfd) - except OSError as err: - onexc(os.close, fullname, err) - else: + except OSError: + pass try: os.unlink(entry.name, dir_fd=topfd) except FileNotFoundError: continue except OSError as err: onexc(os.unlink, fullname, err) + except FileNotFoundError as err: + if orig_entry is None or func is os.close: + err.filename = path + onexc(func, path, err) + except OSError as err: + err.filename = path + onexc(func, path, err) _use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <= os.supports_dir_fd and @@ -762,41 +757,16 @@ def onexc(*args): # While the unsafe rmtree works fine on bytes, the fd based does not. if isinstance(path, bytes): path = os.fsdecode(path) - # Note: To guard against symlink races, we use the standard - # lstat()/open()/fstat() trick. - try: - orig_st = os.lstat(path, dir_fd=dir_fd) - except OSError as err: - onexc(os.lstat, path, err) - return + stack = [(os.lstat, dir_fd, path, None)] try: - fd = os.open(path, os.O_RDONLY | os.O_NONBLOCK, dir_fd=dir_fd) - fd_closed = False - except OSError as err: - onexc(os.open, path, err) - return - try: - if os.path.samestat(orig_st, os.fstat(fd)): - _rmtree_safe_fd(fd, path, onexc) - try: - os.close(fd) - except OSError as err: - # close() should not be retried after an error. - fd_closed = True - onexc(os.close, path, err) - fd_closed = True - try: - os.rmdir(path, dir_fd=dir_fd) - except OSError as err: - onexc(os.rmdir, path, err) - else: - try: - # symlinks to directories are forbidden, see bug #1669 - raise OSError("Cannot call rmtree on a symbolic link") - except OSError as err: - onexc(os.path.islink, path, err) + while stack: + _rmtree_safe_fd(stack, onexc) finally: - if not fd_closed: + # Close any file descriptors still on the stack. 
+ while stack: + func, fd, path, entry = stack.pop() + if func is not os.close: + continue try: os.close(fd) except OSError as err: diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 01f139073dcd97..bccb81e0737c57 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -741,7 +741,6 @@ def _onexc(fn, path, exc): shutil.rmtree(TESTFN) raise - @unittest.skipIf(shutil._use_fd_functions, "fd-based functions remain unfixed (GH-89727)") def test_rmtree_above_recursion_limit(self): recursion_limit = 40 # directory_depth > recursion_limit diff --git a/Misc/NEWS.d/next/Library/2024-05-30-21-37-05.gh-issue-89727.D6S9ig.rst b/Misc/NEWS.d/next/Library/2024-05-30-21-37-05.gh-issue-89727.D6S9ig.rst new file mode 100644 index 00000000000000..854c56609acb8c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-30-21-37-05.gh-issue-89727.D6S9ig.rst @@ -0,0 +1,2 @@ +Fix issue with :func:`shutil.rmtree` where a :exc:`RecursionError` is raised +on deep directory trees. From c618f7d80e78f83cc24b6bdead33ca38cbd4d27f Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Sat, 1 Jun 2024 23:20:00 +0200 Subject: [PATCH 034/373] gh-119016: Remove outdated sentences from the "classes" tutorial (#119130) Co-authored-by: Alex Waygood --- Doc/tutorial/classes.rst | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Doc/tutorial/classes.rst b/Doc/tutorial/classes.rst index 7ab528acb370f2..1b64741c349ee9 100644 --- a/Doc/tutorial/classes.rst +++ b/Doc/tutorial/classes.rst @@ -338,11 +338,7 @@ code will print the value ``16``, without leaving a trace:: del x.counter The other kind of instance attribute reference is a *method*. A method is a -function that "belongs to" an object. (In Python, the term method is not unique -to class instances: other object types can have methods as well. For example, -list objects have methods called append, insert, remove, sort, and so on. 
-However, in the following discussion, we'll use the term method exclusively to -mean methods of class instance objects, unless explicitly stated otherwise.) +function that "belongs to" an object. .. index:: pair: object; method From e378dc15b52985724b6ae4782c4ef0afc3393ca9 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 1 Jun 2024 22:07:46 -0500 Subject: [PATCH 035/373] Refactor (mostly rearrange) the statistics module (gh-119930) --- Lib/statistics.py | 1851 ++++++++++++++++++----------------- Lib/test/test_statistics.py | 5 +- 2 files changed, 952 insertions(+), 904 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index c36145fe7f2a79..c64c6fae4ab010 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -147,445 +147,148 @@ _SQRT2 = sqrt(2.0) _random = random -# === Exceptions === +## Exceptions ############################################################## class StatisticsError(ValueError): pass -# === Private utilities === +## Measures of central tendency (averages) ################################# -def _sum(data): - """_sum(data) -> (type, sum, count) - - Return a high-precision sum of the given numeric data as a fraction, - together with the type to be converted to and the count of items. - - Examples - -------- - - >>> _sum([3, 2.25, 4.5, -0.5, 0.25]) - (, Fraction(19, 2), 5) - - Some sources of round-off error will be avoided: - - # Built-in sum returns zero. - >>> _sum([1e50, 1, -1e50] * 1000) - (, Fraction(1000, 1), 3000) +def mean(data): + """Return the sample arithmetic mean of data. 
- Fractions and Decimals are also supported: + >>> mean([1, 2, 3, 4, 4]) + 2.8 >>> from fractions import Fraction as F - >>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)]) - (, Fraction(63, 20), 4) + >>> mean([F(3, 7), F(1, 21), F(5, 3), F(1, 3)]) + Fraction(13, 21) >>> from decimal import Decimal as D - >>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")] - >>> _sum(data) - (, Fraction(6963, 10000), 4) + >>> mean([D("0.5"), D("0.75"), D("0.625"), D("0.375")]) + Decimal('0.5625') + + If ``data`` is empty, StatisticsError will be raised. - Mixed types are currently treated as an error, except that int is - allowed. """ - count = 0 - types = set() - types_add = types.add - partials = {} - partials_get = partials.get - for typ, values in groupby(data, type): - types_add(typ) - for n, d in map(_exact_ratio, values): - count += 1 - partials[d] = partials_get(d, 0) + n - if None in partials: - # The sum will be a NAN or INF. We can ignore all the finite - # partials, and just look at this special one. - total = partials[None] - assert not _isfinite(total) - else: - # Sum all the partial sums using builtin sum. - total = sum(Fraction(n, d) for d, n in partials.items()) - T = reduce(_coerce, types, int) # or raise TypeError - return (T, total, count) + T, total, n = _sum(data) + if n < 1: + raise StatisticsError('mean requires at least one data point') + return _convert(total / n, T) -def _ss(data, c=None): - """Return the exact mean and sum of square deviations of sequence data. +def fmean(data, weights=None): + """Convert data to floats and compute the arithmetic mean. - Calculations are done in a single pass, allowing the input to be an iterator. + This runs faster than the mean() function and it always returns a float. + If the input dataset is empty, it raises a StatisticsError. - If given *c* is used the mean; otherwise, it is calculated from the data. - Use the *c* argument with care, as it can lead to garbage results. 
+ >>> fmean([3.5, 4.0, 5.25]) + 4.25 """ - if c is not None: - T, ssd, count = _sum((d := x - c) * d for x in data) - return (T, ssd, c, count) - count = 0 - types = set() - types_add = types.add - sx_partials = defaultdict(int) - sxx_partials = defaultdict(int) - for typ, values in groupby(data, type): - types_add(typ) - for n, d in map(_exact_ratio, values): - count += 1 - sx_partials[d] += n - sxx_partials[d] += n * n - if not count: - ssd = c = Fraction(0) - elif None in sx_partials: - # The sum will be a NAN or INF. We can ignore all the finite - # partials, and just look at this special one. - ssd = c = sx_partials[None] - assert not _isfinite(ssd) - else: - sx = sum(Fraction(n, d) for d, n in sx_partials.items()) - sxx = sum(Fraction(n, d*d) for d, n in sxx_partials.items()) - # This formula has poor numeric properties for floats, - # but with fractions it is exact. - ssd = (count * sxx - sx * sx) / count - c = sx / count - T = reduce(_coerce, types, int) # or raise TypeError - return (T, ssd, c, count) + if weights is None: + try: + n = len(data) + except TypeError: + # Handle iterators that do not define __len__(). + counter = count() + total = fsum(map(itemgetter(0), zip(data, counter))) + n = next(counter) + else: + total = fsum(data) -def _isfinite(x): - try: - return x.is_finite() # Likely a Decimal. - except AttributeError: - return math.isfinite(x) # Coerces to float first. + if not n: + raise StatisticsError('fmean requires at least one data point') + return total / n -def _coerce(T, S): - """Coerce types T and S to a common type, or raise TypeError. + if not isinstance(weights, (list, tuple)): + weights = list(weights) - Coercion rules are currently an implementation detail. See the CoerceTest - test class in test_statistics for details. - """ - # See http://bugs.python.org/issue24068. - assert T is not bool, "initial type T is bool" - # If the types are the same, no need to coerce anything. 
Put this - # first, so that the usual case (no coercion needed) happens as soon - # as possible. - if T is S: return T - # Mixed int & other coerce to the other type. - if S is int or S is bool: return T - if T is int: return S - # If one is a (strict) subclass of the other, coerce to the subclass. - if issubclass(S, T): return S - if issubclass(T, S): return T - # Ints coerce to the other type. - if issubclass(T, int): return S - if issubclass(S, int): return T - # Mixed fraction & float coerces to float (or float subclass). - if issubclass(T, Fraction) and issubclass(S, float): - return S - if issubclass(T, float) and issubclass(S, Fraction): - return T - # Any other combination is disallowed. - msg = "don't know how to coerce %s and %s" - raise TypeError(msg % (T.__name__, S.__name__)) + try: + num = sumprod(data, weights) + except ValueError: + raise StatisticsError('data and weights must be the same length') + den = fsum(weights) -def _exact_ratio(x): - """Return Real number x to exact (numerator, denominator) pair. + if not den: + raise StatisticsError('sum of weights must be non-zero') - >>> _exact_ratio(0.25) - (1, 4) + return num / den - x is expected to be an int, Fraction, Decimal or float. - """ - # XXX We should revisit whether using fractions to accumulate exact - # ratios is the right way to go. +def geometric_mean(data): + """Convert data to floats and compute the geometric mean. - # The integer ratios for binary floats can have numerators or - # denominators with over 300 decimal digits. The problem is more - # acute with decimal floats where the default decimal context - # supports a huge range of exponents from Emin=-999999 to - # Emax=999999. When expanded with as_integer_ratio(), numbers like - # Decimal('3.14E+5000') and Decimal('3.14E-5000') have large - # numerators or denominators that will slow computation. + Raises a StatisticsError if the input dataset is empty + or if it contains a negative value. 
- # When the integer ratios are accumulated as fractions, the size - # grows to cover the full range from the smallest magnitude to the - # largest. For example, Fraction(3.14E+300) + Fraction(3.14E-300), - # has a 616 digit numerator. Likewise, - # Fraction(Decimal('3.14E+5000')) + Fraction(Decimal('3.14E-5000')) - # has 10,003 digit numerator. + Returns zero if the product of inputs is zero. - # This doesn't seem to have been problem in practice, but it is a - # potential pitfall. + No special efforts are made to achieve exact results. + (However, this may change in the future.) - try: - return x.as_integer_ratio() - except AttributeError: - pass - except (OverflowError, ValueError): - # float NAN or INF. - assert not _isfinite(x) - return (x, None) - try: - # x may be an Integral ABC. - return (x.numerator, x.denominator) - except AttributeError: - msg = f"can't convert type '{type(x).__name__}' to numerator/denominator" - raise TypeError(msg) + >>> round(geometric_mean([54, 24, 36]), 9) + 36.0 + """ + n = 0 + found_zero = False -def _convert(value, T): - """Convert value to given numeric type T.""" - if type(value) is T: - # This covers the cases where T is Fraction, or where value is - # a NAN or INF (Decimal or float). - return value - if issubclass(T, int) and value.denominator != 1: - T = float - try: - # FIXME: what do we do if this overflows? 
- return T(value) - except TypeError: - if issubclass(T, Decimal): - return T(value.numerator) / T(value.denominator) - else: - raise + def count_positive(iterable): + nonlocal n, found_zero + for n, x in enumerate(iterable, start=1): + if x > 0.0 or math.isnan(x): + yield x + elif x == 0.0: + found_zero = True + else: + raise StatisticsError('No negative inputs allowed', x) + total = fsum(map(log, count_positive(data))) + if not n: + raise StatisticsError('Must have a non-empty dataset') + if math.isnan(total): + return math.nan + if found_zero: + return math.nan if total == math.inf else 0.0 -def _fail_neg(values, errmsg='negative value'): - """Iterate over values, failing if any are less than zero.""" - for x in values: - if x < 0: - raise StatisticsError(errmsg) - yield x + return exp(total / n) -def _rank(data, /, *, key=None, reverse=False, ties='average', start=1) -> list[float]: - """Rank order a dataset. The lowest value has rank 1. +def harmonic_mean(data, weights=None): + """Return the harmonic mean of data. - Ties are averaged so that equal values receive the same rank: + The harmonic mean is the reciprocal of the arithmetic mean of the + reciprocals of the data. It can be used for averaging ratios or + rates, for example speeds. - >>> data = [31, 56, 31, 25, 75, 18] - >>> _rank(data) - [3.5, 5.0, 3.5, 2.0, 6.0, 1.0] + Suppose a car travels 40 km/hr for 5 km and then speeds-up to + 60 km/hr for another 5 km. What is the average speed? - The operation is idempotent: + >>> harmonic_mean([40, 60]) + 48.0 - >>> _rank([3.5, 5.0, 3.5, 2.0, 6.0, 1.0]) - [3.5, 5.0, 3.5, 2.0, 6.0, 1.0] + Suppose a car travels 40 km/hr for 5 km, and when traffic clears, + speeds-up to 60 km/hr for the remaining 30 km of the journey. What + is the average speed? - It is possible to rank the data in reverse order so that the - highest value has rank 1. 
Also, a key-function can extract - the field to be ranked: - - >>> goals = [('eagles', 45), ('bears', 48), ('lions', 44)] - >>> _rank(goals, key=itemgetter(1), reverse=True) - [2.0, 1.0, 3.0] - - Ranks are conventionally numbered starting from one; however, - setting *start* to zero allows the ranks to be used as array indices: - - >>> prize = ['Gold', 'Silver', 'Bronze', 'Certificate'] - >>> scores = [8.1, 7.3, 9.4, 8.3] - >>> [prize[int(i)] for i in _rank(scores, start=0, reverse=True)] - ['Bronze', 'Certificate', 'Gold', 'Silver'] - - """ - # If this function becomes public at some point, more thought - # needs to be given to the signature. A list of ints is - # plausible when ties is "min" or "max". When ties is "average", - # either list[float] or list[Fraction] is plausible. - - # Default handling of ties matches scipy.stats.mstats.spearmanr. - if ties != 'average': - raise ValueError(f'Unknown tie resolution method: {ties!r}') - if key is not None: - data = map(key, data) - val_pos = sorted(zip(data, count()), reverse=reverse) - i = start - 1 - result = [0] * len(val_pos) - for _, g in groupby(val_pos, key=itemgetter(0)): - group = list(g) - size = len(group) - rank = i + (size + 1) / 2 - for value, orig_pos in group: - result[orig_pos] = rank - i += size - return result - - -def _integer_sqrt_of_frac_rto(n: int, m: int) -> int: - """Square root of n/m, rounded to the nearest integer using round-to-odd.""" - # Reference: https://www.lri.fr/~melquion/doc/05-imacs17_1-expose.pdf - a = math.isqrt(n // m) - return a | (a*a*m != n) - - -# For 53 bit precision floats, the bit width used in -# _float_sqrt_of_frac() is 109. 
-_sqrt_bit_width: int = 2 * sys.float_info.mant_dig + 3 - - -def _float_sqrt_of_frac(n: int, m: int) -> float: - """Square root of n/m as a float, correctly rounded.""" - # See principle and proof sketch at: https://bugs.python.org/msg407078 - q = (n.bit_length() - m.bit_length() - _sqrt_bit_width) // 2 - if q >= 0: - numerator = _integer_sqrt_of_frac_rto(n, m << 2 * q) << q - denominator = 1 - else: - numerator = _integer_sqrt_of_frac_rto(n << -2 * q, m) - denominator = 1 << -q - return numerator / denominator # Convert to float - - -def _decimal_sqrt_of_frac(n: int, m: int) -> Decimal: - """Square root of n/m as a Decimal, correctly rounded.""" - # Premise: For decimal, computing (n/m).sqrt() can be off - # by 1 ulp from the correctly rounded result. - # Method: Check the result, moving up or down a step if needed. - if n <= 0: - if not n: - return Decimal('0.0') - n, m = -n, -m - - root = (Decimal(n) / Decimal(m)).sqrt() - nr, dr = root.as_integer_ratio() - - plus = root.next_plus() - np, dp = plus.as_integer_ratio() - # test: n / m > ((root + plus) / 2) ** 2 - if 4 * n * (dr*dp)**2 > m * (dr*np + dp*nr)**2: - return plus - - minus = root.next_minus() - nm, dm = minus.as_integer_ratio() - # test: n / m < ((root + minus) / 2) ** 2 - if 4 * n * (dr*dm)**2 < m * (dr*nm + dm*nr)**2: - return minus - - return root - - -# === Measures of central tendency (averages) === - -def mean(data): - """Return the sample arithmetic mean of data. - - >>> mean([1, 2, 3, 4, 4]) - 2.8 - - >>> from fractions import Fraction as F - >>> mean([F(3, 7), F(1, 21), F(5, 3), F(1, 3)]) - Fraction(13, 21) - - >>> from decimal import Decimal as D - >>> mean([D("0.5"), D("0.75"), D("0.625"), D("0.375")]) - Decimal('0.5625') - - If ``data`` is empty, StatisticsError will be raised. 
- """ - T, total, n = _sum(data) - if n < 1: - raise StatisticsError('mean requires at least one data point') - return _convert(total / n, T) - - -def fmean(data, weights=None): - """Convert data to floats and compute the arithmetic mean. - - This runs faster than the mean() function and it always returns a float. - If the input dataset is empty, it raises a StatisticsError. - - >>> fmean([3.5, 4.0, 5.25]) - 4.25 - """ - if weights is None: - try: - n = len(data) - except TypeError: - # Handle iterators that do not define __len__(). - counter = count() - total = fsum(map(itemgetter(0), zip(data, counter))) - n = next(counter) - else: - total = fsum(data) - if not n: - raise StatisticsError('fmean requires at least one data point') - return total / n - if not isinstance(weights, (list, tuple)): - weights = list(weights) - try: - num = sumprod(data, weights) - except ValueError: - raise StatisticsError('data and weights must be the same length') - den = fsum(weights) - if not den: - raise StatisticsError('sum of weights must be non-zero') - return num / den - - -def geometric_mean(data): - """Convert data to floats and compute the geometric mean. - - Raises a StatisticsError if the input dataset is empty - or if it contains a negative value. - - Returns zero if the product of inputs is zero. - - No special efforts are made to achieve exact results. - (However, this may change in the future.) 
- - >>> round(geometric_mean([54, 24, 36]), 9) - 36.0 - """ - n = 0 - found_zero = False - def count_positive(iterable): - nonlocal n, found_zero - for n, x in enumerate(iterable, start=1): - if x > 0.0 or math.isnan(x): - yield x - elif x == 0.0: - found_zero = True - else: - raise StatisticsError('No negative inputs allowed', x) - total = fsum(map(log, count_positive(data))) - if not n: - raise StatisticsError('Must have a non-empty dataset') - if math.isnan(total): - return math.nan - if found_zero: - return math.nan if total == math.inf else 0.0 - return exp(total / n) - - -def harmonic_mean(data, weights=None): - """Return the harmonic mean of data. - - The harmonic mean is the reciprocal of the arithmetic mean of the - reciprocals of the data. It can be used for averaging ratios or - rates, for example speeds. - - Suppose a car travels 40 km/hr for 5 km and then speeds-up to - 60 km/hr for another 5 km. What is the average speed? - - >>> harmonic_mean([40, 60]) - 48.0 - - Suppose a car travels 40 km/hr for 5 km, and when traffic clears, - speeds-up to 60 km/hr for the remaining 30 km of the journey. What - is the average speed? - - >>> harmonic_mean([40, 60], weights=[5, 30]) - 56.0 + >>> harmonic_mean([40, 60], weights=[5, 30]) + 56.0 If ``data`` is empty, or any element is less than zero, ``harmonic_mean`` will raise ``StatisticsError``. 
+ """ if iter(data) is data: data = list(data) + errmsg = 'harmonic mean does not support negative values' + n = len(data) if n < 1: raise StatisticsError('harmonic_mean requires at least one data point') @@ -597,6 +300,7 @@ def harmonic_mean(data, weights=None): return x else: raise TypeError('unsupported type') + if weights is None: weights = repeat(1, n) sum_weights = n @@ -606,16 +310,19 @@ def harmonic_mean(data, weights=None): if len(weights) != n: raise StatisticsError('Number of weights does not match data size') _, sum_weights, _ = _sum(w for w in _fail_neg(weights, errmsg)) + try: data = _fail_neg(data, errmsg) T, total, count = _sum(w / x if w else 0 for w, x in zip(weights, data)) except ZeroDivisionError: return 0 + if total <= 0: raise StatisticsError('Weighted sum must be positive') + return _convert(sum_weights / total, T) -# FIXME: investigate ways to calculate medians without sorting? Quickselect? + def median(data): """Return the median (middle value) of numeric data. @@ -652,6 +359,9 @@ def median_low(data): 3 """ + # Potentially the sorting step could be replaced with a quickselect. + # However, it would require an excellent implementation to beat our + # highly optimized builtin sort. data = sorted(data) n = len(data) if n == 0: @@ -795,6 +505,7 @@ def multimode(data): ['b', 'd', 'f'] >>> multimode('') [] + """ counts = Counter(iter(data)) if not counts: @@ -803,337 +514,36 @@ def multimode(data): return [value for value, count in counts.items() if count == maxcount] -def kde(data, h, kernel='normal', *, cumulative=False): - """Kernel Density Estimation: Create a continuous probability density - function or cumulative distribution function from discrete samples. - - The basic idea is to smooth the data using a kernel function - to help draw inferences about a population from a sample. 
+## Measures of spread ###################################################### - The degree of smoothing is controlled by the scaling parameter h - which is called the bandwidth. Smaller values emphasize local - features while larger values give smoother results. +def variance(data, xbar=None): + """Return the sample variance of data. - The kernel determines the relative weights of the sample data - points. Generally, the choice of kernel shape does not matter - as much as the more influential bandwidth smoothing parameter. + data should be an iterable of Real-valued numbers, with at least two + values. The optional argument xbar, if given, should be the mean of + the data. If it is missing or None, the mean is automatically calculated. - Kernels that give some weight to every sample point: + Use this function when your data is a sample from a population. To + calculate the variance from the entire population, see ``pvariance``. - normal (gauss) - logistic - sigmoid + Examples: - Kernels that only give weight to sample points within - the bandwidth: + >>> data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5] + >>> variance(data) + 1.3720238095238095 - rectangular (uniform) - triangular - parabolic (epanechnikov) - quartic (biweight) - triweight - cosine + If you have already calculated the mean of your data, you can pass it as + the optional second argument ``xbar`` to avoid recalculating it: - If *cumulative* is true, will return a cumulative distribution function. + >>> m = mean(data) + >>> variance(data, m) + 1.3720238095238095 - A StatisticsError will be raised if the data sequence is empty. + This function does not check that ``xbar`` is actually the mean of + ``data``. Giving arbitrary values for ``xbar`` may lead to invalid or + impossible results. 
- Example - ------- - - Given a sample of six data points, construct a continuous - function that estimates the underlying probability density: - - >>> sample = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2] - >>> f_hat = kde(sample, h=1.5) - - Compute the area under the curve: - - >>> area = sum(f_hat(x) for x in range(-20, 20)) - >>> round(area, 4) - 1.0 - - Plot the estimated probability density function at - evenly spaced points from -6 to 10: - - >>> for x in range(-6, 11): - ... density = f_hat(x) - ... plot = ' ' * int(density * 400) + 'x' - ... print(f'{x:2}: {density:.3f} {plot}') - ... - -6: 0.002 x - -5: 0.009 x - -4: 0.031 x - -3: 0.070 x - -2: 0.111 x - -1: 0.125 x - 0: 0.110 x - 1: 0.086 x - 2: 0.068 x - 3: 0.059 x - 4: 0.066 x - 5: 0.082 x - 6: 0.082 x - 7: 0.058 x - 8: 0.028 x - 9: 0.009 x - 10: 0.002 x - - Estimate P(4.5 < X <= 7.5), the probability that a new sample value - will be between 4.5 and 7.5: - - >>> cdf = kde(sample, h=1.5, cumulative=True) - >>> round(cdf(7.5) - cdf(4.5), 2) - 0.22 - - References - ---------- - - Kernel density estimation and its application: - https://www.itm-conferences.org/articles/itmconf/pdf/2018/08/itmconf_sam2018_00037.pdf - - Kernel functions in common use: - https://en.wikipedia.org/wiki/Kernel_(statistics)#kernel_functions_in_common_use - - Interactive graphical demonstration and exploration: - https://demonstrations.wolfram.com/KernelDensityEstimation/ - - Kernel estimation of cumulative distribution function of a random variable with bounded support - https://www.econstor.eu/bitstream/10419/207829/1/10.21307_stattrans-2016-037.pdf - - """ - - n = len(data) - if not n: - raise StatisticsError('Empty data sequence') - - if not isinstance(data[0], (int, float)): - raise TypeError('Data sequence must contain ints or floats') - - if h <= 0.0: - raise StatisticsError(f'Bandwidth h must be positive, not {h=!r}') - - match kernel: - - case 'normal' | 'gauss': - sqrt2pi = sqrt(2 * pi) - sqrt2 = sqrt(2) - K = lambda t: exp(-1/2 * 
t * t) / sqrt2pi - W = lambda t: 1/2 * (1.0 + erf(t / sqrt2)) - support = None - - case 'logistic': - # 1.0 / (exp(t) + 2.0 + exp(-t)) - K = lambda t: 1/2 / (1.0 + cosh(t)) - W = lambda t: 1.0 - 1.0 / (exp(t) + 1.0) - support = None - - case 'sigmoid': - # (2/pi) / (exp(t) + exp(-t)) - c1 = 1 / pi - c2 = 2 / pi - K = lambda t: c1 / cosh(t) - W = lambda t: c2 * atan(exp(t)) - support = None - - case 'rectangular' | 'uniform': - K = lambda t: 1/2 - W = lambda t: 1/2 * t + 1/2 - support = 1.0 - - case 'triangular': - K = lambda t: 1.0 - abs(t) - W = lambda t: t*t * (1/2 if t < 0.0 else -1/2) + t + 1/2 - support = 1.0 - - case 'parabolic' | 'epanechnikov': - K = lambda t: 3/4 * (1.0 - t * t) - W = lambda t: -1/4 * t**3 + 3/4 * t + 1/2 - support = 1.0 - - case 'quartic' | 'biweight': - K = lambda t: 15/16 * (1.0 - t * t) ** 2 - W = lambda t: sumprod((3/16, -5/8, 15/16, 1/2), - (t**5, t**3, t, 1.0)) - support = 1.0 - - case 'triweight': - K = lambda t: 35/32 * (1.0 - t * t) ** 3 - W = lambda t: sumprod((-5/32, 21/32, -35/32, 35/32, 1/2), - (t**7, t**5, t**3, t, 1.0)) - support = 1.0 - - case 'cosine': - c1 = pi / 4 - c2 = pi / 2 - K = lambda t: c1 * cos(c2 * t) - W = lambda t: 1/2 * sin(c2 * t) + 1/2 - support = 1.0 - - case _: - raise StatisticsError(f'Unknown kernel name: {kernel!r}') - - if support is None: - - def pdf(x): - return sum(K((x - x_i) / h) for x_i in data) / (len(data) * h) - - def cdf(x): - return sum(W((x - x_i) / h) for x_i in data) / len(data) - - else: - - sample = sorted(data) - bandwidth = h * support - - def pdf(x): - nonlocal n, sample - if len(data) != n: - sample = sorted(data) - n = len(data) - i = bisect_left(sample, x - bandwidth) - j = bisect_right(sample, x + bandwidth) - supported = sample[i : j] - return sum(K((x - x_i) / h) for x_i in supported) / (n * h) - - def cdf(x): - nonlocal n, sample - if len(data) != n: - sample = sorted(data) - n = len(data) - i = bisect_left(sample, x - bandwidth) - j = bisect_right(sample, x + bandwidth) - 
supported = sample[i : j] - return sum((W((x - x_i) / h) for x_i in supported), i) / n - - if cumulative: - cdf.__doc__ = f'CDF estimate with {h=!r} and {kernel=!r}' - return cdf - - else: - pdf.__doc__ = f'PDF estimate with {h=!r} and {kernel=!r}' - return pdf - - -# Notes on methods for computing quantiles -# ---------------------------------------- -# -# There is no one perfect way to compute quantiles. Here we offer -# two methods that serve common needs. Most other packages -# surveyed offered at least one or both of these two, making them -# "standard" in the sense of "widely-adopted and reproducible". -# They are also easy to explain, easy to compute manually, and have -# straight-forward interpretations that aren't surprising. - -# The default method is known as "R6", "PERCENTILE.EXC", or "expected -# value of rank order statistics". The alternative method is known as -# "R7", "PERCENTILE.INC", or "mode of rank order statistics". - -# For sample data where there is a positive probability for values -# beyond the range of the data, the R6 exclusive method is a -# reasonable choice. Consider a random sample of nine values from a -# population with a uniform distribution from 0.0 to 1.0. The -# distribution of the third ranked sample point is described by -# betavariate(alpha=3, beta=7) which has mode=0.250, median=0.286, and -# mean=0.300. Only the latter (which corresponds with R6) gives the -# desired cut point with 30% of the population falling below that -# value, making it comparable to a result from an inv_cdf() function. -# The R6 exclusive method is also idempotent. - -# For describing population data where the end points are known to -# be included in the data, the R7 inclusive method is a reasonable -# choice. Instead of the mean, it uses the mode of the beta -# distribution for the interior points. 
Per Hyndman & Fan, "One nice -# property is that the vertices of Q7(p) divide the range into n - 1 -# intervals, and exactly 100p% of the intervals lie to the left of -# Q7(p) and 100(1 - p)% of the intervals lie to the right of Q7(p)." - -# If needed, other methods could be added. However, for now, the -# position is that fewer options make for easier choices and that -# external packages can be used for anything more advanced. - -def quantiles(data, *, n=4, method='exclusive'): - """Divide *data* into *n* continuous intervals with equal probability. - - Returns a list of (n - 1) cut points separating the intervals. - - Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles. - Set *n* to 100 for percentiles which gives the 99 cuts points that - separate *data* in to 100 equal sized groups. - - The *data* can be any iterable containing sample. - The cut points are linearly interpolated between data points. - - If *method* is set to *inclusive*, *data* is treated as population - data. The minimum value is treated as the 0th percentile and the - maximum value is treated as the 100th percentile. - """ - if n < 1: - raise StatisticsError('n must be at least 1') - data = sorted(data) - ld = len(data) - if ld < 2: - if ld == 1: - return data * (n - 1) - raise StatisticsError('must have at least one data point') - - if method == 'inclusive': - m = ld - 1 - result = [] - for i in range(1, n): - j, delta = divmod(i * m, n) - interpolated = (data[j] * (n - delta) + data[j + 1] * delta) / n - result.append(interpolated) - return result - - if method == 'exclusive': - m = ld + 1 - result = [] - for i in range(1, n): - j = i * m // n # rescale i to m/n - j = 1 if j < 1 else ld-1 if j > ld-1 else j # clamp to 1 .. 
ld-1 - delta = i*m - j*n # exact integer math - interpolated = (data[j - 1] * (n - delta) + data[j] * delta) / n - result.append(interpolated) - return result - - raise ValueError(f'Unknown method: {method!r}') - - -# === Measures of spread === - -# See http://mathworld.wolfram.com/Variance.html -# http://mathworld.wolfram.com/SampleVariance.html - - -def variance(data, xbar=None): - """Return the sample variance of data. - - data should be an iterable of Real-valued numbers, with at least two - values. The optional argument xbar, if given, should be the mean of - the data. If it is missing or None, the mean is automatically calculated. - - Use this function when your data is a sample from a population. To - calculate the variance from the entire population, see ``pvariance``. - - Examples: - - >>> data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5] - >>> variance(data) - 1.3720238095238095 - - If you have already calculated the mean of your data, you can pass it as - the optional second argument ``xbar`` to avoid recalculating it: - - >>> m = mean(data) - >>> variance(data, m) - 1.3720238095238095 - - This function does not check that ``xbar`` is actually the mean of - ``data``. Giving arbitrary values for ``xbar`` may lead to invalid or - impossible results. 
- - Decimals and Fractions are supported: + Decimals and Fractions are supported: >>> from decimal import Decimal as D >>> variance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")]) @@ -1144,6 +554,8 @@ def variance(data, xbar=None): Fraction(67, 108) """ + # http://mathworld.wolfram.com/SampleVariance.html + T, ss, c, n = _ss(data, xbar) if n < 2: raise StatisticsError('variance requires at least two data points') @@ -1185,6 +597,8 @@ def pvariance(data, mu=None): Fraction(13, 72) """ + # http://mathworld.wolfram.com/Variance.html + T, ss, c, n = _ss(data, mu) if n < 1: raise StatisticsError('pvariance requires at least one data point') @@ -1227,46 +641,7 @@ def pstdev(data, mu=None): return _float_sqrt_of_frac(mss.numerator, mss.denominator) -def _mean_stdev(data): - """In one pass, compute the mean and sample standard deviation as floats.""" - T, ss, xbar, n = _ss(data) - if n < 2: - raise StatisticsError('stdev requires at least two data points') - mss = ss / (n - 1) - try: - return float(xbar), _float_sqrt_of_frac(mss.numerator, mss.denominator) - except AttributeError: - # Handle Nans and Infs gracefully - return float(xbar), float(xbar) / float(ss) - -def _sqrtprod(x: float, y: float) -> float: - "Return sqrt(x * y) computed with improved accuracy and without overflow/underflow." - h = sqrt(x * y) - if not isfinite(h): - if isinf(h) and not isinf(x) and not isinf(y): - # Finite inputs overflowed, so scale down, and recompute. - scale = 2.0 ** -512 # sqrt(1 / sys.float_info.max) - return _sqrtprod(scale * x, scale * y) / scale - return h - if not h: - if x and y: - # Non-zero inputs underflowed, so scale up, and recompute. - # Scale: 1 / sqrt(sys.float_info.min * sys.float_info.epsilon) - scale = 2.0 ** 537 - return _sqrtprod(scale * x, scale * y) / scale - return h - # Improve accuracy with a differential correction. 
- # https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0 - d = sumprod((x, h), (y, -h)) - return h + d / (2.0 * h) - - -# === Statistics for relations between two inputs === - -# See https://en.wikipedia.org/wiki/Covariance -# https://en.wikipedia.org/wiki/Pearson_correlation_coefficient -# https://en.wikipedia.org/wiki/Simple_linear_regression - +## Statistics for relations between two inputs ############################# def covariance(x, y, /): """Covariance @@ -1285,6 +660,7 @@ def covariance(x, y, /): -7.5 """ + # https://en.wikipedia.org/wiki/Covariance n = len(x) if len(y) != n: raise StatisticsError('covariance requires that both inputs have same number of data points') @@ -1318,7 +694,10 @@ def correlation(x, y, /, *, method='linear'): Spearman's rank correlation coefficient is appropriate for ordinal data or for continuous data that doesn't meet the linear proportion requirement for Pearson's correlation coefficient. + """ + # https://en.wikipedia.org/wiki/Pearson_correlation_coefficient + # https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient n = len(x) if len(y) != n: raise StatisticsError('correlation requires that both inputs have same number of data points') @@ -1326,6 +705,7 @@ def correlation(x, y, /, *, method='linear'): raise StatisticsError('correlation requires at least two data points') if method not in {'linear', 'ranked'}: raise ValueError(f'Unknown method: {method!r}') + if method == 'ranked': start = (n - 1) / -2 # Center rankings around zero x = _rank(x, start=start) @@ -1335,9 +715,11 @@ def correlation(x, y, /, *, method='linear'): ybar = fsum(y) / n x = [xi - xbar for xi in x] y = [yi - ybar for yi in y] + sxy = sumprod(x, y) sxx = sumprod(x, x) syy = sumprod(y, y) + try: return sxy / _sqrtprod(sxx, syy) except ZeroDivisionError: @@ -1384,30 +766,447 @@ def linear_regression(x, y, /, *, proportional=False): >>> linear_regression(x, y, proportional=True) #doctest: +ELLIPSIS 
LinearRegression(slope=2.90475..., intercept=0.0) - """ - n = len(x) - if len(y) != n: - raise StatisticsError('linear regression requires that both inputs have same number of data points') - if n < 2: - raise StatisticsError('linear regression requires at least two data points') - if not proportional: - xbar = fsum(x) / n - ybar = fsum(y) / n - x = [xi - xbar for xi in x] # List because used three times below - y = (yi - ybar for yi in y) # Generator because only used once below - sxy = sumprod(x, y) + 0.0 # Add zero to coerce result to a float - sxx = sumprod(x, x) - try: - slope = sxy / sxx # equivalent to: covariance(x, y) / variance(x) - except ZeroDivisionError: - raise StatisticsError('x is constant') - intercept = 0.0 if proportional else ybar - slope * xbar - return LinearRegression(slope=slope, intercept=intercept) + """ + # https://en.wikipedia.org/wiki/Simple_linear_regression + n = len(x) + if len(y) != n: + raise StatisticsError('linear regression requires that both inputs have same number of data points') + if n < 2: + raise StatisticsError('linear regression requires at least two data points') + + if not proportional: + xbar = fsum(x) / n + ybar = fsum(y) / n + x = [xi - xbar for xi in x] # List because used three times below + y = (yi - ybar for yi in y) # Generator because only used once below + + sxy = sumprod(x, y) + 0.0 # Add zero to coerce result to a float + sxx = sumprod(x, x) + + try: + slope = sxy / sxx # equivalent to: covariance(x, y) / variance(x) + except ZeroDivisionError: + raise StatisticsError('x is constant') + + intercept = 0.0 if proportional else ybar - slope * xbar + return LinearRegression(slope=slope, intercept=intercept) + + +## Kernel Density Estimation ############################################### + +_kernel_specs = {} + +def register(*kernels): + "Load the kernel's pdf, cdf, invcdf, and support into _kernel_specs." 
+ def deco(builder): + spec = dict(zip(('pdf', 'cdf', 'invcdf', 'support'), builder())) + for kernel in kernels: + _kernel_specs[kernel] = spec + return builder + return deco + +@register('normal', 'gauss') +def normal_kernel(): + sqrt2pi = sqrt(2 * pi) + sqrt2 = sqrt(2) + pdf = lambda t: exp(-1/2 * t * t) / sqrt2pi + cdf = lambda t: 1/2 * (1.0 + erf(t / sqrt2)) + invcdf = lambda t: _normal_dist_inv_cdf(t, 0.0, 1.0) + support = None + return pdf, cdf, invcdf, support + +@register('logistic') +def logistic_kernel(): + # 1.0 / (exp(t) + 2.0 + exp(-t)) + pdf = lambda t: 1/2 / (1.0 + cosh(t)) + cdf = lambda t: 1.0 - 1.0 / (exp(t) + 1.0) + invcdf = lambda p: log(p / (1.0 - p)) + support = None + return pdf, cdf, invcdf, support + +@register('sigmoid') +def sigmoid_kernel(): + # (2/pi) / (exp(t) + exp(-t)) + c1 = 1 / pi + c2 = 2 / pi + c3 = pi / 2 + pdf = lambda t: c1 / cosh(t) + cdf = lambda t: c2 * atan(exp(t)) + invcdf = lambda p: log(tan(p * c3)) + support = None + return pdf, cdf, invcdf, support + +@register('rectangular', 'uniform') +def rectangular_kernel(): + pdf = lambda t: 1/2 + cdf = lambda t: 1/2 * t + 1/2 + invcdf = lambda p: 2.0 * p - 1.0 + support = 1.0 + return pdf, cdf, invcdf, support + +@register('triangular') +def triangular_kernel(): + pdf = lambda t: 1.0 - abs(t) + cdf = lambda t: t*t * (1/2 if t < 0.0 else -1/2) + t + 1/2 + invcdf = lambda p: sqrt(2.0*p) - 1.0 if p < 1/2 else 1.0 - sqrt(2.0 - 2.0*p) + support = 1.0 + return pdf, cdf, invcdf, support + +@register('parabolic', 'epanechnikov') +def parabolic_kernel(): + pdf = lambda t: 3/4 * (1.0 - t * t) + cdf = lambda t: sumprod((-1/4, 3/4, 1/2), (t**3, t, 1.0)) + invcdf = lambda p: 2.0 * cos((acos(2.0*p - 1.0) + pi) / 3.0) + support = 1.0 + return pdf, cdf, invcdf, support + +def _newton_raphson(f_inv_estimate, f, f_prime, tolerance=1e-12): + def f_inv(y): + "Return x such that f(x) ≈ y within the specified tolerance." 
+ x = f_inv_estimate(y) + while abs(diff := f(x) - y) > tolerance: + x -= diff / f_prime(x) + return x + return f_inv + +def _quartic_invcdf_estimate(p): + sign, p = (1.0, p) if p <= 1/2 else (-1.0, 1.0 - p) + x = (2.0 * p) ** 0.4258865685331 - 1.0 + if p >= 0.004 < 0.499: + x += 0.026818732 * sin(7.101753784 * p + 2.73230839482953) + return x * sign + +@register('quartic', 'biweight') +def quartic_kernel(): + pdf = lambda t: 15/16 * (1.0 - t * t) ** 2 + cdf = lambda t: sumprod((3/16, -5/8, 15/16, 1/2), + (t**5, t**3, t, 1.0)) + invcdf = _newton_raphson(_quartic_invcdf_estimate, f=cdf, f_prime=pdf) + support = 1.0 + return pdf, cdf, invcdf, support + +def _triweight_invcdf_estimate(p): + sign, p = (1.0, p) if p <= 1/2 else (-1.0, 1.0 - p) + x = (2.0 * p) ** 0.3400218741872791 - 1.0 + return x * sign + +@register('triweight') +def triweight_kernel(): + pdf = lambda t: 35/32 * (1.0 - t * t) ** 3 + cdf = lambda t: sumprod((-5/32, 21/32, -35/32, 35/32, 1/2), + (t**7, t**5, t**3, t, 1.0)) + invcdf = _newton_raphson(_triweight_invcdf_estimate, f=cdf, f_prime=pdf) + support = 1.0 + return pdf, cdf, invcdf, support + +@register('cosine') +def cosine_kernel(): + c1 = pi / 4 + c2 = pi / 2 + pdf = lambda t: c1 * cos(c2 * t) + cdf = lambda t: 1/2 * sin(c2 * t) + 1/2 + invcdf = lambda p: 2.0 * asin(2.0 * p - 1.0) / pi + support = 1.0 + return pdf, cdf, invcdf, support + +del register, normal_kernel, logistic_kernel, sigmoid_kernel +del rectangular_kernel, triangular_kernel, parabolic_kernel +del quartic_kernel, triweight_kernel, cosine_kernel + + +def kde(data, h, kernel='normal', *, cumulative=False): + """Kernel Density Estimation: Create a continuous probability density + function or cumulative distribution function from discrete samples. + + The basic idea is to smooth the data using a kernel function + to help draw inferences about a population from a sample. + + The degree of smoothing is controlled by the scaling parameter h + which is called the bandwidth. 
Smaller values emphasize local + features while larger values give smoother results. + + The kernel determines the relative weights of the sample data + points. Generally, the choice of kernel shape does not matter + as much as the more influential bandwidth smoothing parameter. + + Kernels that give some weight to every sample point: + + normal (gauss) + logistic + sigmoid + + Kernels that only give weight to sample points within + the bandwidth: + + rectangular (uniform) + triangular + parabolic (epanechnikov) + quartic (biweight) + triweight + cosine + + If *cumulative* is true, will return a cumulative distribution function. + + A StatisticsError will be raised if the data sequence is empty. + + Example + ------- + + Given a sample of six data points, construct a continuous + function that estimates the underlying probability density: + + >>> sample = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2] + >>> f_hat = kde(sample, h=1.5) + + Compute the area under the curve: + + >>> area = sum(f_hat(x) for x in range(-20, 20)) + >>> round(area, 4) + 1.0 + + Plot the estimated probability density function at + evenly spaced points from -6 to 10: + + >>> for x in range(-6, 11): + ... density = f_hat(x) + ... plot = ' ' * int(density * 400) + 'x' + ... print(f'{x:2}: {density:.3f} {plot}') + ... 
+ -6: 0.002 x + -5: 0.009 x + -4: 0.031 x + -3: 0.070 x + -2: 0.111 x + -1: 0.125 x + 0: 0.110 x + 1: 0.086 x + 2: 0.068 x + 3: 0.059 x + 4: 0.066 x + 5: 0.082 x + 6: 0.082 x + 7: 0.058 x + 8: 0.028 x + 9: 0.009 x + 10: 0.002 x + + Estimate P(4.5 < X <= 7.5), the probability that a new sample value + will be between 4.5 and 7.5: + + >>> cdf = kde(sample, h=1.5, cumulative=True) + >>> round(cdf(7.5) - cdf(4.5), 2) + 0.22 + + References + ---------- + + Kernel density estimation and its application: + https://www.itm-conferences.org/articles/itmconf/pdf/2018/08/itmconf_sam2018_00037.pdf + + Kernel functions in common use: + https://en.wikipedia.org/wiki/Kernel_(statistics)#kernel_functions_in_common_use + + Interactive graphical demonstration and exploration: + https://demonstrations.wolfram.com/KernelDensityEstimation/ + + Kernel estimation of cumulative distribution function of a random variable with bounded support + https://www.econstor.eu/bitstream/10419/207829/1/10.21307_stattrans-2016-037.pdf + + """ + + n = len(data) + if not n: + raise StatisticsError('Empty data sequence') + + if not isinstance(data[0], (int, float)): + raise TypeError('Data sequence must contain ints or floats') + + if h <= 0.0: + raise StatisticsError(f'Bandwidth h must be positive, not {h=!r}') + + kernel_spec = _kernel_specs.get(kernel) + if kernel_spec is None: + raise StatisticsError(f'Unknown kernel name: {kernel!r}') + K = kernel_spec['pdf'] + W = kernel_spec['cdf'] + support = kernel_spec['support'] + + if support is None: + + def pdf(x): + return sum(K((x - x_i) / h) for x_i in data) / (len(data) * h) + + def cdf(x): + return sum(W((x - x_i) / h) for x_i in data) / len(data) + + else: + + sample = sorted(data) + bandwidth = h * support + + def pdf(x): + nonlocal n, sample + if len(data) != n: + sample = sorted(data) + n = len(data) + i = bisect_left(sample, x - bandwidth) + j = bisect_right(sample, x + bandwidth) + supported = sample[i : j] + return sum(K((x - x_i) / h) for x_i in 
supported) / (n * h) + + def cdf(x): + nonlocal n, sample + if len(data) != n: + sample = sorted(data) + n = len(data) + i = bisect_left(sample, x - bandwidth) + j = bisect_right(sample, x + bandwidth) + supported = sample[i : j] + return sum((W((x - x_i) / h) for x_i in supported), i) / n + + if cumulative: + cdf.__doc__ = f'CDF estimate with {h=!r} and {kernel=!r}' + return cdf + + else: + pdf.__doc__ = f'PDF estimate with {h=!r} and {kernel=!r}' + return pdf + + +def kde_random(data, h, kernel='normal', *, seed=None): + """Return a function that makes a random selection from the estimated + probability density function created by kde(data, h, kernel). + + Providing a *seed* allows reproducible selections within a single + thread. The seed may be an integer, float, str, or bytes. + + A StatisticsError will be raised if the *data* sequence is empty. + + Example: + + >>> data = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2] + >>> rand = kde_random(data, h=1.5, seed=8675309) + >>> new_selections = [rand() for i in range(10)] + >>> [round(x, 1) for x in new_selections] + [0.7, 6.2, 1.2, 6.9, 7.0, 1.8, 2.5, -0.5, -1.8, 5.6] + + """ + n = len(data) + if not n: + raise StatisticsError('Empty data sequence') + + if not isinstance(data[0], (int, float)): + raise TypeError('Data sequence must contain ints or floats') + + if h <= 0.0: + raise StatisticsError(f'Bandwidth h must be positive, not {h=!r}') + + kernel_spec = _kernel_specs.get(kernel) + if kernel_spec is None: + raise StatisticsError(f'Unknown kernel name: {kernel!r}') + invcdf = kernel_spec['invcdf'] + + prng = _random.Random(seed) + random = prng.random + choice = prng.choice + + def rand(): + return choice(data) + h * invcdf(random()) + + rand.__doc__ = f'Random KDE selection with {h=!r} and {kernel=!r}' + + return rand + + +## Quantiles ############################################################### + +# There is no one perfect way to compute quantiles. Here we offer +# two methods that serve common needs. 
Most other packages +# surveyed offered at least one or both of these two, making them +# "standard" in the sense of "widely-adopted and reproducible". +# They are also easy to explain, easy to compute manually, and have +# straight-forward interpretations that aren't surprising. + +# The default method is known as "R6", "PERCENTILE.EXC", or "expected +# value of rank order statistics". The alternative method is known as +# "R7", "PERCENTILE.INC", or "mode of rank order statistics". + +# For sample data where there is a positive probability for values +# beyond the range of the data, the R6 exclusive method is a +# reasonable choice. Consider a random sample of nine values from a +# population with a uniform distribution from 0.0 to 1.0. The +# distribution of the third ranked sample point is described by +# betavariate(alpha=3, beta=7) which has mode=0.250, median=0.286, and +# mean=0.300. Only the latter (which corresponds with R6) gives the +# desired cut point with 30% of the population falling below that +# value, making it comparable to a result from an inv_cdf() function. +# The R6 exclusive method is also idempotent. + +# For describing population data where the end points are known to +# be included in the data, the R7 inclusive method is a reasonable +# choice. Instead of the mean, it uses the mode of the beta +# distribution for the interior points. Per Hyndman & Fan, "One nice +# property is that the vertices of Q7(p) divide the range into n - 1 +# intervals, and exactly 100p% of the intervals lie to the left of +# Q7(p) and 100(1 - p)% of the intervals lie to the right of Q7(p)." + +# If needed, other methods could be added. However, for now, the +# position is that fewer options make for easier choices and that +# external packages can be used for anything more advanced. + +def quantiles(data, *, n=4, method='exclusive'): + """Divide *data* into *n* continuous intervals with equal probability. 
+ + Returns a list of (n - 1) cut points separating the intervals. + + Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles. + Set *n* to 100 for percentiles which gives the 99 cuts points that + separate *data* in to 100 equal sized groups. + + The *data* can be any iterable containing sample. + The cut points are linearly interpolated between data points. + + If *method* is set to *inclusive*, *data* is treated as population + data. The minimum value is treated as the 0th percentile and the + maximum value is treated as the 100th percentile. + + """ + if n < 1: + raise StatisticsError('n must be at least 1') + + data = sorted(data) + + ld = len(data) + if ld < 2: + if ld == 1: + return data * (n - 1) + raise StatisticsError('must have at least one data point') + + if method == 'inclusive': + m = ld - 1 + result = [] + for i in range(1, n): + j, delta = divmod(i * m, n) + interpolated = (data[j] * (n - delta) + data[j + 1] * delta) / n + result.append(interpolated) + return result + + if method == 'exclusive': + m = ld + 1 + result = [] + for i in range(1, n): + j = i * m // n # rescale i to m/n + j = 1 if j < 1 else ld-1 if j > ld-1 else j # clamp to 1 .. ld-1 + delta = i*m - j*n # exact integer math + interpolated = (data[j - 1] * (n - delta) + data[j] * delta) / n + result.append(interpolated) + return result + + raise ValueError(f'Unknown method: {method!r}') ## Normal Distribution ##################################################### - def _normal_dist_inv_cdf(p, mu, sigma): # There is no closed-form solution to the inverse CDF for the normal # distribution, so we use a rational approximation instead: @@ -1415,6 +1214,7 @@ def _normal_dist_inv_cdf(p, mu, sigma): # Normal Distribution". Applied Statistics. Blackwell Publishing. 37 # (3): 477–484. doi:10.2307/2347330. JSTOR 2347330. 
q = p - 0.5 + if fabs(q) <= 0.425: r = 0.180625 - q * q # Hash sum: 55.88319_28806_14901_4439 @@ -1436,6 +1236,7 @@ def _normal_dist_inv_cdf(p, mu, sigma): 1.0) x = num / den return mu + (x * sigma) + r = p if q <= 0.0 else 1.0 - p r = sqrt(-log(r)) if r <= 5.0: @@ -1476,9 +1277,11 @@ def _normal_dist_inv_cdf(p, mu, sigma): 1.36929_88092_27358_05310e-1) * r + 5.99832_20655_58879_37690e-1) * r + 1.0) + x = num / den if q < 0.0: x = -x + return mu + (x * sigma) @@ -1635,172 +1438,416 @@ def variance(self): def __add__(x1, x2): """Add a constant or another NormalDist instance. - If *other* is a constant, translate mu by the constant, - leaving sigma unchanged. + If *other* is a constant, translate mu by the constant, + leaving sigma unchanged. + + If *other* is a NormalDist, add both the means and the variances. + Mathematically, this works only if the two distributions are + independent or if they are jointly normally distributed. + """ + if isinstance(x2, NormalDist): + return NormalDist(x1._mu + x2._mu, hypot(x1._sigma, x2._sigma)) + return NormalDist(x1._mu + x2, x1._sigma) + + def __sub__(x1, x2): + """Subtract a constant or another NormalDist instance. + + If *other* is a constant, translate by the constant mu, + leaving sigma unchanged. + + If *other* is a NormalDist, subtract the means and add the variances. + Mathematically, this works only if the two distributions are + independent or if they are jointly normally distributed. + """ + if isinstance(x2, NormalDist): + return NormalDist(x1._mu - x2._mu, hypot(x1._sigma, x2._sigma)) + return NormalDist(x1._mu - x2, x1._sigma) + + def __mul__(x1, x2): + """Multiply both mu and sigma by a constant. + + Used for rescaling, perhaps to change measurement units. + Sigma is scaled with the absolute value of the constant. + """ + return NormalDist(x1._mu * x2, x1._sigma * fabs(x2)) + + def __truediv__(x1, x2): + """Divide both mu and sigma by a constant. + + Used for rescaling, perhaps to change measurement units. 
+ Sigma is scaled with the absolute value of the constant. + """ + return NormalDist(x1._mu / x2, x1._sigma / fabs(x2)) + + def __pos__(x1): + "Return a copy of the instance." + return NormalDist(x1._mu, x1._sigma) + + def __neg__(x1): + "Negates mu while keeping sigma the same." + return NormalDist(-x1._mu, x1._sigma) + + __radd__ = __add__ + + def __rsub__(x1, x2): + "Subtract a NormalDist from a constant or another NormalDist." + return -(x1 - x2) + + __rmul__ = __mul__ + + def __eq__(x1, x2): + "Two NormalDist objects are equal if their mu and sigma are both equal." + if not isinstance(x2, NormalDist): + return NotImplemented + return x1._mu == x2._mu and x1._sigma == x2._sigma + + def __hash__(self): + "NormalDist objects hash equal if their mu and sigma are both equal." + return hash((self._mu, self._sigma)) + + def __repr__(self): + return f'{type(self).__name__}(mu={self._mu!r}, sigma={self._sigma!r})' + + def __getstate__(self): + return self._mu, self._sigma + + def __setstate__(self, state): + self._mu, self._sigma = state + + +## Private utilities ####################################################### + +def _sum(data): + """_sum(data) -> (type, sum, count) + + Return a high-precision sum of the given numeric data as a fraction, + together with the type to be converted to and the count of items. + + Examples + -------- + + >>> _sum([3, 2.25, 4.5, -0.5, 0.25]) + (<class 'float'>, Fraction(19, 2), 5) + + Some sources of round-off error will be avoided: + + # Built-in sum returns zero. + >>> _sum([1e50, 1, -1e50] * 1000) + (<class 'float'>, Fraction(1000, 1), 3000) + + Fractions and Decimals are also supported: + + >>> from fractions import Fraction as F + >>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)]) + (<class 'fractions.Fraction'>, Fraction(63, 20), 4) + + >>> from decimal import Decimal as D + >>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")] + >>> _sum(data) + (<class 'decimal.Decimal'>, Fraction(6963, 10000), 4) + + Mixed types are currently treated as an error, except that int is + allowed.
+ + """ + count = 0 + types = set() + types_add = types.add + partials = {} + partials_get = partials.get + for typ, values in groupby(data, type): + types_add(typ) + for n, d in map(_exact_ratio, values): + count += 1 + partials[d] = partials_get(d, 0) + n + if None in partials: + # The sum will be a NAN or INF. We can ignore all the finite + # partials, and just look at this special one. + total = partials[None] + assert not _isfinite(total) + else: + # Sum all the partial sums using builtin sum. + total = sum(Fraction(n, d) for d, n in partials.items()) + T = reduce(_coerce, types, int) # or raise TypeError + return (T, total, count) + + +def _ss(data, c=None): + """Return the exact mean and sum of square deviations of sequence data. + + Calculations are done in a single pass, allowing the input to be an iterator. + + If given *c* is used the mean; otherwise, it is calculated from the data. + Use the *c* argument with care, as it can lead to garbage results. + + """ + if c is not None: + T, ssd, count = _sum((d := x - c) * d for x in data) + return (T, ssd, c, count) + + count = 0 + types = set() + types_add = types.add + sx_partials = defaultdict(int) + sxx_partials = defaultdict(int) + for typ, values in groupby(data, type): + types_add(typ) + for n, d in map(_exact_ratio, values): + count += 1 + sx_partials[d] += n + sxx_partials[d] += n * n + + if not count: + ssd = c = Fraction(0) + elif None in sx_partials: + # The sum will be a NAN or INF. We can ignore all the finite + # partials, and just look at this special one. + ssd = c = sx_partials[None] + assert not _isfinite(ssd) + else: + sx = sum(Fraction(n, d) for d, n in sx_partials.items()) + sxx = sum(Fraction(n, d*d) for d, n in sxx_partials.items()) + # This formula has poor numeric properties for floats, + # but with fractions it is exact. 
+ ssd = (count * sxx - sx * sx) / count + c = sx / count + + T = reduce(_coerce, types, int) # or raise TypeError + return (T, ssd, c, count) + + +def _isfinite(x): + try: + return x.is_finite() # Likely a Decimal. + except AttributeError: + return math.isfinite(x) # Coerces to float first. + + +def _coerce(T, S): + """Coerce types T and S to a common type, or raise TypeError. + + Coercion rules are currently an implementation detail. See the CoerceTest + test class in test_statistics for details. + + """ + # See http://bugs.python.org/issue24068. + assert T is not bool, "initial type T is bool" + # If the types are the same, no need to coerce anything. Put this + # first, so that the usual case (no coercion needed) happens as soon + # as possible. + if T is S: return T + # Mixed int & other coerce to the other type. + if S is int or S is bool: return T + if T is int: return S + # If one is a (strict) subclass of the other, coerce to the subclass. + if issubclass(S, T): return S + if issubclass(T, S): return T + # Ints coerce to the other type. + if issubclass(T, int): return S + if issubclass(S, int): return T + # Mixed fraction & float coerces to float (or float subclass). + if issubclass(T, Fraction) and issubclass(S, float): + return S + if issubclass(T, float) and issubclass(S, Fraction): + return T + # Any other combination is disallowed. + msg = "don't know how to coerce %s and %s" + raise TypeError(msg % (T.__name__, S.__name__)) + + +def _exact_ratio(x): + """Return Real number x to exact (numerator, denominator) pair. + + >>> _exact_ratio(0.25) + (1, 4) + + x is expected to be an int, Fraction, Decimal or float. - If *other* is a NormalDist, add both the means and the variances. - Mathematically, this works only if the two distributions are - independent or if they are jointly normally distributed. 
- """ - if isinstance(x2, NormalDist): - return NormalDist(x1._mu + x2._mu, hypot(x1._sigma, x2._sigma)) - return NormalDist(x1._mu + x2, x1._sigma) + """ + try: + return x.as_integer_ratio() + except AttributeError: + pass + except (OverflowError, ValueError): + # float NAN or INF. + assert not _isfinite(x) + return (x, None) - def __sub__(x1, x2): - """Subtract a constant or another NormalDist instance. + try: + # x may be an Integral ABC. + return (x.numerator, x.denominator) + except AttributeError: + msg = f"can't convert type '{type(x).__name__}' to numerator/denominator" + raise TypeError(msg) - If *other* is a constant, translate by the constant mu, - leaving sigma unchanged. - If *other* is a NormalDist, subtract the means and add the variances. - Mathematically, this works only if the two distributions are - independent or if they are jointly normally distributed. - """ - if isinstance(x2, NormalDist): - return NormalDist(x1._mu - x2._mu, hypot(x1._sigma, x2._sigma)) - return NormalDist(x1._mu - x2, x1._sigma) +def _convert(value, T): + """Convert value to given numeric type T.""" + if type(value) is T: + # This covers the cases where T is Fraction, or where value is + # a NAN or INF (Decimal or float). + return value + if issubclass(T, int) and value.denominator != 1: + T = float + try: + # FIXME: what do we do if this overflows? + return T(value) + except TypeError: + if issubclass(T, Decimal): + return T(value.numerator) / T(value.denominator) + else: + raise - def __mul__(x1, x2): - """Multiply both mu and sigma by a constant. - Used for rescaling, perhaps to change measurement units. - Sigma is scaled with the absolute value of the constant. 
- """ - return NormalDist(x1._mu * x2, x1._sigma * fabs(x2)) +def _fail_neg(values, errmsg='negative value'): + """Iterate over values, failing if any are less than zero.""" + for x in values: + if x < 0: + raise StatisticsError(errmsg) + yield x - def __truediv__(x1, x2): - """Divide both mu and sigma by a constant. - Used for rescaling, perhaps to change measurement units. - Sigma is scaled with the absolute value of the constant. - """ - return NormalDist(x1._mu / x2, x1._sigma / fabs(x2)) +def _rank(data, /, *, key=None, reverse=False, ties='average', start=1) -> list[float]: + """Rank order a dataset. The lowest value has rank 1. - def __pos__(x1): - "Return a copy of the instance." - return NormalDist(x1._mu, x1._sigma) + Ties are averaged so that equal values receive the same rank: - def __neg__(x1): - "Negates mu while keeping sigma the same." - return NormalDist(-x1._mu, x1._sigma) + >>> data = [31, 56, 31, 25, 75, 18] + >>> _rank(data) + [3.5, 5.0, 3.5, 2.0, 6.0, 1.0] - __radd__ = __add__ + The operation is idempotent: - def __rsub__(x1, x2): - "Subtract a NormalDist from a constant or another NormalDist." - return -(x1 - x2) + >>> _rank([3.5, 5.0, 3.5, 2.0, 6.0, 1.0]) + [3.5, 5.0, 3.5, 2.0, 6.0, 1.0] - __rmul__ = __mul__ + It is possible to rank the data in reverse order so that the + highest value has rank 1. Also, a key-function can extract + the field to be ranked: - def __eq__(x1, x2): - "Two NormalDist objects are equal if their mu and sigma are both equal." - if not isinstance(x2, NormalDist): - return NotImplemented - return x1._mu == x2._mu and x1._sigma == x2._sigma + >>> goals = [('eagles', 45), ('bears', 48), ('lions', 44)] + >>> _rank(goals, key=itemgetter(1), reverse=True) + [2.0, 1.0, 3.0] - def __hash__(self): - "NormalDist objects hash equal if their mu and sigma are both equal." 
- return hash((self._mu, self._sigma)) + Ranks are conventionally numbered starting from one; however, + setting *start* to zero allows the ranks to be used as array indices: - def __repr__(self): - return f'{type(self).__name__}(mu={self._mu!r}, sigma={self._sigma!r})' + >>> prize = ['Gold', 'Silver', 'Bronze', 'Certificate'] + >>> scores = [8.1, 7.3, 9.4, 8.3] + >>> [prize[int(i)] for i in _rank(scores, start=0, reverse=True)] + ['Bronze', 'Certificate', 'Gold', 'Silver'] - def __getstate__(self): - return self._mu, self._sigma + """ + # If this function becomes public at some point, more thought + # needs to be given to the signature. A list of ints is + # plausible when ties is "min" or "max". When ties is "average", + # either list[float] or list[Fraction] is plausible. - def __setstate__(self, state): - self._mu, self._sigma = state + # Default handling of ties matches scipy.stats.mstats.spearmanr. + if ties != 'average': + raise ValueError(f'Unknown tie resolution method: {ties!r}') + if key is not None: + data = map(key, data) + val_pos = sorted(zip(data, count()), reverse=reverse) + i = start - 1 + result = [0] * len(val_pos) + for _, g in groupby(val_pos, key=itemgetter(0)): + group = list(g) + size = len(group) + rank = i + (size + 1) / 2 + for value, orig_pos in group: + result[orig_pos] = rank + i += size + return result -## kde_random() ############################################################## +def _integer_sqrt_of_frac_rto(n: int, m: int) -> int: + """Square root of n/m, rounded to the nearest integer using round-to-odd.""" + # Reference: https://www.lri.fr/~melquion/doc/05-imacs17_1-expose.pdf + a = math.isqrt(n // m) + return a | (a*a*m != n) -def _newton_raphson(f_inv_estimate, f, f_prime, tolerance=1e-12): - def f_inv(y): - "Return x such that f(x) ≈ y within the specified tolerance." 
- x = f_inv_estimate(y) - while abs(diff := f(x) - y) > tolerance: - x -= diff / f_prime(x) - return x - return f_inv -def _quartic_invcdf_estimate(p): - sign, p = (1.0, p) if p <= 1/2 else (-1.0, 1.0 - p) - x = (2.0 * p) ** 0.4258865685331 - 1.0 - if p >= 0.004 < 0.499: - x += 0.026818732 * sin(7.101753784 * p + 2.73230839482953) - return x * sign +# For 53 bit precision floats, the bit width used in +# _float_sqrt_of_frac() is 109. +_sqrt_bit_width: int = 2 * sys.float_info.mant_dig + 3 -_quartic_invcdf = _newton_raphson( - f_inv_estimate = _quartic_invcdf_estimate, - f = lambda t: sumprod((3/16, -5/8, 15/16, 1/2), (t**5, t**3, t, 1.0)), - f_prime = lambda t: 15/16 * (1.0 - t * t) ** 2) -def _triweight_invcdf_estimate(p): - sign, p = (1.0, p) if p <= 1/2 else (-1.0, 1.0 - p) - x = (2.0 * p) ** 0.3400218741872791 - 1.0 - return x * sign +def _float_sqrt_of_frac(n: int, m: int) -> float: + """Square root of n/m as a float, correctly rounded.""" + # See principle and proof sketch at: https://bugs.python.org/msg407078 + q = (n.bit_length() - m.bit_length() - _sqrt_bit_width) // 2 + if q >= 0: + numerator = _integer_sqrt_of_frac_rto(n, m << 2 * q) << q + denominator = 1 + else: + numerator = _integer_sqrt_of_frac_rto(n << -2 * q, m) + denominator = 1 << -q + return numerator / denominator # Convert to float -_triweight_invcdf = _newton_raphson( - f_inv_estimate = _triweight_invcdf_estimate, - f = lambda t: sumprod((-5/32, 21/32, -35/32, 35/32, 1/2), - (t**7, t**5, t**3, t, 1.0)), - f_prime = lambda t: 35/32 * (1.0 - t * t) ** 3) - -_kernel_invcdfs = { - 'normal': NormalDist().inv_cdf, - 'logistic': lambda p: log(p / (1 - p)), - 'sigmoid': lambda p: log(tan(p * pi/2)), - 'rectangular': lambda p: 2*p - 1, - 'parabolic': lambda p: 2 * cos((acos(2*p-1) + pi) / 3), - 'quartic': _quartic_invcdf, - 'triweight': _triweight_invcdf, - 'triangular': lambda p: sqrt(2*p) - 1 if p < 1/2 else 1 - sqrt(2 - 2*p), - 'cosine': lambda p: 2 * asin(2*p - 1) / pi, -} 
-_kernel_invcdfs['gauss'] = _kernel_invcdfs['normal'] -_kernel_invcdfs['uniform'] = _kernel_invcdfs['rectangular'] -_kernel_invcdfs['epanechnikov'] = _kernel_invcdfs['parabolic'] -_kernel_invcdfs['biweight'] = _kernel_invcdfs['quartic'] -def kde_random(data, h, kernel='normal', *, seed=None): - """Return a function that makes a random selection from the estimated - probability density function created by kde(data, h, kernel). +def _decimal_sqrt_of_frac(n: int, m: int) -> Decimal: + """Square root of n/m as a Decimal, correctly rounded.""" + # Premise: For decimal, computing (n/m).sqrt() can be off + # by 1 ulp from the correctly rounded result. + # Method: Check the result, moving up or down a step if needed. + if n <= 0: + if not n: + return Decimal('0.0') + n, m = -n, -m - Providing a *seed* allows reproducible selections within a single - thread. The seed may be an integer, float, str, or bytes. + root = (Decimal(n) / Decimal(m)).sqrt() + nr, dr = root.as_integer_ratio() - A StatisticsError will be raised if the *data* sequence is empty. 
+ plus = root.next_plus() + np, dp = plus.as_integer_ratio() + # test: n / m > ((root + plus) / 2) ** 2 + if 4 * n * (dr*dp)**2 > m * (dr*np + dp*nr)**2: + return plus - Example: + minus = root.next_minus() + nm, dm = minus.as_integer_ratio() + # test: n / m < ((root + minus) / 2) ** 2 + if 4 * n * (dr*dm)**2 < m * (dr*nm + dm*nr)**2: + return minus - >>> data = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2] - >>> rand = kde_random(data, h=1.5, seed=8675309) - >>> new_selections = [rand() for i in range(10)] - >>> [round(x, 1) for x in new_selections] - [0.7, 6.2, 1.2, 6.9, 7.0, 1.8, 2.5, -0.5, -1.8, 5.6] + return root - """ - n = len(data) - if not n: - raise StatisticsError('Empty data sequence') - if not isinstance(data[0], (int, float)): - raise TypeError('Data sequence must contain ints or floats') +def _mean_stdev(data): + """In one pass, compute the mean and sample standard deviation as floats.""" + T, ss, xbar, n = _ss(data) + if n < 2: + raise StatisticsError('stdev requires at least two data points') + mss = ss / (n - 1) + try: + return float(xbar), _float_sqrt_of_frac(mss.numerator, mss.denominator) + except AttributeError: + # Handle Nans and Infs gracefully + return float(xbar), float(xbar) / float(ss) - if h <= 0.0: - raise StatisticsError(f'Bandwidth h must be positive, not {h=!r}') - kernel_invcdf = _kernel_invcdfs.get(kernel) - if kernel_invcdf is None: - raise StatisticsError(f'Unknown kernel name: {kernel!r}') +def _sqrtprod(x: float, y: float) -> float: + "Return sqrt(x * y) computed with improved accuracy and without overflow/underflow." - prng = _random.Random(seed) - random = prng.random - choice = prng.choice + h = sqrt(x * y) - def rand(): - return choice(data) + h * kernel_invcdf(random()) + if not isfinite(h): + if isinf(h) and not isinf(x) and not isinf(y): + # Finite inputs overflowed, so scale down, and recompute. 
+ scale = 2.0 ** -512 # sqrt(1 / sys.float_info.max) + return _sqrtprod(scale * x, scale * y) / scale + return h - rand.__doc__ = f'Random KDE selection with {h=!r} and {kernel=!r}' + if not h: + if x and y: + # Non-zero inputs underflowed, so scale up, and recompute. + # Scale: 1 / sqrt(sys.float_info.min * sys.float_info.epsilon) + scale = 2.0 ** 537 + return _sqrtprod(scale * x, scale * y) / scale + return h - return rand + # Improve accuracy with a differential correction. + # https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0 + d = sumprod((x, h), (y, -h)) + return h + d / (2.0 * h) diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index cded8aba6e8cd7..0b28459f03d86a 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -2435,12 +2435,13 @@ def integrate(func, low, high, steps=10_000): self.assertGreater(f_hat(100), 0.0) def test_kde_kernel_invcdfs(self): - kernel_invcdfs = statistics._kernel_invcdfs + kernel_specs = statistics._kernel_specs kde = statistics.kde # Verify that cdf / invcdf will round trip xarr = [i/100 for i in range(-100, 101)] - for kernel, invcdf in kernel_invcdfs.items(): + for kernel, spec in kernel_specs.items(): + invcdf = spec['invcdf'] with self.subTest(kernel=kernel): cdf = kde([0.0], h=1.0, kernel=kernel, cumulative=True) for x in xarr: From fd6cd621e0cce6ba2e737103d2a62b5ade90f41f Mon Sep 17 00:00:00 2001 From: Alyssa Coghlan Date: Sun, 2 Jun 2024 14:44:29 +1000 Subject: [PATCH 036/373] gh-118934: Fix PyEval_GetLocals docs (PEP 667) (#119932) PEP 667's description of the planned changes to PyEval_GetLocals was internally inconsistent when accepted, so the docs added for gh-74929 didn't match either the current behaviour or the intended behaviour once gh-118934 is fixed. This PR updates the documentation and 3.13 What's New to match the intended behaviour (once gh-118934 is fixed). 
It also tidies up lingering references to `f_locals` always being a dictionary (this hasn't been true since at least when custom namespace support for class statement execution was added) --- Doc/c-api/reflection.rst | 27 ++++++++++++++++----------- Doc/reference/datamodel.rst | 4 ++-- Doc/whatsnew/3.13.rst | 36 ++++++++++++++++++++++++++++-------- 3 files changed, 46 insertions(+), 21 deletions(-) diff --git a/Doc/c-api/reflection.rst b/Doc/c-api/reflection.rst index af9a1a74ec137e..038e6977104560 100644 --- a/Doc/c-api/reflection.rst +++ b/Doc/c-api/reflection.rst @@ -19,23 +19,28 @@ Reflection .. deprecated:: 3.13 - To avoid creating a reference cycle in :term:`optimized scopes <optimized scope>`, - use either :c:func:`PyEval_GetFrameLocals` to obtain the same behaviour as calling + Use either :c:func:`PyEval_GetFrameLocals` to obtain the same behaviour as calling :func:`locals` in Python code, or else call :c:func:`PyFrame_GetLocals` on the result - of :c:func:`PyEval_GetFrame` to get the same result as this function without having to - cache the proxy instance on the underlying frame. + of :c:func:`PyEval_GetFrame` to access the :attr:`~frame.f_locals` attribute of the + currently executing frame. - Return the :attr:`~frame.f_locals` attribute of the currently executing frame, + Return a mapping providing access to the local variables in the current execution frame, or ``NULL`` if no frame is currently executing. - If the frame refers to an :term:`optimized scope`, this returns a - write-through proxy object that allows modifying the locals. - In all other cases (classes, modules, :func:`exec`, :func:`eval`) it returns - the mapping representing the frame locals directly (as described for - :func:`locals`). + Refer to :func:`locals` for details of the mapping returned at different scopes.
+ + As this function returns a :term:`borrowed reference`, the dictionary returned for + :term:`optimized scopes <optimized scope>` is cached on the frame object and will remain + alive as long as the frame object does. Unlike :c:func:`PyEval_GetFrameLocals` and + :func:`locals`, subsequent calls to this function in the same frame will update the + contents of the cached dictionary to reflect changes in the state of the local variables + rather than returning a new snapshot. .. versionchanged:: 3.13 - As part of :pep:`667`, return a proxy object for optimized scopes. + As part of :pep:`667`, :c:func:`PyFrame_GetLocals`, :func:`locals`, and + :attr:`FrameType.f_locals <frame.f_locals>` no longer make use of the shared cache + dictionary. Refer to the :ref:`What's New entry <whatsnew313-locals-semantics>` for + additional details. .. c:function:: PyObject* PyEval_GetGlobals(void) diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 134385ed2f1860..9110060a6177e5 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -1347,13 +1347,13 @@ Special read-only attributes ``object.__getattr__`` with arguments ``obj`` and ``"f_code"``. * - .. attribute:: frame.f_locals - - The dictionary used by the frame to look up + - The mapping used by the frame to look up :ref:`local variables <naming>`. If the frame refers to an :term:`optimized scope`, this may return a write-through proxy object. .. versionchanged:: 3.13 - Return a proxy for functions and comprehensions. + Return a proxy for optimized scopes. * - .. attribute:: frame.f_globals - The dictionary used by the frame to look up diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index ab260bf2a2d740..66626ac06428b9 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -287,8 +287,9 @@ returns a write-through proxy to the frame's local and locally referenced nonlocal variables in these scopes, rather than returning an inconsistently updated shared ``dict`` instance with undefined runtime semantics.
-See :pep:`667` for more details, including related C API changes and -deprecations. +See :pep:`667` for more details, including related C API changes and deprecations. Porting +notes are also provided below for the affected :ref:`Python APIs <pep667-porting-notes-py>` +and :ref:`C APIs <pep667-porting-notes-c>`. (PEP and implementation contributed by Mark Shannon and Tian Gao in :gh:`74929`. Documentation updates provided by Guido van Rossum and @@ -2246,6 +2247,8 @@ Changes in the Python API returned by :meth:`zipfile.ZipFile.open` was changed from ``'r'`` to ``'rb'``. (Contributed by Serhiy Storchaka in :gh:`115961`.) +.. _pep667-porting-notes-py: + * Calling :func:`locals` in an :term:`optimized scope` now produces an independent snapshot on each call, and hence no longer implicitly updates previously returned references. Obtaining the legacy CPython behaviour now @@ -2341,15 +2344,27 @@ Changes in the C API to :c:func:`PyUnstable_Code_GetFirstFree`. (Contributed by Bogdan Romanyuk in :gh:`115781`.) -* Calling :c:func:`PyFrame_GetLocals` or :c:func:`PyEval_GetLocals` in an - :term:`optimized scope` now returns a write-through proxy rather than a - snapshot that gets updated at ill-specified times. If a snapshot is desired, - it must be created explicitly (e.g. with :c:func:`PyDict_Copy`) or by calling - the new :c:func:`PyEval_GetFrameLocals` API. (Changed as part of :pep:`667`.) +.. _pep667-porting-notes-c: + +* The effects of mutating the dictionary returned from :c:func:`PyEval_GetLocals` in an + :term:`optimized scope` have changed. New dict entries added this way will now *only* be + visible to subsequent :c:func:`PyEval_GetLocals` calls in that frame, as + :c:func:`PyFrame_GetLocals`, :func:`locals`, and + :attr:`FrameType.f_locals <frame.f_locals>` no longer access the same underlying cached + dictionary. Changes made to entries for actual variable names and names added via the + write-through proxy interfaces will be overwritten on subsequent calls to + :c:func:`PyEval_GetLocals` in that frame.
The recommended code update depends on how the + function was being used, so refer to the deprecation notice on the function for details. + (Changed as part of :pep:`667`.) + +* Calling :c:func:`PyFrame_GetLocals` in an :term:`optimized scope` now returns a + write-through proxy rather than a snapshot that gets updated at ill-specified times. + If a snapshot is desired, it must be created explicitly (e.g. with :c:func:`PyDict_Copy`) + or by calling the new :c:func:`PyEval_GetFrameLocals` API. (Changed as part of :pep:`667`.) * :c:func:`!PyFrame_FastToLocals` and :c:func:`!PyFrame_FastToLocalsWithError` no longer have any effect. Calling these functions has been redundant since - Python 3.11, when :c:func:`PyFrame_GetLocals` was first introduced. + Python 3.11, when :c:func:`PyFrame_GetLocals` was first introduced. (Changed as part of :pep:`667`.) * :c:func:`!PyFrame_LocalsToFast` no longer has any effect. Calling this function @@ -2509,6 +2524,11 @@ Deprecated C APIs :c:func:`PyWeakref_GetRef` on Python 3.12 and older. (Contributed by Victor Stinner in :gh:`105927`.) +* Deprecate the :c:func:`PyEval_GetBuiltins`, :c:func:`PyEval_GetGlobals`, and + :c:func:`PyEval_GetLocals` functions, which return a :term:`borrowed reference`. + Refer to the deprecation notices on each function for their recommended replacements. + (Soft deprecated as part of :pep:`667`.) 
+ Pending Removal in Python 3.14 ------------------------------ From 4aed319a8eb63b205d6007c36713cacdbf1ce8a3 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sun, 2 Jun 2024 10:27:20 +0300 Subject: [PATCH 037/373] gh-119775: Remove ability to create immutable types with mutable bases (#119776) --- Doc/whatsnew/3.14.rst | 2 ++ Lib/test/test_capi/test_misc.py | 28 ++----------------- ...-05-30-12-51-21.gh-issue-119775.CBq9IG.rst | 2 ++ Objects/typeobject.c | 16 ++++------- 4 files changed, 13 insertions(+), 35 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2024-05-30-12-51-21.gh-issue-119775.CBq9IG.rst diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index d443cf9bc56b98..45ffb281fcc032 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -258,3 +258,5 @@ Deprecated Removed ------- +* Creating :c:data:`immutable types <Py_TPFLAGS_IMMUTABLETYPE>` with mutable + bases was deprecated since 3.12 and now raises a :exc:`TypeError`. diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index f3d16e4a2fc92a..0dc0b530aec971 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -777,33 +777,11 @@ def test_pytype_fromspec_with_repeated_slots(self): with self.assertRaises(SystemError): _testcapi.create_type_from_repeated_slots(variant) - @warnings_helper.ignore_warnings(category=DeprecationWarning) def test_immutable_type_with_mutable_base(self): - # Add deprecation warning here so it's removed in 3.14 - warnings._deprecated( - 'creating immutable classes with mutable bases', remove=(3, 14)) - - class MutableBase: - def meth(self): - return 'original' - - with self.assertWarns(DeprecationWarning): - ImmutableSubclass = _testcapi.make_immutable_type_with_base( - MutableBase) - instance = ImmutableSubclass() + class MutableBase: ...
- self.assertEqual(instance.meth(), 'original') - - # Cannot override the static type's method - with self.assertRaisesRegex( - TypeError, - "cannot set 'meth' attribute of immutable type"): - ImmutableSubclass.meth = lambda self: 'overridden' - self.assertEqual(instance.meth(), 'original') - - # Can change the method on the mutable base - MutableBase.meth = lambda self: 'changed' - self.assertEqual(instance.meth(), 'changed') + with self.assertRaisesRegex(TypeError, 'Creating immutable type'): + _testcapi.make_immutable_type_with_base(MutableBase) def test_pynumber_tobase(self): from _testcapi import pynumber_tobase diff --git a/Misc/NEWS.d/next/C API/2024-05-30-12-51-21.gh-issue-119775.CBq9IG.rst b/Misc/NEWS.d/next/C API/2024-05-30-12-51-21.gh-issue-119775.CBq9IG.rst new file mode 100644 index 00000000000000..c342a3814ed5db --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-05-30-12-51-21.gh-issue-119775.CBq9IG.rst @@ -0,0 +1,2 @@ +Creating :c:data:`immutable types <Py_TPFLAGS_IMMUTABLETYPE>` with mutable +bases was deprecated since 3.12 and now raises a :exc:`TypeError`.
diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 290306cdb677e5..0095a79a2cafec 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -4613,16 +4613,12 @@ _PyType_FromMetaclass_impl( goto finally; } if (!_PyType_HasFeature(b, Py_TPFLAGS_IMMUTABLETYPE)) { - if (PyErr_WarnFormat( - PyExc_DeprecationWarning, - 0, - "Creating immutable type %s from mutable base %s is " - "deprecated, and slated to be disallowed in Python 3.14.", - spec->name, - b->tp_name)) - { - goto finally; - } + PyErr_Format( + PyExc_TypeError, + "Creating immutable type %s from mutable base %N", + spec->name, b + ); + goto finally; } } } From f79ffc879b919604ed5de22ece83825006cf9a17 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Sun, 2 Jun 2024 10:16:49 +0100 Subject: [PATCH 038/373] gh-119740: Remove deprecated trunc delegation (#119743) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the delegation of `int` to the `__trunc__` special method: `int` will now only delegate to `__int__` and `__index__` (in that order). `__trunc__` continues to exist, but its sole purpose is to support `math.trunc`. 
--------- Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Serhiy Storchaka --- Doc/library/functions.rst | 11 +-- Doc/reference/datamodel.rst | 7 +- Doc/whatsnew/3.14.rst | 5 + .../pycore_global_objects_fini_generated.h | 1 - Include/internal/pycore_global_strings.h | 1 - .../internal/pycore_runtime_init_generated.h | 1 - .../internal/pycore_unicodeobject_generated.h | 3 - Lib/test/test_int.py | 96 +------------------ Lib/test/test_long.py | 9 -- ...-05-29-18-53-43.gh-issue-119740.zP2JNM.rst | 2 + Objects/abstract.c | 32 ------- 11 files changed, 16 insertions(+), 152 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-29-18-53-43.gh-issue-119740.zP2JNM.rst diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 7291461c69acd2..4617767a71be18 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1004,9 +1004,8 @@ are always available. They are listed here in alphabetical order. 115 If the argument defines :meth:`~object.__int__`, - ``int(x)`` returns ``x.__int__()``. If the argument defines :meth:`~object.__index__`, - it returns ``x.__index__()``. If the argument defines :meth:`~object.__trunc__`, - it returns ``x.__trunc__()``. + ``int(x)`` returns ``x.__int__()``. If the argument defines + :meth:`~object.__index__`, it returns ``x.__index__()``. For floating point numbers, this truncates towards zero. If the argument is not a number or if *base* is given, then it must be a string, @@ -1044,9 +1043,6 @@ are always available. They are listed here in alphabetical order. .. versionchanged:: 3.8 Falls back to :meth:`~object.__index__` if :meth:`~object.__int__` is not defined. - .. versionchanged:: 3.11 - The delegation to :meth:`~object.__trunc__` is deprecated. - .. versionchanged:: 3.11 :class:`int` string inputs and string representations can be limited to help avoid denial of service attacks. 
A :exc:`ValueError` is raised when @@ -1055,6 +1051,9 @@ are always available. They are listed here in alphabetical order. See the :ref:`integer string conversion length limitation ` documentation. + .. versionchanged:: 3.14 + :func:`int` no longer delegates to the :meth:`~object.__trunc__` method. + .. function:: isinstance(object, classinfo) Return ``True`` if the *object* argument is an instance of the *classinfo* diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 9110060a6177e5..af4c585e1c3e2f 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -3127,11 +3127,8 @@ left undefined. return the value of the object truncated to an :class:`~numbers.Integral` (typically an :class:`int`). - The built-in function :func:`int` falls back to :meth:`__trunc__` if neither - :meth:`__int__` nor :meth:`__index__` is defined. - - .. versionchanged:: 3.11 - The delegation of :func:`int` to :meth:`__trunc__` is deprecated. + .. versionchanged:: 3.14 + :func:`int` no longer delegates to the :meth:`~object.__trunc__` method. .. _context-managers: diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 45ffb281fcc032..9f471d24909215 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -225,6 +225,11 @@ Others It had previously raised a :exc:`DeprecationWarning` since Python 3.9. (Contributed by Jelle Zijlstra in :gh:`118767`.) +* The :func:`int` built-in no longer delegates to + :meth:`~object.__trunc__`. Classes that want to support conversion to + integer must implement either :meth:`~object.__int__` or + :meth:`~object.__index__`. (Contributed by Mark Dickinson in :gh:`119743`.) 
+ Porting to Python 3.14 ====================== diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index a0f8fb71c1ff37..b9fae11dfaa85c 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -732,7 +732,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__subclasscheck__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__subclasshook__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__truediv__)); - _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__trunc__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__type_params__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__typing_is_unpacked_typevartuple__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__typing_prepare_subst__)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 57d85020f14e05..aa66b20859a472 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -221,7 +221,6 @@ struct _Py_global_strings { STRUCT_FOR_ID(__subclasscheck__) STRUCT_FOR_ID(__subclasshook__) STRUCT_FOR_ID(__truediv__) - STRUCT_FOR_ID(__trunc__) STRUCT_FOR_ID(__type_params__) STRUCT_FOR_ID(__typing_is_unpacked_typevartuple__) STRUCT_FOR_ID(__typing_prepare_subst__) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index e62ebd659d30e8..b27720e9ff6ecf 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -730,7 +730,6 @@ extern "C" { INIT_ID(__subclasscheck__), \ INIT_ID(__subclasshook__), \ INIT_ID(__truediv__), \ - INIT_ID(__trunc__), \ INIT_ID(__type_params__), \ INIT_ID(__typing_is_unpacked_typevartuple__), \ INIT_ID(__typing_prepare_subst__), \ diff --git 
a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 892f580e8a6846..c61c556b758769 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -504,9 +504,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(__truediv__); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); - string = &_Py_ID(__trunc__); - assert(_PyUnicode_CheckConsistency(string, 1)); - _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(__type_params__); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index caeccbe1fed026..ce9febd741bba2 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -402,68 +402,8 @@ def __trunc__(self): class JustTrunc(base): def __trunc__(self): return 42 - with self.assertWarns(DeprecationWarning): - self.assertEqual(int(JustTrunc()), 42) - - class ExceptionalTrunc(base): - def __trunc__(self): - 1 / 0 - with self.assertRaises(ZeroDivisionError), \ - self.assertWarns(DeprecationWarning): - int(ExceptionalTrunc()) - - for trunc_result_base in (object, Classic): - class Index(trunc_result_base): - def __index__(self): - return 42 - - class TruncReturnsNonInt(base): - def __trunc__(self): - return Index() - with self.assertWarns(DeprecationWarning): - self.assertEqual(int(TruncReturnsNonInt()), 42) - - class Intable(trunc_result_base): - def __int__(self): - return 42 - - class TruncReturnsNonIndex(base): - def __trunc__(self): - return Intable() - with self.assertWarns(DeprecationWarning): - self.assertEqual(int(TruncReturnsNonInt()), 42) - - class NonIntegral(trunc_result_base): - def __trunc__(self): - # Check that we avoid infinite recursion. 
- return NonIntegral() - - class TruncReturnsNonIntegral(base): - def __trunc__(self): - return NonIntegral() - try: - with self.assertWarns(DeprecationWarning): - int(TruncReturnsNonIntegral()) - except TypeError as e: - self.assertEqual(str(e), - "__trunc__ returned non-Integral" - " (type NonIntegral)") - else: - self.fail("Failed to raise TypeError with %s" % - ((base, trunc_result_base),)) - - # Regression test for bugs.python.org/issue16060. - class BadInt(trunc_result_base): - def __int__(self): - return 42.0 - - class TruncReturnsBadInt(base): - def __trunc__(self): - return BadInt() - - with self.assertRaises(TypeError), \ - self.assertWarns(DeprecationWarning): - int(TruncReturnsBadInt()) + with self.assertRaises(TypeError): + int(JustTrunc()) def test_int_subclass_with_index(self): class MyIndex(int): @@ -514,18 +454,6 @@ class BadInt2(int): def __int__(self): return True - class TruncReturnsBadIndex: - def __trunc__(self): - return BadIndex() - - class TruncReturnsBadInt: - def __trunc__(self): - return BadInt() - - class TruncReturnsIntSubclass: - def __trunc__(self): - return True - bad_int = BadIndex() with self.assertWarns(DeprecationWarning): n = int(bad_int) @@ -549,26 +477,6 @@ def __trunc__(self): self.assertEqual(n, 1) self.assertIs(type(n), int) - bad_int = TruncReturnsBadIndex() - with self.assertWarns(DeprecationWarning): - n = int(bad_int) - self.assertEqual(n, 1) - self.assertIs(type(n), int) - - bad_int = TruncReturnsBadInt() - with self.assertWarns(DeprecationWarning): - self.assertRaises(TypeError, int, bad_int) - - good_int = TruncReturnsIntSubclass() - with self.assertWarns(DeprecationWarning): - n = int(good_int) - self.assertEqual(n, 1) - self.assertIs(type(n), int) - with self.assertWarns(DeprecationWarning): - n = IntSubclass(good_int) - self.assertEqual(n, 1) - self.assertIs(type(n), IntSubclass) - def test_error_message(self): def check(s, base=None): with self.assertRaises(ValueError, diff --git a/Lib/test/test_long.py 
b/Lib/test/test_long.py index 41b973da2c7df0..3b2e7c4e71d10d 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -386,15 +386,6 @@ def __long__(self): return 42 self.assertRaises(TypeError, int, JustLong()) - class LongTrunc: - # __long__ should be ignored in 3.x - def __long__(self): - return 42 - def __trunc__(self): - return 1729 - with self.assertWarns(DeprecationWarning): - self.assertEqual(int(LongTrunc()), 1729) - def check_float_conversion(self, n): # Check that int -> float conversion behaviour matches # that of the pure Python version above. diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-29-18-53-43.gh-issue-119740.zP2JNM.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-29-18-53-43.gh-issue-119740.zP2JNM.rst new file mode 100644 index 00000000000000..111e096d262ea0 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-29-18-53-43.gh-issue-119740.zP2JNM.rst @@ -0,0 +1,2 @@ +Remove the previously-deprecated delegation of :func:`int` to +:meth:`~object.__trunc__`. diff --git a/Objects/abstract.c b/Objects/abstract.c index 8357175aa5591e..200817064e3cda 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -1521,7 +1521,6 @@ PyNumber_Long(PyObject *o) { PyObject *result; PyNumberMethods *m; - PyObject *trunc_func; Py_buffer view; if (o == NULL) { @@ -1563,37 +1562,6 @@ PyNumber_Long(PyObject *o) if (m && m->nb_index) { return PyNumber_Index(o); } - trunc_func = _PyObject_LookupSpecial(o, &_Py_ID(__trunc__)); - if (trunc_func) { - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "The delegation of int() to __trunc__ is deprecated.", 1)) { - Py_DECREF(trunc_func); - return NULL; - } - result = _PyObject_CallNoArgs(trunc_func); - Py_DECREF(trunc_func); - if (result == NULL || PyLong_CheckExact(result)) { - return result; - } - if (PyLong_Check(result)) { - Py_SETREF(result, _PyLong_Copy((PyLongObject *)result)); - return result; - } - /* __trunc__ is specified to return an Integral type, - but int() needs to return an int. 
*/ - if (!PyIndex_Check(result)) { - PyErr_Format( - PyExc_TypeError, - "__trunc__ returned non-Integral (type %.200s)", - Py_TYPE(result)->tp_name); - Py_DECREF(result); - return NULL; - } - Py_SETREF(result, PyNumber_Index(result)); - return result; - } - if (PyErr_Occurred()) - return NULL; if (PyUnicode_Check(o)) /* The below check is done in PyLong_FromUnicodeObject(). */ From f3b89a63cbb6d46e5ed40d5cd9813cdf9189ce35 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Sun, 2 Jun 2024 10:19:02 -0400 Subject: [PATCH 039/373] gh-117657: Fix TSAN reported race in `_PyEval_IsGILEnabled`. (#119921) The GIL may be disabled concurrently with this call so we need to use a relaxed atomic load. --- Include/internal/pycore_ceval.h | 3 ++- Tools/tsan/suppressions_free_threading.txt | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index bd3ba1225f2597..26ede31b1904b4 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -145,7 +145,8 @@ extern void _PyEval_ReleaseLock(PyInterpreterState *, PyThreadState *, static inline int _PyEval_IsGILEnabled(PyThreadState *tstate) { - return tstate->interp->ceval.gil->enabled != 0; + struct _gil_runtime_state *gil = tstate->interp->ceval.gil; + return _Py_atomic_load_int_relaxed(&gil->enabled) != 0; } // Enable or disable the GIL used by the interpreter that owns tstate, which diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index f855e9ce2698a5..78dac6ee0c9068 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -65,7 +65,6 @@ race_top:list_get_item_ref race_top:make_pending_calls race_top:set_add_entry race_top:should_intern_string -race_top:_PyEval_IsGILEnabled race_top:llist_insert_tail race_top:_Py_slot_tp_getattr_hook race_top:add_threadstate From aa9fe98e0649f0a151942914ef4e2810ca6126c2 Mon Sep 17 00:00:00 2001 
From: Jelle Zijlstra Date: Sun, 2 Jun 2024 08:13:24 -0700 Subject: [PATCH 040/373] Improve documentation for typing.get_type_hints (#119928) - Explicit list of what it does that is different from "just return __annotations__" - Remove reference to PEP 563; adding the future import doesn't do anything to type aliases, and in general it will never make get_type_hints() less likely to fail. - Remove example, as the Annotated docs already have a similar example, and it's unbalanced to have one example about this one edge case but not about other behaviors of the function. Co-authored-by: Alex Waygood --- Doc/library/typing.rst | 54 ++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/Doc/library/typing.rst b/Doc/library/typing.rst index a8068609fcfbe7..94de64fcf835fc 100644 --- a/Doc/library/typing.rst +++ b/Doc/library/typing.rst @@ -3080,35 +3080,37 @@ Introspection helpers Return a dictionary containing type hints for a function, method, module or class object. - This is often the same as ``obj.__annotations__``. In addition, - forward references encoded as string literals are handled by evaluating - them in ``globals``, ``locals`` and (where applicable) - :ref:`type parameter ` namespaces. - For a class ``C``, return - a dictionary constructed by merging all the ``__annotations__`` along - ``C.__mro__`` in reverse order. - - The function recursively replaces all ``Annotated[T, ...]`` with ``T``, - unless ``include_extras`` is set to ``True`` (see :class:`Annotated` for - more information). For example: - - .. 
testcode:: - - class Student(NamedTuple): - name: Annotated[str, 'some marker'] - - assert get_type_hints(Student) == {'name': str} - assert get_type_hints(Student, include_extras=False) == {'name': str} - assert get_type_hints(Student, include_extras=True) == { - 'name': Annotated[str, 'some marker'] - } + This is often the same as ``obj.__annotations__``, but this function makes + the following changes to the annotations dictionary: + + * Forward references encoded as string literals or :class:`ForwardRef` + objects are handled by evaluating them in *globalns*, *localns*, and + (where applicable) *obj*'s :ref:`type parameter ` namespace. + If *globalns* or *localns* is not given, appropriate namespace + dictionaries are inferred from *obj*. + * ``None`` is replaced with :class:`types.NoneType`. + * If :func:`@no_type_check ` has been applied to *obj*, an + empty dictionary is returned. + * If *obj* is a class ``C``, the function returns a dictionary that merges + annotations from ``C``'s base classes with those on ``C`` directly. This + is done by traversing ``C.__mro__`` and iteratively combining + ``__annotations__`` dictionaries. Annotations on classes appearing + earlier in the :term:`method resolution order` always take precedence over + annotations on classes appearing later in the method resolution order. + * The function recursively replaces all occurrences of ``Annotated[T, ...]`` + with ``T``, unless *include_extras* is set to ``True`` (see + :class:`Annotated` for more information). + + See also :func:`inspect.get_annotations`, a lower-level function that + returns annotations more directly. .. note:: - :func:`get_type_hints` does not work with imported - :ref:`type aliases ` that include forward references. - Enabling postponed evaluation of annotations (:pep:`563`) may remove - the need for most forward references. 
+ If any forward references in the annotations of *obj* are not resolvable + or are not valid Python code, this function will raise an exception + such as :exc:`NameError`. For example, this can happen with imported + :ref:`type aliases ` that include forward references, + or with names imported under :data:`if TYPE_CHECKING `. .. versionchanged:: 3.9 Added ``include_extras`` parameter as part of :pep:`593`. From bd6d4ed6454378e48dab06f50a9be0bae6baa3a2 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sun, 2 Jun 2024 20:39:19 +0100 Subject: [PATCH 041/373] GH-119054: Add "Reading and writing files" section to pathlib docs (#119524) Add a dedicated subsection for `open()`, `read_text()`, `read_bytes()`, `write_text()` and `write_bytes()`. --- Doc/library/pathlib.rst | 163 +++++++++++++++++++++------------------- 1 file changed, 84 insertions(+), 79 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index c72d409a8eb2d6..f37bb33321fa53 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1067,6 +1067,90 @@ Querying file type and status .. versionadded:: 3.5 +Reading and writing files +^^^^^^^^^^^^^^^^^^^^^^^^^ + + +.. method:: Path.open(mode='r', buffering=-1, encoding=None, errors=None, newline=None) + + Open the file pointed to by the path, like the built-in :func:`open` + function does:: + + >>> p = Path('setup.py') + >>> with p.open() as f: + ... f.readline() + ... + '#!/usr/bin/env python3\n' + + +.. method:: Path.read_text(encoding=None, errors=None, newline=None) + + Return the decoded contents of the pointed-to file as a string:: + + >>> p = Path('my_text_file') + >>> p.write_text('Text file contents') + 18 + >>> p.read_text() + 'Text file contents' + + The file is opened and then closed. The optional parameters have the same + meaning as in :func:`open`. + + .. versionadded:: 3.5 + + .. versionchanged:: 3.13 + The *newline* parameter was added. + + +.. 
method:: Path.read_bytes() + + Return the binary contents of the pointed-to file as a bytes object:: + + >>> p = Path('my_binary_file') + >>> p.write_bytes(b'Binary file contents') + 20 + >>> p.read_bytes() + b'Binary file contents' + + .. versionadded:: 3.5 + + +.. method:: Path.write_text(data, encoding=None, errors=None, newline=None) + + Open the file pointed to in text mode, write *data* to it, and close the + file:: + + >>> p = Path('my_text_file') + >>> p.write_text('Text file contents') + 18 + >>> p.read_text() + 'Text file contents' + + An existing file of the same name is overwritten. The optional parameters + have the same meaning as in :func:`open`. + + .. versionadded:: 3.5 + + .. versionchanged:: 3.10 + The *newline* parameter was added. + + +.. method:: Path.write_bytes(data) + + Open the file pointed to in bytes mode, write *data* to it, and close the + file:: + + >>> p = Path('my_binary_file') + >>> p.write_bytes(b'Binary file contents') + 20 + >>> p.read_bytes() + b'Binary file contents' + + An existing file of the same name is overwritten. + + .. versionadded:: 3.5 + + Other methods ^^^^^^^^^^^^^ @@ -1360,18 +1444,6 @@ example because the path doesn't exist). The *exist_ok* parameter was added. -.. method:: Path.open(mode='r', buffering=-1, encoding=None, errors=None, newline=None) - - Open the file pointed to by the path, like the built-in :func:`open` - function does:: - - >>> p = Path('setup.py') - >>> with p.open() as f: - ... f.readline() - ... - '#!/usr/bin/env python3\n' - - .. method:: Path.owner(*, follow_symlinks=True) Return the name of the user owning the file. :exc:`KeyError` is raised @@ -1388,37 +1460,6 @@ example because the path doesn't exist). The *follow_symlinks* parameter was added. -.. method:: Path.read_bytes() - - Return the binary contents of the pointed-to file as a bytes object:: - - >>> p = Path('my_binary_file') - >>> p.write_bytes(b'Binary file contents') - 20 - >>> p.read_bytes() - b'Binary file contents' - - .. 
versionadded:: 3.5 - - -.. method:: Path.read_text(encoding=None, errors=None, newline=None) - - Return the decoded contents of the pointed-to file as a string:: - - >>> p = Path('my_text_file') - >>> p.write_text('Text file contents') - 18 - >>> p.read_text() - 'Text file contents' - - The file is opened and then closed. The optional parameters have the same - meaning as in :func:`open`. - - .. versionadded:: 3.5 - - .. versionchanged:: 3.13 - The *newline* parameter was added. - .. method:: Path.readlink() Return the path to which the symbolic link points (as returned by @@ -1593,42 +1634,6 @@ example because the path doesn't exist). The *missing_ok* parameter was added. -.. method:: Path.write_bytes(data) - - Open the file pointed to in bytes mode, write *data* to it, and close the - file:: - - >>> p = Path('my_binary_file') - >>> p.write_bytes(b'Binary file contents') - 20 - >>> p.read_bytes() - b'Binary file contents' - - An existing file of the same name is overwritten. - - .. versionadded:: 3.5 - - -.. method:: Path.write_text(data, encoding=None, errors=None, newline=None) - - Open the file pointed to in text mode, write *data* to it, and close the - file:: - - >>> p = Path('my_text_file') - >>> p.write_text('Text file contents') - 18 - >>> p.read_text() - 'Text file contents' - - An existing file of the same name is overwritten. The optional parameters - have the same meaning as in :func:`open`. - - .. versionadded:: 3.5 - - .. versionchanged:: 3.10 - The *newline* parameter was added. - - .. 
_pathlib-pattern-language: Pattern language From 117a8acdab997b73ada822cce97815a86f839e15 Mon Sep 17 00:00:00 2001 From: Solomon Himelbloom <7608183+TechSolomon@users.noreply.github.com> Date: Sun, 2 Jun 2024 16:43:03 -0800 Subject: [PATCH 042/373] gh-109975: What's New in Python 3.13: fix broken link for `telnetlib` alternative (#119958) --- Doc/whatsnew/3.13.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 66626ac06428b9..903de3c04b4a07 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1334,7 +1334,7 @@ PEP 594: dead batteries (and other module removals) * :mod:`!sunau`. (Contributed by Victor Stinner in :gh:`104773`.) - * :mod:`!telnetlib`, use the projects :pypi:`telnetlib3 ` or + * :mod:`!telnetlib`, use the projects :pypi:`telnetlib3` or :pypi:`Exscript` instead. (Contributed by Victor Stinner in :gh:`104773`.) From 0594a27e5f1d87d59fa8a761dd8ca9df4e42816d Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 3 Jun 2024 12:22:41 +0900 Subject: [PATCH 043/373] gh-117657: Fix data races report by TSAN unicode-hash (gh-119907) --- Objects/unicodeobject.c | 19 +++++++++++-------- Tools/tsan/suppressions_free_threading.txt | 1 - 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index eb37b478cc4de1..12782754753ef5 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1633,7 +1633,7 @@ unicode_modifiable(PyObject *unicode) assert(_PyUnicode_CHECK(unicode)); if (Py_REFCNT(unicode) != 1) return 0; - if (_PyUnicode_HASH(unicode) != -1) + if (FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(unicode)) != -1) return 0; if (PyUnicode_CHECK_INTERNED(unicode)) return 0; @@ -10901,9 +10901,10 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right) if (PyUnicode_CHECK_INTERNED(left)) return 0; - assert(_PyUnicode_HASH(right_uni) != -1); - Py_hash_t hash = _PyUnicode_HASH(left); - if (hash != -1 && hash 
!= _PyUnicode_HASH(right_uni)) { + Py_hash_t right_hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(right_uni)); + assert(right_hash != -1); + Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(left)); + if (hash != -1 && hash != right_hash) { return 0; } @@ -11388,12 +11389,14 @@ unicode_hash(PyObject *self) #ifdef Py_DEBUG assert(_Py_HashSecret_Initialized); #endif - if (_PyUnicode_HASH(self) != -1) - return _PyUnicode_HASH(self); - + Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(self)); + if (hash != -1) { + return hash; + } x = _Py_HashBytes(PyUnicode_DATA(self), PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self)); - _PyUnicode_HASH(self) = x; + + FT_ATOMIC_STORE_SSIZE_RELAXED(_PyUnicode_HASH(self), x); return x; } diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index 78dac6ee0c9068..ff9c8036e92fe7 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -49,7 +49,6 @@ race_top:set_discard_entry race_top:set_inheritable race_top:start_the_world race_top:tstate_set_detached -race_top:unicode_hash race_top:Py_SET_TYPE race_top:_PyDict_CheckConsistency race_top:_PyImport_AcquireLock From 8e6321efd72d12263398994e59c5216edcada3c0 Mon Sep 17 00:00:00 2001 From: wookie184 Date: Mon, 3 Jun 2024 07:05:57 +0100 Subject: [PATCH 044/373] gh-119961: Fix test workflow status badge in README (#119962) Co-authored-by: Nikita Sobolev --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index e3163c5ff636ab..7dd3660b198784 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ This is Python version 3.14.0 alpha 0 ===================================== -.. image:: https://github.com/python/cpython/workflows/Tests/badge.svg +.. 
image:: https://github.com/python/cpython/actions/workflows/build.yml/badge.svg?branch=main&event=push :alt: CPython build status on GitHub Actions :target: https://github.com/python/cpython/actions From 3ea9b92086240b2f38a74c6945e7a723b480cefe Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 3 Jun 2024 08:45:20 +0200 Subject: [PATCH 045/373] gh-119396: Optimize unicode_decode_utf8_writer() (#119957) Optimize unicode_decode_utf8_writer() Take the ascii_decode() fast-path even if dest is not aligned on size_t bytes. --- Objects/unicodeobject.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 12782754753ef5..53160f1799f2cc 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4702,8 +4702,9 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest) const char *p = start; #if SIZEOF_SIZE_T <= SIZEOF_VOID_P - assert(_Py_IS_ALIGNED(dest, ALIGNOF_SIZE_T)); - if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) { + if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T) + && _Py_IS_ALIGNED(dest, ALIGNOF_SIZE_T)) + { /* Fast path, see in STRINGLIB(utf8_decode) for an explanation. 
*/ /* Help allocation */ @@ -4948,9 +4949,7 @@ unicode_decode_utf8_writer(_PyUnicodeWriter *writer, const char *end = s + size; Py_ssize_t decoded = 0; Py_UCS1 *dest = (Py_UCS1*)writer->data + writer->pos * writer->kind; - if (writer->kind == PyUnicode_1BYTE_KIND - && _Py_IS_ALIGNED(dest, ALIGNOF_SIZE_T)) - { + if (writer->kind == PyUnicode_1BYTE_KIND) { decoded = ascii_decode(s, end, dest); writer->pos += decoded; From 52586f930f62bd80374f0f240a4ecce0c0238174 Mon Sep 17 00:00:00 2001 From: Radislav Chugunov <52372310+chgnrdv@users.noreply.github.com> Date: Mon, 3 Jun 2024 10:47:36 +0300 Subject: [PATCH 046/373] gh-119506: fix `_io.TextIOWrapper.write()` write during flush (#119507) Co-authored-by: Inada Naoki --- Lib/test/test_io.py | 22 +++++++++++++ ...-05-24-14-32-24.gh-issue-119506.-nMNqq.rst | 1 + Modules/_io/textio.c | 31 +++++++++++++------ 3 files changed, 45 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index e5cb08c2cdd04c..1ca3edac8c8dc9 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -4016,6 +4016,28 @@ def write(self, data): t.write("x"*chunk_size) self.assertEqual([b"abcdef", b"ghi", b"x"*chunk_size], buf._write_stack) + def test_issue119506(self): + chunk_size = 8192 + + class MockIO(self.MockRawIO): + written = False + def write(self, data): + if not self.written: + self.written = True + t.write("middle") + return super().write(data) + + buf = MockIO() + t = self.TextIOWrapper(buf) + t.write("abc") + t.write("def") + # writing data which size >= chunk_size cause flushing buffer before write. 
+ t.write("g" * chunk_size) + t.flush() + + self.assertEqual([b"abcdef", b"middle", b"g"*chunk_size], + buf._write_stack) + class PyTextIOWrapperTest(TextIOWrapperTest): io = pyio diff --git a/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst new file mode 100644 index 00000000000000..f9b764ae0c49b3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst @@ -0,0 +1 @@ +Fix :meth:`!io.TextIOWrapper.write` method breaks internal buffer when the method is called again during flushing internal buffer. diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 9dff8eafb2560f..c162d8106ec1fd 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1719,16 +1719,26 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text) bytes_len = PyBytes_GET_SIZE(b); } - if (self->pending_bytes == NULL) { - self->pending_bytes_count = 0; - self->pending_bytes = b; - } - else if (self->pending_bytes_count + bytes_len > self->chunk_size) { - // Prevent to concatenate more than chunk_size data. - if (_textiowrapper_writeflush(self) < 0) { - Py_DECREF(b); - return NULL; + // We should avoid concatinating huge data. + // Flush the buffer before adding b to the buffer if b is not small. + // https://github.com/python/cpython/issues/87426 + if (bytes_len >= self->chunk_size) { + // _textiowrapper_writeflush() calls buffer.write(). + // self->pending_bytes can be appended during buffer->write() + // or other thread. + // We need to loop until buffer becomes empty. 
+ // https://github.com/python/cpython/issues/118138 + // https://github.com/python/cpython/issues/119506 + while (self->pending_bytes != NULL) { + if (_textiowrapper_writeflush(self) < 0) { + Py_DECREF(b); + return NULL; + } } + } + + if (self->pending_bytes == NULL) { + assert(self->pending_bytes_count == 0); self->pending_bytes = b; } else if (!PyList_CheckExact(self->pending_bytes)) { @@ -1737,6 +1747,9 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text) Py_DECREF(b); return NULL; } + // Since Python 3.12, allocating GC object won't trigger GC and release + // GIL. See https://github.com/python/cpython/issues/97922 + assert(!PyList_CheckExact(self->pending_bytes)); PyList_SET_ITEM(list, 0, self->pending_bytes); PyList_SET_ITEM(list, 1, b); self->pending_bytes = list; From 84c3191954b40e090db15da49a59fcc40afe34fd Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Mon, 3 Jun 2024 10:50:29 +0300 Subject: [PATCH 047/373] gh-118827: Remove `Quoter` from `urllib.parse` (#118828) Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/whatsnew/3.14.rst | 7 +++++++ Lib/test/test_urlparse.py | 7 ------- Lib/urllib/parse.py | 8 -------- .../2024-05-09-12-33-25.gh-issue-118827.JrzHz1.rst | 3 +++ 4 files changed, 10 insertions(+), 15 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-09-12-33-25.gh-issue-118827.JrzHz1.rst diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 9f471d24909215..47f3e30942397f 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -218,6 +218,13 @@ typing * Remove :class:`!typing.ByteString`. It had previously raised a :exc:`DeprecationWarning` since Python 3.12. +urllib +------ + +* Remove deprecated :class:`!Quoter` class from :mod:`urllib.parse`. + It had previously raised a :exc:`DeprecationWarning` since Python 3.11. + (Contributed by Nikita Sobolev in :gh:`118827`.) 
+ Others ------ diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 4faad733245df9..d6c83a75c1c03a 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -1507,13 +1507,6 @@ def test_unwrap(self): class DeprecationTest(unittest.TestCase): - - def test_Quoter_deprecation(self): - with self.assertWarns(DeprecationWarning) as cm: - old_class = urllib.parse.Quoter - self.assertIs(old_class, urllib.parse._Quoter) - self.assertIn('Quoter will be removed', str(cm.warning)) - def test_splittype_deprecation(self): with self.assertWarns(DeprecationWarning) as cm: urllib.parse.splittype('') diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 3932bb99c7e7d1..8f724f907d4217 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -822,14 +822,6 @@ def unquote_plus(string, encoding='utf-8', errors='replace'): b'_.-~') _ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE) -def __getattr__(name): - if name == 'Quoter': - warnings.warn('Deprecated in 3.11. ' - 'urllib.parse.Quoter will be removed in Python 3.14. ' - 'It was not intended to be a public API.', - DeprecationWarning, stacklevel=2) - return _Quoter - raise AttributeError(f'module {__name__!r} has no attribute {name!r}') class _Quoter(dict): """A mapping from bytes numbers (in range(0,256)) to strings. diff --git a/Misc/NEWS.d/next/Library/2024-05-09-12-33-25.gh-issue-118827.JrzHz1.rst b/Misc/NEWS.d/next/Library/2024-05-09-12-33-25.gh-issue-118827.JrzHz1.rst new file mode 100644 index 00000000000000..40612dd93bd6da --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-09-12-33-25.gh-issue-118827.JrzHz1.rst @@ -0,0 +1,3 @@ +Remove deprecated :class:`!Quoter` class from :mod:`urllib.parse`. It had +previously raised a :exc:`DeprecationWarning` since Python 3.11. +Patch by Nikita Sobolev. 
From 1e5f615086d23c71a9701abe641b5241e4345234 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Mon, 3 Jun 2024 10:52:35 +0300 Subject: [PATCH 048/373] gh-116991: Improve `pygen --help` for `python` subparser (#116992) --- Tools/peg_generator/pegen/__main__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Tools/peg_generator/pegen/__main__.py b/Tools/peg_generator/pegen/__main__.py index 262c8a6db68f6e..0b0b4b291c2b0e 100755 --- a/Tools/peg_generator/pegen/__main__.py +++ b/Tools/peg_generator/pegen/__main__.py @@ -107,7 +107,10 @@ def generate_python_code( help="Suppress code emission for rule actions", ) -python_parser = subparsers.add_parser("python", help="Generate Python code") +python_parser = subparsers.add_parser( + "python", + help="Generate Python code, needs grammar definition with Python actions", +) python_parser.set_defaults(func=generate_python_code) python_parser.add_argument("grammar_filename", help="Grammar description") python_parser.add_argument( From 4223f1d828d3a3e1c8d803e3fdd420afd7d85faf Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 3 Jun 2024 10:15:04 +0200 Subject: [PATCH 049/373] gh-119856: Support exiting help() with just "exit" (#119858) --- Lib/pydoc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 5d854c50f40d6e..2ba597d01f245e 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -2007,7 +2007,7 @@ def interact(self): if (len(request) > 2 and request[0] == request[-1] in ("'", '"') and request[0] not in request[1:-1]): request = request[1:-1] - if request.lower() in ('q', 'quit'): break + if request.lower() in ('q', 'quit', 'exit'): break if request == 'help': self.intro() else: @@ -2059,7 +2059,7 @@ def intro(self): enter "modules spam". To quit this help utility and return to the interpreter, -enter "q" or "quit". +enter "q", "quit" or "exit". 
'''.format('%d.%d' % sys.version_info[:2])) def list(self, items, columns=4, width=80): From 70934fb46982ad2ae677cca485a730b39635919c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 3 Jun 2024 10:26:13 +0200 Subject: [PATCH 050/373] gh-112026: Deprecate _PyDict_GetItemStringWithError() function (#119855) --- Include/cpython/dictobject.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h index 3fd23b9313c453..e2861c963266ea 100644 --- a/Include/cpython/dictobject.h +++ b/Include/cpython/dictobject.h @@ -37,7 +37,8 @@ typedef struct { PyAPI_FUNC(PyObject *) _PyDict_GetItem_KnownHash(PyObject *mp, PyObject *key, Py_hash_t hash); -PyAPI_FUNC(PyObject *) _PyDict_GetItemStringWithError(PyObject *, const char *); +// PyDict_GetItemStringRef() can be used instead +Py_DEPRECATED(3.14) PyAPI_FUNC(PyObject *) _PyDict_GetItemStringWithError(PyObject *, const char *); PyAPI_FUNC(PyObject *) PyDict_SetDefault( PyObject *mp, PyObject *key, PyObject *defaultobj); From d7fcaa73b71f4c49c1b24cac04c9b6f1cf69b944 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 3 Jun 2024 12:29:01 +0300 Subject: [PATCH 051/373] gh-119838: Treat Fraction as a real value in mixed arithmetic operations with complex (GH-119839) --- Lib/fractions.py | 4 ++-- Lib/test/test_fractions.py | 5 +---- .../Library/2024-05-31-13-56-21.gh-issue-119838.H6XHlE.rst | 3 +++ 3 files changed, 6 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-31-13-56-21.gh-issue-119838.H6XHlE.rst diff --git a/Lib/fractions.py b/Lib/fractions.py index 95adccd86e33a0..565503911bbe97 100644 --- a/Lib/fractions.py +++ b/Lib/fractions.py @@ -668,7 +668,7 @@ def forward(a, b): elif isinstance(b, float): return fallback_operator(float(a), b) elif handle_complex and isinstance(b, complex): - return fallback_operator(complex(a), b) + return fallback_operator(float(a), b) else: return NotImplemented forward.__name__ = 
'__' + fallback_operator.__name__ + '__' @@ -681,7 +681,7 @@ def reverse(b, a): elif isinstance(a, numbers.Real): return fallback_operator(float(a), float(b)) elif handle_complex and isinstance(a, numbers.Complex): - return fallback_operator(complex(a), complex(b)) + return fallback_operator(complex(a), float(b)) else: return NotImplemented reverse.__name__ = '__r' + fallback_operator.__name__ + '__' diff --git a/Lib/test/test_fractions.py b/Lib/test/test_fractions.py index 3c7780e40db096..71865f68eb0f12 100644 --- a/Lib/test/test_fractions.py +++ b/Lib/test/test_fractions.py @@ -806,10 +806,7 @@ def testMixedMultiplication(self): self.assertTypedEquals(F(3, 2) * Polar(4, 2), Polar(F(6, 1), 2)) self.assertTypedEquals(F(3, 2) * Polar(4.0, 2), Polar(6.0, 2)) self.assertTypedEquals(F(3, 2) * Rect(4, 3), Rect(F(6, 1), F(9, 2))) - with self.assertWarnsRegex(DeprecationWarning, - "argument 'real' must be a real number, not complex"): - self.assertTypedEquals(F(3, 2) * RectComplex(4, 3), - RectComplex(6.0+0j, 4.5+0j)) + self.assertTypedEquals(F(3, 2) * RectComplex(4, 3), RectComplex(6.0, 4.5)) self.assertRaises(TypeError, operator.mul, Polar(4, 2), F(3, 2)) self.assertTypedEquals(Rect(4, 3) * F(3, 2), 6.0 + 4.5j) self.assertEqual(F(3, 2) * SymbolicComplex('X'), SymbolicComplex('3/2 * X')) diff --git a/Misc/NEWS.d/next/Library/2024-05-31-13-56-21.gh-issue-119838.H6XHlE.rst b/Misc/NEWS.d/next/Library/2024-05-31-13-56-21.gh-issue-119838.H6XHlE.rst new file mode 100644 index 00000000000000..17a87327b5b1d6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-31-13-56-21.gh-issue-119838.H6XHlE.rst @@ -0,0 +1,3 @@ +In mixed arithmetic operations with :class:`~fractions.Fraction` and +complex, the fraction is now converted to :class:`float` instead of +:class:`complex`. 
From cae4c80714e7266772025676977e2a1b98cdcd7b Mon Sep 17 00:00:00 2001 From: Awbert <119314310+SweetyAngel@users.noreply.github.com> Date: Mon, 3 Jun 2024 12:31:02 +0300 Subject: [PATCH 052/373] gh-119968: Improved monitoring c-api docs (#119969) --- Doc/c-api/monitoring.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/c-api/monitoring.rst b/Doc/c-api/monitoring.rst index ec743b98ba7024..b34035b5548f02 100644 --- a/Doc/c-api/monitoring.rst +++ b/Doc/c-api/monitoring.rst @@ -2,7 +2,7 @@ .. _monitoring: -Monitorong C API +Monitoring C API ================ Added in version 3.13. From 367adc91fb9834eb35b168048fd54705621c3f21 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Mon, 3 Jun 2024 10:36:20 +0100 Subject: [PATCH 053/373] gh-119786: move exception handling doc to InternalDocs (#119815) --- InternalDocs/{index.md => README.md} | 4 + InternalDocs/exception_handling.md | 201 +++++++++++++++++++++++++++ Objects/exception_handling_notes.txt | 182 ------------------------ 3 files changed, 205 insertions(+), 182 deletions(-) rename InternalDocs/{index.md => README.md} (92%) create mode 100644 InternalDocs/exception_handling.md delete mode 100644 Objects/exception_handling_notes.txt diff --git a/InternalDocs/index.md b/InternalDocs/README.md similarity index 92% rename from InternalDocs/index.md rename to InternalDocs/README.md index 32b66a254bcf2c..e69e27d1542990 100644 --- a/InternalDocs/index.md +++ b/InternalDocs/README.md @@ -10,3 +10,7 @@ to hold for other implementations of the Python language. The core dev team attempts to keep this documentation up to date. If it is not, please report that through the [issue tracker](https://github.com/python/cpython/issues). 
+ + +[Exception Handling](exception_handling.md) + diff --git a/InternalDocs/exception_handling.md b/InternalDocs/exception_handling.md new file mode 100644 index 00000000000000..22d9c3bf7933f1 --- /dev/null +++ b/InternalDocs/exception_handling.md @@ -0,0 +1,201 @@ +Description of exception handling +--------------------------------- + +Python uses a technique known as "zero-cost" exception handling, which +minimizes the cost of supporting exceptions. In the common case (where +no exception is raised) the cost is reduced to zero (or close to zero). +The cost of raising an exception is increased, but not by much. + +The following code: + +``` +try: + g(0) +except: + res = "fail" + +``` + +compiles into intermediate code like the following: + +``` + RESUME 0 + + 1 SETUP_FINALLY 8 (to L1) + + 2 LOAD_NAME 0 (g) + PUSH_NULL + LOAD_CONST 0 (0) + CALL 1 + POP_TOP + POP_BLOCK + + -- L1: PUSH_EXC_INFO + + 3 POP_TOP + + 4 LOAD_CONST 1 ('fail') + STORE_NAME 1 (res) +``` + +`SETUP_FINALLY` and `POP_BLOCK` are pseudo-instructions. This means +that they can appear in intermediate code but they are not bytecode +instructions. `SETUP_FINALLY` specifies that henceforth, exceptions +are handled by the code at label L1. The `POP_BLOCK` instruction +reverses the effect of the last `SETUP` instruction, so that the +active exception handler reverts to what it was before. + +`SETUP_FINALLY` and `POP_BLOCK` have no effect when no exceptions +are raised. The idea of zero-cost exception handling is to replace +these pseudo-instructions by metadata which is stored alongside the +bytecode, and which is inspected only when an exception occurs. +This metadata is the exception table, and it is stored in the code +object's `co_exceptiontable` field. + +When the pseudo-instructions are translated into bytecode, +`SETUP_FINALLY` and `POP_BLOCK` are removed, and the exception +table is constructed, mapping each instruction to the exception +handler that covers it, if any. 
Instructions which are not +covered by any exception handler within the same code object's +bytecode, do not appear in the exception table at all. + +For the code object in our example above, the table has a single +entry specifying that all instructions that were between the +`SETUP_FINALLY` and the `POP_BLOCK` are covered by the exception +handler located at label `L1`. + +Handling Exceptions +------------------- + +At runtime, when an exception occurs, the interpreter looks up +the offset of the current instruction in the exception table. If +it finds a handler, control flow transfers to it. Otherwise, the +exception bubbles up to the caller, and the caller's frame is +checked for a handler covering the `CALL` instruction. This +repeats until a handler is found or the topmost frame is reached. +If no handler is found, the program terminates. During unwinding, +the traceback is constructed as each frame is added to it. + +Along with the location of an exception handler, each entry of the +exception table also contains the stack depth of the `try` instruction +and a boolean `lasti` value, which indicates whether the instruction +offset of the raising instruction should be pushed to the stack. + +Handling an exception, once an exception table entry is found, consists +of the following steps: + + 1. pop values from the stack until it matches the stack depth for the handler. + 2. if `lasti` is true, then push the offset that the exception was raised at. + 3. push the exception to the stack. + 4. jump to the target offset and resume execution. + + +Reraising Exceptions and `lasti` +-------------------------------- + +The purpose of pushing `lasti` to the stack is for cases where an exception +needs to be re-raised, and be associated with the original instruction that +raised it. This happens, for example, at the end of a `finally` block, when +any in-flight exception needs to be propagated on. 
As the frame's instruction +pointer now points into the finally block, a `RERAISE` instruction +(with `oparg > 0`) sets it to the `lasti` value from the stack. + +Format of the exception table +----------------------------- + +Conceptually, the exception table consists of a sequence of 5-tuples: +``` + 1. `start-offset` (inclusive) + 2. `end-offset` (exclusive) + 3. `target` + 4. `stack-depth` + 5. `push-lasti` (boolean) +``` + +All offsets and lengths are in code units, not bytes. + +We want the format to be compact, but quickly searchable. +For it to be compact, it needs to have variable sized entries so that we can store common (small) offsets compactly, but handle large offsets if needed. +For it to be searchable quickly, we need to support binary search giving us log(n) performance in all cases. +Binary search typically assumes fixed size entries, but that is not necessary, as long as we can identify the start of an entry. + +It is worth noting that the size (end-start) is always smaller than the end, so we encode the entries as: + `start, size, target, depth, push-lasti`. + +Also, sizes are limited to 2**30 as the code length cannot exceed 2**31 and each code unit takes 2 bytes. +It also happens that depth is generally quite small. + +So, we need to encode: +``` + `start` (up to 30 bits) + `size` (up to 30 bits) + `target` (up to 30 bits) + `depth` (up to ~8 bits) + `lasti` (1 bit) +``` + +We need a marker for the start of the entry, so the first byte of entry will have the most significant bit set. +Since the most significant bit is reserved for marking the start of an entry, we have 7 bits per byte to encode offsets. +Encoding uses a standard varint encoding, but with only 7 bits instead of the usual 8. +The 8 bits of a byte are (msb left) SXdddddd where S is the start bit. X is the extend bit meaning that the next byte is required to extend the offset. 
+ +In addition, we combine `depth` and `lasti` into a single value, `((depth<<1)+lasti)`, before encoding. + +For example, the exception entry: +``` + `start`: 20 + `end`: 28 + `target`: 100 + `depth`: 3 + `lasti`: False +``` + +is encoded by first converting to the more compact four value form: +``` + `start`: 20 + `size`: 8 + `target`: 100 + `depth<<1+lasti`: 6 +``` + +which is then encoded as: +``` + 148 (MSB + 20 for start) + 8 (size) + 65 (Extend bit + 1) + 36 (Remainder of target, 100 == (1<<6)+36) + 6 +``` + +for a total of five bytes. + + +Script to parse the exception table +----------------------------------- + +``` +def parse_varint(iterator): + b = next(iterator) + val = b & 63 + while b&64: + val <<= 6 + b = next(iterator) + val |= b&63 + return val +``` +``` +def parse_exception_table(code): + iterator = iter(code.co_exceptiontable) + try: + while True: + start = parse_varint(iterator)*2 + length = parse_varint(iterator)*2 + end = start + length - 2 # Present as inclusive, not exclusive + target = parse_varint(iterator)*2 + dl = parse_varint(iterator) + depth = dl >> 1 + lasti = bool(dl&1) + yield start, end, target, depth, lasti + except StopIteration: + return +``` diff --git a/Objects/exception_handling_notes.txt b/Objects/exception_handling_notes.txt deleted file mode 100644 index 387ef935ce739e..00000000000000 --- a/Objects/exception_handling_notes.txt +++ /dev/null @@ -1,182 +0,0 @@ -Description of exception handling in Python 3.11 ------------------------------------------------- - -Python 3.11 uses what is known as "zero-cost" exception handling. -Prior to 3.11, exceptions were handled by a runtime stack of "blocks". - -In zero-cost exception handling, the cost of supporting exceptions is minimized. -In the common case (where no exception is raised) the cost is reduced -to zero (or close to zero). -The cost of raising an exception is increased, but not by much. 
- -The following code: - -def f(): - try: - g(0) - except: - return "fail" - -compiles as follows in 3.10: - - 2 0 SETUP_FINALLY 7 (to 16) - - 3 2 LOAD_GLOBAL 0 (g) - 4 LOAD_CONST 1 (0) - 6 CALL_NO_KW 1 - 8 POP_TOP - 10 POP_BLOCK - 12 LOAD_CONST 0 (None) - 14 RETURN_VALUE - - 4 >> 16 POP_TOP - 18 POP_TOP - 20 POP_TOP - - 5 22 POP_EXCEPT - 24 LOAD_CONST 3 ('fail') - 26 RETURN_VALUE - -Note the explicit instructions to push and pop from the "block" stack: -SETUP_FINALLY and POP_BLOCK. - -In 3.11, the SETUP_FINALLY and POP_BLOCK are eliminated, replaced with -a table to determine where to jump to when an exception is raised. - - 1 0 RESUME 0 - - 2 2 NOP - - 3 4 LOAD_GLOBAL 1 (g + NULL) - 16 LOAD_CONST 1 (0) - 18 PRECALL 1 - 22 CALL 1 - 32 POP_TOP - 34 LOAD_CONST 0 (None) - 36 RETURN_VALUE - >> 38 PUSH_EXC_INFO - - 4 40 POP_TOP - - 5 42 POP_EXCEPT - 44 LOAD_CONST 2 ('fail') - 46 RETURN_VALUE - >> 48 COPY 3 - 50 POP_EXCEPT - 52 RERAISE 1 -ExceptionTable: - 4 to 32 -> 38 [0] - 38 to 40 -> 48 [1] lasti - -(Note this code is from 3.11, later versions may have slightly different bytecode.) - -If an instruction raises an exception then its offset is used to find the target to jump to. -For example, the CALL at offset 22, falls into the range 4 to 32. -So, if g() raises an exception, then control jumps to offset 38. - - -Unwinding ---------- - -When an exception is raised, the current instruction offset is used to find following: -target to jump to, stack depth, and 'lasti', which determines whether the instruction -offset of the raising instruction should be pushed. - -This information is stored in the exception table, described below. - -If there is no relevant entry, the exception bubbles up to the caller. - -If there is an entry, then: - 1. pop values from the stack until it matches the stack depth for the handler. - 2. if 'lasti' is true, then push the offset that the exception was raised at. - 3. push the exception to the stack. - 4. 
jump to the target offset and resume execution. - - -Format of the exception table ------------------------------ - -Conceptually, the exception table consists of a sequence of 5-tuples: - 1. start-offset (inclusive) - 2. end-offset (exclusive) - 3. target - 4. stack-depth - 5. push-lasti (boolean) - -All offsets and lengths are in instructions, not bytes. - -We want the format to be compact, but quickly searchable. -For it to be compact, it needs to have variable sized entries so that we can store common (small) offsets compactly, but handle large offsets if needed. -For it to be searchable quickly, we need to support binary search giving us log(n) performance in all cases. -Binary search typically assumes fixed size entries, but that is not necessary, as long as we can identify the start of an entry. - -It is worth noting that the size (end-start) is always smaller than the end, so we encode the entries as: - start, size, target, depth, push-lasti - -Also, sizes are limited to 2**30 as the code length cannot exceed 2**31 and each instruction takes 2 bytes. -It also happens that depth is generally quite small. - -So, we need to encode: - start (up to 30 bits) - size (up to 30 bits) - target (up to 30 bits) - depth (up to ~8 bits) - lasti (1 bit) - -We need a marker for the start of the entry, so the first byte of entry will have the most significant bit set. -Since the most significant bit is reserved for marking the start of an entry, we have 7 bits per byte to encode offsets. -Encoding uses a standard varint encoding, but with only 7 bits instead of the usual 8. -The 8 bits of a bit are (msb left) SXdddddd where S is the start bit. X is the extend bit meaning that the next byte is required to extend the offset. - -In addition, we will combine depth and lasti into a single value, ((depth<<1)+lasti), before encoding. 
- -For example, the exception entry: - start: 20 - end: 28 - target: 100 - depth: 3 - lasti: False - -is encoded first by converting to the more compact four value form: - start: 20 - size: 8 - target: 100 - depth<<1+lasti: 6 - -which is then encoded as: - 148 (MSB + 20 for start) - 8 (size) - 65 (Extend bit + 1) - 36 (Remainder of target, 100 == (1<<6)+36) - 6 - -for a total of five bytes. - - - -Script to parse the exception table ------------------------------------ - -def parse_varint(iterator): - b = next(iterator) - val = b & 63 - while b&64: - val <<= 6 - b = next(iterator) - val |= b&63 - return val - -def parse_exception_table(code): - iterator = iter(code.co_exceptiontable) - try: - while True: - start = parse_varint(iterator)*2 - length = parse_varint(iterator)*2 - end = start + length - 2 # Present as inclusive, not exclusive - target = parse_varint(iterator)*2 - dl = parse_varint(iterator) - depth = dl >> 1 - lasti = bool(dl&1) - yield start, end, target, depth, lasti - except StopIteration: - return From 61d3ab32da92e70bb97a544d76ef2b837501024f Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Mon, 3 Jun 2024 15:06:31 +0300 Subject: [PATCH 054/373] gh-116560: Add PyLong_GetSign() public function (#116561) Co-authored-by: Victor Stinner --- Doc/c-api/long.rst | 13 +++++++++++++ Doc/whatsnew/3.14.rst | 3 +++ Include/cpython/longobject.h | 10 +++++++--- Lib/test/test_capi/test_long.py | 16 ++++++++++++++++ ...024-03-10-14-55-51.gh-issue-116560.x2mZaO.rst | 1 + Modules/_testcapi/long.c | 14 ++++++++++++++ Objects/longobject.c | 12 ++++++++++++ 7 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2024-03-10-14-55-51.gh-issue-116560.x2mZaO.rst diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 522c028cfb8d40..a0e111af5996d7 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -494,6 +494,19 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. .. versionadded:: 3.13 +.. 
c:function:: int PyLong_GetSign(PyObject *obj, int *sign) + + Get the sign of the integer object *obj*. + + On success, set *\*sign* to the integer sign (0, -1 or +1 for zero, negative or + positive integer, respectively) and return 0. + + On failure, return -1 with an exception set. This function always succeeds + if *obj* is a :c:type:`PyLongObject` or its subtype. + + .. versionadded:: 3.14 + + .. c:function:: int PyUnstable_Long_IsCompact(const PyLongObject* op) Return 1 if *op* is compact, 0 otherwise. diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 47f3e30942397f..b2dd80b64a691a 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -255,6 +255,9 @@ C API Changes New Features ------------ +* Add :c:func:`PyLong_GetSign` function to get the sign of :class:`int` objects. + (Contributed by Sergey B Kirpichev in :gh:`116560`.) + Porting to Python 3.14 ---------------------- diff --git a/Include/cpython/longobject.h b/Include/cpython/longobject.h index 96815938c8277a..19a6722d07734a 100644 --- a/Include/cpython/longobject.h +++ b/Include/cpython/longobject.h @@ -55,9 +55,13 @@ PyAPI_FUNC(PyObject*) PyLong_FromUnsignedNativeBytes(const void* buffer, PyAPI_FUNC(int) PyUnstable_Long_IsCompact(const PyLongObject* op); PyAPI_FUNC(Py_ssize_t) PyUnstable_Long_CompactValue(const PyLongObject* op); -// _PyLong_Sign. Return 0 if v is 0, -1 if v < 0, +1 if v > 0. -// v must not be NULL, and must be a normalized long. -// There are no error cases. +/* PyLong_GetSign. Get the sign of an integer object: + 0, -1 or +1 for zero, negative or positive integer, respectively. + + - On success, set '*sign' to the integer sign, and return 0. + - On failure, set an exception, and return -1. */ +PyAPI_FUNC(int) PyLong_GetSign(PyObject *v, int *sign); + PyAPI_FUNC(int) _PyLong_Sign(PyObject *v); /* _PyLong_NumBits. 
Return the number of bits needed to represent the diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 83f894e552f983..06a29b5a0505b4 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -721,6 +721,22 @@ def test_long_fromnativebytes(self): self.assertEqual(expect_u, fromnativebytes(v_be, n, 4, 1), f"PyLong_FromNativeBytes(buffer, {n}, )") + def test_long_getsign(self): + # Test PyLong_GetSign() + getsign = _testcapi.pylong_getsign + self.assertEqual(getsign(1), 1) + self.assertEqual(getsign(123456), 1) + self.assertEqual(getsign(-2), -1) + self.assertEqual(getsign(0), 0) + self.assertEqual(getsign(True), 1) + self.assertEqual(getsign(IntSubclass(-11)), -1) + self.assertEqual(getsign(False), 0) + + self.assertRaises(TypeError, getsign, 1.0) + self.assertRaises(TypeError, getsign, Index(123)) + + # CRASHES getsign(NULL) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/C API/2024-03-10-14-55-51.gh-issue-116560.x2mZaO.rst b/Misc/NEWS.d/next/C API/2024-03-10-14-55-51.gh-issue-116560.x2mZaO.rst new file mode 100644 index 00000000000000..9bcadfd9247f78 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-03-10-14-55-51.gh-issue-116560.x2mZaO.rst @@ -0,0 +1 @@ +Add :c:func:`PyLong_GetSign` function. Patch by Sergey B Kirpichev. 
diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index 769c3909ea3fb1..2b5e85d5707522 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -92,6 +92,19 @@ pylong_fromnativebytes(PyObject *module, PyObject *args) return res; } + +static PyObject * +pylong_getsign(PyObject *module, PyObject *arg) +{ + int sign; + NULLABLE(arg); + if (PyLong_GetSign(arg, &sign) == -1) { + return NULL; + } + return PyLong_FromLong(sign); +} + + static PyObject * pylong_aspid(PyObject *module, PyObject *arg) { @@ -109,6 +122,7 @@ static PyMethodDef test_methods[] = { {"pylong_fromunicodeobject", pylong_fromunicodeobject, METH_VARARGS}, {"pylong_asnativebytes", pylong_asnativebytes, METH_VARARGS}, {"pylong_fromnativebytes", pylong_fromnativebytes, METH_VARARGS}, + {"pylong_getsign", pylong_getsign, METH_O}, {"pylong_aspid", pylong_aspid, METH_O}, {NULL}, }; diff --git a/Objects/longobject.c b/Objects/longobject.c index 2dc2cb7a47b460..054689471e7aa9 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -770,6 +770,18 @@ _PyLong_Sign(PyObject *vv) return _PyLong_NonCompactSign(v); } +int +PyLong_GetSign(PyObject *vv, int *sign) +{ + if (!PyLong_Check(vv)) { + PyErr_Format(PyExc_TypeError, "expect int, got %T", vv); + return -1; + } + + *sign = _PyLong_Sign(vv); + return 0; +} + static int bit_length_digit(digit x) { From 4765e1fa292007f8ddc59f33454b747312506a7a Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Mon, 3 Jun 2024 15:27:44 +0200 Subject: [PATCH 055/373] gh-102511: Amend 3.13.0b1.rst (GH-119895) --- Misc/NEWS.d/3.13.0b1.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Misc/NEWS.d/3.13.0b1.rst b/Misc/NEWS.d/3.13.0b1.rst index 525491a2603416..09b62c8377aabd 100644 --- a/Misc/NEWS.d/3.13.0b1.rst +++ b/Misc/NEWS.d/3.13.0b1.rst @@ -300,6 +300,7 @@ Improve :exc:`SyntaxError` message for empty type param brackets. .. nonce: qDEB66 .. section: Core and Builtins +Fix :func:`os.path.normpath` for UNC paths on Windows. 
Speed up :func:`os.path.splitroot` with a native implementation. .. From fd01271366abefa8f991e53f090387882fbd6bdd Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Mon, 3 Jun 2024 15:42:45 +0100 Subject: [PATCH 056/373] gh-119679: Ensures correct import libraries are included in Windows install packages (GH-119790) --- ...-05-30-17-39-25.gh-issue-119679.mZC87w.rst | 1 + PC/layout/main.py | 14 +++++++----- Tools/msi/freethreaded/freethreaded_files.wxs | 22 ++++++++++++------- 3 files changed, 23 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2024-05-30-17-39-25.gh-issue-119679.mZC87w.rst diff --git a/Misc/NEWS.d/next/Windows/2024-05-30-17-39-25.gh-issue-119679.mZC87w.rst b/Misc/NEWS.d/next/Windows/2024-05-30-17-39-25.gh-issue-119679.mZC87w.rst new file mode 100644 index 00000000000000..db9e798d3ddcb8 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2024-05-30-17-39-25.gh-issue-119679.mZC87w.rst @@ -0,0 +1 @@ +Ensures correct import libraries are included in Windows installs. 
diff --git a/PC/layout/main.py b/PC/layout/main.py index 1c4842f8588a5b..716f01097fe3b0 100644 --- a/PC/layout/main.py +++ b/PC/layout/main.py @@ -121,7 +121,7 @@ def get_tcltk_lib(ns): def get_layout(ns): - def in_build(f, dest="", new_name=None): + def in_build(f, dest="", new_name=None, no_lib=False): n, _, x = f.rpartition(".") n = new_name or n src = ns.build / f @@ -136,7 +136,7 @@ def in_build(f, dest="", new_name=None): pdb = src.with_suffix(".pdb") if pdb.is_file(): yield dest + n + ".pdb", pdb - if ns.include_dev: + if ns.include_dev and not no_lib: lib = src.with_suffix(".lib") if lib.is_file(): yield "libs/" + n + ".lib", lib @@ -202,7 +202,9 @@ def in_build(f, dest="", new_name=None): yield "LICENSE.txt", ns.build / "LICENSE.txt" - for dest, src in rglob(ns.build, "*.pyd"): + dest="" if ns.flat_dlls else "DLLs/" + + for _, src in rglob(ns.build, "*.pyd"): if ns.include_freethreaded: if not src.match("*.cp*t-win*.pyd"): continue @@ -217,14 +219,14 @@ def in_build(f, dest="", new_name=None): continue if src in TCLTK_PYDS_ONLY and not ns.include_tcltk: continue - yield from in_build(src.name, dest="" if ns.flat_dlls else "DLLs/") + yield from in_build(src.name, dest=dest, no_lib=True) - for dest, src in rglob(ns.build, "*.dll"): + for _, src in rglob(ns.build, "*.dll"): if src.stem.endswith("_d") != bool(ns.debug) and src not in REQUIRED_DLLS: continue if src in EXCLUDE_FROM_DLLS: continue - yield from in_build(src.name, dest="" if ns.flat_dlls else "DLLs/") + yield from in_build(src.name, no_lib=True) if ns.zip_lib: zip_name = PYTHON_ZIP_NAME diff --git a/Tools/msi/freethreaded/freethreaded_files.wxs b/Tools/msi/freethreaded/freethreaded_files.wxs index adaf63c69d5ade..49ecb3429ad8f3 100644 --- a/Tools/msi/freethreaded/freethreaded_files.wxs +++ b/Tools/msi/freethreaded/freethreaded_files.wxs @@ -48,6 +48,12 @@ + + + + + + @@ -69,8 +75,14 @@ - - + + + + + + + + @@ -147,12 +159,6 @@ - - - - - - From 42a34ddb0b63e638905b01e17a7254623a0de427 Mon Sep 17 
00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 3 Jun 2024 11:13:07 -0400 Subject: [PATCH 057/373] gh-119588: Implement zipfile.Path.is_symlink (zipp 3.19.0). (#119591) --- Doc/library/zipfile.rst | 9 +++++++ Lib/test/test_zipfile/_path/test_path.py | 27 ++++++++++++------- Lib/zipfile/_path/__init__.py | 7 +++-- ...-05-26-21-28-11.gh-issue-119588.wlLBK5.rst | 1 + 4 files changed, 32 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-26-21-28-11.gh-issue-119588.wlLBK5.rst diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index aad2028523dc34..a4d9a1852f8f0d 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -585,6 +585,15 @@ Path objects are traversable using the ``/`` operator or ``joinpath``. Return ``True`` if the current context references a file. +.. method:: Path.is_symlink() + + Return ``True`` if the current context references a symbolic link. + + .. versionadded:: 3.12 + + .. versionchanged:: 3.12.4 + Prior to 3.12.4, ``is_symlink`` would unconditionally return ``False``. + .. method:: Path.exists() Return ``True`` if the current context references a file or diff --git a/Lib/test/test_zipfile/_path/test_path.py b/Lib/test/test_zipfile/_path/test_path.py index e5d2acf39a10f8..99842ffd63a64e 100644 --- a/Lib/test/test_zipfile/_path/test_path.py +++ b/Lib/test/test_zipfile/_path/test_path.py @@ -3,6 +3,7 @@ import contextlib import pathlib import pickle +import stat import sys import unittest import zipfile @@ -21,12 +22,17 @@ class itertools: Counter = Counter +def _make_link(info: zipfile.ZipInfo): # type: ignore[name-defined] + info.external_attr |= stat.S_IFLNK << 16 + + def build_alpharep_fixture(): """ Create a zip file with this structure: . 
├── a.txt + ├── n.txt (-> a.txt) ├── b │ ├── c.txt │ ├── d @@ -47,6 +53,7 @@ def build_alpharep_fixture(): - multiple files in a directory (b/c, b/f) - a directory containing only a directory (g/h) - a directory with files of different extensions (j/klm) + - a symlink (n) pointing to (a) "alpha" because it uses alphabet "rep" because it's a representative example @@ -61,6 +68,9 @@ def build_alpharep_fixture(): zf.writestr("j/k.bin", b"content of k") zf.writestr("j/l.baz", b"content of l") zf.writestr("j/m.bar", b"content of m") + zf.writestr("n.txt", b"a.txt") + _make_link(zf.infolist()[-1]) + zf.filename = "alpharep.zip" return zf @@ -91,7 +101,7 @@ def zipfile_ondisk(self, alpharep): def test_iterdir_and_types(self, alpharep): root = zipfile.Path(alpharep) assert root.is_dir() - a, b, g, j = root.iterdir() + a, k, b, g, j = root.iterdir() assert a.is_file() assert b.is_dir() assert g.is_dir() @@ -111,7 +121,7 @@ def test_is_file_missing(self, alpharep): @pass_alpharep def test_iterdir_on_file(self, alpharep): root = zipfile.Path(alpharep) - a, b, g, j = root.iterdir() + a, k, b, g, j = root.iterdir() with self.assertRaises(ValueError): a.iterdir() @@ -126,7 +136,7 @@ def test_subdir_is_dir(self, alpharep): @pass_alpharep def test_open(self, alpharep): root = zipfile.Path(alpharep) - a, b, g, j = root.iterdir() + a, k, b, g, j = root.iterdir() with a.open(encoding="utf-8") as strm: data = strm.read() self.assertEqual(data, "content of a") @@ -230,7 +240,7 @@ def test_open_missing_directory(self, alpharep): @pass_alpharep def test_read(self, alpharep): root = zipfile.Path(alpharep) - a, b, g, j = root.iterdir() + a, k, b, g, j = root.iterdir() assert a.read_text(encoding="utf-8") == "content of a" # Also check positional encoding arg (gh-101144). assert a.read_text("utf-8") == "content of a" @@ -296,7 +306,7 @@ def test_mutability(self, alpharep): reflect that change. 
""" root = zipfile.Path(alpharep) - a, b, g, j = root.iterdir() + a, k, b, g, j = root.iterdir() alpharep.writestr('foo.txt', 'foo') alpharep.writestr('bar/baz.txt', 'baz') assert any(child.name == 'foo.txt' for child in root.iterdir()) @@ -513,12 +523,9 @@ def test_eq_hash(self, alpharep): @pass_alpharep def test_is_symlink(self, alpharep): - """ - See python/cpython#82102 for symlink support beyond this object. - """ - root = zipfile.Path(alpharep) - assert not root.is_symlink() + assert not root.joinpath('a.txt').is_symlink() + assert root.joinpath('n.txt').is_symlink() @pass_alpharep def test_relative_to(self, alpharep): diff --git a/Lib/zipfile/_path/__init__.py b/Lib/zipfile/_path/__init__.py index 79ebb777354e03..f5ea18cee61930 100644 --- a/Lib/zipfile/_path/__init__.py +++ b/Lib/zipfile/_path/__init__.py @@ -5,6 +5,7 @@ import contextlib import pathlib import re +import stat import sys from .glob import Translator @@ -390,9 +391,11 @@ def match(self, path_pattern): def is_symlink(self): """ - Return whether this path is a symlink. Always false (python/cpython#82102). + Return whether this path is a symlink. """ - return False + info = self.root.getinfo(self.at) + mode = info.external_attr >> 16 + return stat.S_ISLNK(mode) def glob(self, pattern): if not pattern: diff --git a/Misc/NEWS.d/next/Library/2024-05-26-21-28-11.gh-issue-119588.wlLBK5.rst b/Misc/NEWS.d/next/Library/2024-05-26-21-28-11.gh-issue-119588.wlLBK5.rst new file mode 100644 index 00000000000000..01321d8bfe2ad5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-26-21-28-11.gh-issue-119588.wlLBK5.rst @@ -0,0 +1 @@ +``zipfile.Path.is_symlink`` now assesses if the given path is a symlink. 
From 6acb32fac3511c1d5500cac66f1d6397dcdab835 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Mon, 3 Jun 2024 11:32:40 -0400 Subject: [PATCH 058/373] Use Cirrus M1 macOS runners for CI (GH-119979) Co-authored-by: Ee Durbin --- .github/workflows/build.yml | 8 ++++---- Lib/test/test_pyrepl/test_unix_console.py | 3 ++- Lib/test/test_pyrepl/test_windows_console.py | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7e63737b90b72a..cde93c77a0b82e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -199,8 +199,8 @@ jobs: uses: ./.github/workflows/reusable-macos.yml with: config_hash: ${{ needs.check_source.outputs.config_hash }} - # macos-14 is M1, macos-13 is Intel - os-matrix: '["macos-14", "macos-13"]' + # Cirrus is M1, macos-13 is default GHA Intel + os-matrix: '["ghcr.io/cirruslabs/macos-runner:sonoma", "macos-13"]' build_macos_free_threading: name: 'macOS (free-threading)' @@ -210,8 +210,8 @@ jobs: with: config_hash: ${{ needs.check_source.outputs.config_hash }} free-threading: true - # macos-14-large is Intel with 12 cores (most parallelism) - os-matrix: '["macos-14"]' + # Cirrus is M1 + os-matrix: '["ghcr.io/cirruslabs/macos-runner:sonoma"]' build_ubuntu: name: 'Ubuntu' diff --git a/Lib/test/test_pyrepl/test_unix_console.py b/Lib/test/test_pyrepl/test_unix_console.py index d0b98f17ade094..e3bbabcb0089fb 100644 --- a/Lib/test/test_pyrepl/test_unix_console.py +++ b/Lib/test/test_pyrepl/test_unix_console.py @@ -6,12 +6,14 @@ from unittest.mock import MagicMock, call, patch, ANY from .support import handle_all_events, code_to_events + try: from _pyrepl.console import Event from _pyrepl.unix_console import UnixConsole except ImportError: pass + def unix_console(events, **kwargs): console = UnixConsole() console.get_event = MagicMock(side_effect=events) @@ -138,7 +140,6 @@ def test_wrap(self, _os_write): _os_write.assert_any_call(ANY, b"4") 
con.restore() - def test_cursor_left(self, _os_write): code = "1" events = itertools.chain( diff --git a/Lib/test/test_pyrepl/test_windows_console.py b/Lib/test/test_pyrepl/test_windows_console.py index e52a54d31fb5d8..4a3b2baf64a944 100644 --- a/Lib/test/test_pyrepl/test_windows_console.py +++ b/Lib/test/test_pyrepl/test_windows_console.py @@ -1,7 +1,7 @@ import sys import unittest -if sys.platform != 'win32': +if sys.platform != "win32": raise unittest.SkipTest("test only relevant on win32") From 153b118b78588209850cc2a4cbc977f193a3ab6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 3 Jun 2024 17:48:02 +0200 Subject: [PATCH 059/373] gh-119981: Use do while(0) in some symtable.c multi-line macros (#119982) --- Python/symtable.c | 97 +++++++++++++++++++++++++---------------------- 1 file changed, 52 insertions(+), 45 deletions(-) diff --git a/Python/symtable.c b/Python/symtable.c index d8240cdd11f7ea..0ee8ca36cf8df0 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -601,16 +601,17 @@ error_at_directive(PySTEntryObject *ste, PyObject *name) global: set of all symbol names explicitly declared as global */ -#define SET_SCOPE(DICT, NAME, I) { \ - PyObject *o = PyLong_FromLong(I); \ - if (!o) \ - return 0; \ - if (PyDict_SetItem((DICT), (NAME), o) < 0) { \ +#define SET_SCOPE(DICT, NAME, I) \ + do { \ + PyObject *o = PyLong_FromLong(I); \ + if (!o) \ + return 0; \ + if (PyDict_SetItem((DICT), (NAME), o) < 0) { \ + Py_DECREF(o); \ + return 0; \ + } \ Py_DECREF(o); \ - return 0; \ - } \ - Py_DECREF(o); \ -} + } while(0) /* Decide on scope of name, given flags. 
@@ -1562,39 +1563,45 @@ symtable_enter_type_param_block(struct symtable *st, identifier name, return --(ST)->recursion_depth,(X) #define VISIT(ST, TYPE, V) \ - if (!symtable_visit_ ## TYPE((ST), (V))) \ - VISIT_QUIT((ST), 0); - -#define VISIT_SEQ(ST, TYPE, SEQ) { \ - int i; \ - asdl_ ## TYPE ## _seq *seq = (SEQ); /* avoid variable capture */ \ - for (i = 0; i < asdl_seq_LEN(seq); i++) { \ - TYPE ## _ty elt = (TYPE ## _ty)asdl_seq_GET(seq, i); \ - if (!symtable_visit_ ## TYPE((ST), elt)) \ - VISIT_QUIT((ST), 0); \ - } \ -} - -#define VISIT_SEQ_TAIL(ST, TYPE, SEQ, START) { \ - int i; \ - asdl_ ## TYPE ## _seq *seq = (SEQ); /* avoid variable capture */ \ - for (i = (START); i < asdl_seq_LEN(seq); i++) { \ - TYPE ## _ty elt = (TYPE ## _ty)asdl_seq_GET(seq, i); \ - if (!symtable_visit_ ## TYPE((ST), elt)) \ - VISIT_QUIT((ST), 0); \ - } \ -} - -#define VISIT_SEQ_WITH_NULL(ST, TYPE, SEQ) { \ - int i = 0; \ - asdl_ ## TYPE ## _seq *seq = (SEQ); /* avoid variable capture */ \ - for (i = 0; i < asdl_seq_LEN(seq); i++) { \ - TYPE ## _ty elt = (TYPE ## _ty)asdl_seq_GET(seq, i); \ - if (!elt) continue; /* can be NULL */ \ - if (!symtable_visit_ ## TYPE((ST), elt)) \ - VISIT_QUIT((ST), 0); \ - } \ -} + do { \ + if (!symtable_visit_ ## TYPE((ST), (V))) { \ + VISIT_QUIT((ST), 0); \ + } \ + } while(0) + +#define VISIT_SEQ(ST, TYPE, SEQ) \ + do { \ + int i; \ + asdl_ ## TYPE ## _seq *seq = (SEQ); /* avoid variable capture */ \ + for (i = 0; i < asdl_seq_LEN(seq); i++) { \ + TYPE ## _ty elt = (TYPE ## _ty)asdl_seq_GET(seq, i); \ + if (!symtable_visit_ ## TYPE((ST), elt)) \ + VISIT_QUIT((ST), 0); \ + } \ + } while(0) + +#define VISIT_SEQ_TAIL(ST, TYPE, SEQ, START) \ + do { \ + int i; \ + asdl_ ## TYPE ## _seq *seq = (SEQ); /* avoid variable capture */ \ + for (i = (START); i < asdl_seq_LEN(seq); i++) { \ + TYPE ## _ty elt = (TYPE ## _ty)asdl_seq_GET(seq, i); \ + if (!symtable_visit_ ## TYPE((ST), elt)) \ + VISIT_QUIT((ST), 0); \ + } \ + } while(0) + +#define VISIT_SEQ_WITH_NULL(ST, 
TYPE, SEQ) \ + do { \ + int i = 0; \ + asdl_ ## TYPE ## _seq *seq = (SEQ); /* avoid variable capture */ \ + for (i = 0; i < asdl_seq_LEN(seq); i++) { \ + TYPE ## _ty elt = (TYPE ## _ty)asdl_seq_GET(seq, i); \ + if (!elt) continue; /* can be NULL */ \ + if (!symtable_visit_ ## TYPE((ST), elt)) \ + VISIT_QUIT((ST), 0); \ + } \ + } while(0) static int symtable_record_directive(struct symtable *st, identifier name, int lineno, @@ -2261,11 +2268,11 @@ symtable_visit_expr(struct symtable *st, expr_ty e) break; case Slice_kind: if (e->v.Slice.lower) - VISIT(st, expr, e->v.Slice.lower) + VISIT(st, expr, e->v.Slice.lower); if (e->v.Slice.upper) - VISIT(st, expr, e->v.Slice.upper) + VISIT(st, expr, e->v.Slice.upper); if (e->v.Slice.step) - VISIT(st, expr, e->v.Slice.step) + VISIT(st, expr, e->v.Slice.step); break; case Name_kind: if (!symtable_add_def(st, e->v.Name.id, From 1d4c2e4a877a48cdc8bcc9808d799b91c82b3757 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Mon, 3 Jun 2024 19:03:56 +0300 Subject: [PATCH 060/373] gh-119057: Use better error messages for zero division (#119066) --- Doc/howto/logging-cookbook.rst | 2 +- Lib/_pylong.py | 2 +- Lib/test/mathdata/ieee754.txt | 2 +- Lib/test/test_builtin.py | 10 ++++++++++ Lib/test/test_doctest/test_doctest.py | 8 ++++---- Lib/test/test_generators.py | 2 +- Lib/test/test_genexps.py | 2 +- .../2024-05-15-12-15-58.gh-issue-119057.P3G9G2.rst | 4 ++++ Objects/complexobject.c | 4 ++-- Objects/floatobject.c | 11 +++++------ Objects/longobject.c | 5 ++--- 11 files changed, 32 insertions(+), 20 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-15-12-15-58.gh-issue-119057.P3G9G2.rst diff --git a/Doc/howto/logging-cookbook.rst b/Doc/howto/logging-cookbook.rst index 60d88204b795f6..3ed2dd6251afe9 100644 --- a/Doc/howto/logging-cookbook.rst +++ b/Doc/howto/logging-cookbook.rst @@ -2950,7 +2950,7 @@ When run, this produces a file with exactly two lines: .. 
code-block:: none 28/01/2015 07:21:23|INFO|Sample message| - 28/01/2015 07:21:23|ERROR|ZeroDivisionError: integer division or modulo by zero|'Traceback (most recent call last):\n File "logtest7.py", line 30, in main\n x = 1 / 0\nZeroDivisionError: integer division or modulo by zero'| + 28/01/2015 07:21:23|ERROR|ZeroDivisionError: division by zero|'Traceback (most recent call last):\n File "logtest7.py", line 30, in main\n x = 1 / 0\nZeroDivisionError: division by zero'| While the above treatment is simplistic, it points the way to how exception information can be formatted to your liking. The :mod:`traceback` module may be diff --git a/Lib/_pylong.py b/Lib/_pylong.py index f7aabde1434725..a8bf5cd3e638a4 100644 --- a/Lib/_pylong.py +++ b/Lib/_pylong.py @@ -530,7 +530,7 @@ def int_divmod(a, b): Its time complexity is O(n**1.58), where n = #bits(a) + #bits(b). """ if b == 0: - raise ZeroDivisionError + raise ZeroDivisionError('division by zero') elif b < 0: q, r = int_divmod(-a, -b) return q, -r diff --git a/Lib/test/mathdata/ieee754.txt b/Lib/test/mathdata/ieee754.txt index a8b8a0a2148f00..0bc45603b8b18a 100644 --- a/Lib/test/mathdata/ieee754.txt +++ b/Lib/test/mathdata/ieee754.txt @@ -116,7 +116,7 @@ inf >>> 0 ** -1 Traceback (most recent call last): ... 
-ZeroDivisionError: 0.0 cannot be raised to a negative power +ZeroDivisionError: zero to a negative power >>> pow(0, NAN) nan diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index d7ba58847a2992..9ff0f488dc4fa9 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -662,6 +662,16 @@ def test_divmod(self): self.assertAlmostEqual(result[1], exp_result[1]) self.assertRaises(TypeError, divmod) + self.assertRaisesRegex( + ZeroDivisionError, + "division by zero", + divmod, 1, 0, + ) + self.assertRaisesRegex( + ZeroDivisionError, + "division by zero", + divmod, 0.0, 0, + ) def test_eval(self): self.assertEqual(eval('1+1'), 2) diff --git a/Lib/test/test_doctest/test_doctest.py b/Lib/test/test_doctest/test_doctest.py index 286c3ecfbc9239..b25d57ceeae6aa 100644 --- a/Lib/test/test_doctest/test_doctest.py +++ b/Lib/test/test_doctest/test_doctest.py @@ -1035,7 +1035,7 @@ def exceptions(): r""" ... >>> x = 12 ... >>> print(x//0) ... Traceback (most recent call last): - ... ZeroDivisionError: integer division or modulo by zero + ... ZeroDivisionError: division by zero ... ''' >>> test = doctest.DocTestFinder().find(f)[0] >>> doctest.DocTestRunner(verbose=False).run(test) @@ -1052,7 +1052,7 @@ def exceptions(): r""" ... >>> print('pre-exception output', x//0) ... pre-exception output ... Traceback (most recent call last): - ... ZeroDivisionError: integer division or modulo by zero + ... ZeroDivisionError: division by zero ... ''' >>> test = doctest.DocTestFinder().find(f)[0] >>> doctest.DocTestRunner(verbose=False).run(test) @@ -1063,7 +1063,7 @@ def exceptions(): r""" print('pre-exception output', x//0) Exception raised: ... - ZeroDivisionError: integer division or modulo by zero + ZeroDivisionError: division by zero TestResults(failed=1, attempted=2) Exception messages may contain newlines: @@ -1258,7 +1258,7 @@ def exceptions(): r""" Exception raised: Traceback (most recent call last): ... 
- ZeroDivisionError: integer division or modulo by zero + ZeroDivisionError: division by zero TestResults(failed=1, attempted=1) >>> _colorize.COLORIZE = save_colorize diff --git a/Lib/test/test_generators.py b/Lib/test/test_generators.py index 6d36df2c7413e0..4598e62122b09c 100644 --- a/Lib/test/test_generators.py +++ b/Lib/test/test_generators.py @@ -907,7 +907,7 @@ def b(): File "", line 1, in ? File "", line 2, in g File "", line 2, in f - ZeroDivisionError: integer division or modulo by zero + ZeroDivisionError: division by zero >>> next(k) # and the generator cannot be resumed Traceback (most recent call last): File "", line 1, in ? diff --git a/Lib/test/test_genexps.py b/Lib/test/test_genexps.py index 4f2d3cdcc7943e..7fb58a67368576 100644 --- a/Lib/test/test_genexps.py +++ b/Lib/test/test_genexps.py @@ -223,7 +223,7 @@ next(g) File "", line 1, in g = (10 // i for i in (5, 0, 2)) - ZeroDivisionError: integer division or modulo by zero + ZeroDivisionError: division by zero >>> next(g) Traceback (most recent call last): File "", line 1, in -toplevel- diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-15-12-15-58.gh-issue-119057.P3G9G2.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-15-12-15-58.gh-issue-119057.P3G9G2.rst new file mode 100644 index 00000000000000..d252888906c348 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-15-12-15-58.gh-issue-119057.P3G9G2.rst @@ -0,0 +1,4 @@ +Improve :exc:`ZeroDivisionError` error message. +Now, all error messages are harmonized: all ``/``, ``//``, and ``%`` +operations just use "division by zero" message. +And ``0 ** -1`` operation uses "zero to a negative power". 
diff --git a/Objects/complexobject.c b/Objects/complexobject.c index 59c84f1359b966..a8be266970afd0 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -523,7 +523,7 @@ complex_div(PyObject *v, PyObject *w) errno = 0; quot = _Py_c_quot(a, b); if (errno == EDOM) { - PyErr_SetString(PyExc_ZeroDivisionError, "complex division by zero"); + PyErr_SetString(PyExc_ZeroDivisionError, "division by zero"); return NULL; } return PyComplex_FromCComplex(quot); @@ -554,7 +554,7 @@ complex_pow(PyObject *v, PyObject *w, PyObject *z) _Py_ADJUST_ERANGE2(p.real, p.imag); if (errno == EDOM) { PyErr_SetString(PyExc_ZeroDivisionError, - "0.0 to a negative or complex power"); + "zero to a negative or complex power"); return NULL; } else if (errno == ERANGE) { diff --git a/Objects/floatobject.c b/Objects/floatobject.c index a5bf356cc9c7f0..2627ba80eed8ca 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -623,7 +623,7 @@ float_div(PyObject *v, PyObject *w) CONVERT_TO_DOUBLE(w, b); if (b == 0.0) { PyErr_SetString(PyExc_ZeroDivisionError, - "float division by zero"); + "division by zero"); return NULL; } a = a / b; @@ -639,7 +639,7 @@ float_rem(PyObject *v, PyObject *w) CONVERT_TO_DOUBLE(w, wx); if (wx == 0.0) { PyErr_SetString(PyExc_ZeroDivisionError, - "float modulo by zero"); + "division by zero"); return NULL; } mod = fmod(vx, wx); @@ -704,7 +704,7 @@ float_divmod(PyObject *v, PyObject *w) CONVERT_TO_DOUBLE(v, vx); CONVERT_TO_DOUBLE(w, wx); if (wx == 0.0) { - PyErr_SetString(PyExc_ZeroDivisionError, "float divmod()"); + PyErr_SetString(PyExc_ZeroDivisionError, "division by zero"); return NULL; } _float_div_mod(vx, wx, &floordiv, &mod); @@ -719,7 +719,7 @@ float_floor_div(PyObject *v, PyObject *w) CONVERT_TO_DOUBLE(v, vx); CONVERT_TO_DOUBLE(w, wx); if (wx == 0.0) { - PyErr_SetString(PyExc_ZeroDivisionError, "float floor division by zero"); + PyErr_SetString(PyExc_ZeroDivisionError, "division by zero"); return NULL; } _float_div_mod(vx, wx, &floordiv, 
&mod); @@ -788,8 +788,7 @@ float_pow(PyObject *v, PyObject *w, PyObject *z) int iw_is_odd = DOUBLE_IS_ODD_INTEGER(iw); if (iw < 0.0) { PyErr_SetString(PyExc_ZeroDivisionError, - "0.0 cannot be raised to a " - "negative power"); + "zero to a negative power"); return NULL; } /* use correct sign if iw is odd */ diff --git a/Objects/longobject.c b/Objects/longobject.c index 054689471e7aa9..ee0b2a038a2aab 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -3121,8 +3121,7 @@ long_divrem(PyLongObject *a, PyLongObject *b, PyLongObject *z; if (size_b == 0) { - PyErr_SetString(PyExc_ZeroDivisionError, - "integer division or modulo by zero"); + PyErr_SetString(PyExc_ZeroDivisionError, "division by zero"); return -1; } if (size_a < size_b || @@ -3185,7 +3184,7 @@ long_rem(PyLongObject *a, PyLongObject *b, PyLongObject **prem) if (size_b == 0) { PyErr_SetString(PyExc_ZeroDivisionError, - "integer modulo by zero"); + "division by zero"); return -1; } if (size_a < size_b || From 4e8aa32245e2d72bf558b711ccdbcee594347615 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 3 Jun 2024 18:34:36 +0200 Subject: [PATCH 061/373] gh-119727: Add --single-process option to regrtest (#119728) --- Lib/test/libregrtest/cmdline.py | 11 ++++++++ Lib/test/libregrtest/main.py | 26 ++++++++++++------- Lib/test/libregrtest/worker.py | 6 ++--- Lib/test/test_regrtest.py | 13 ++++++++++ ...-05-29-15-28-08.gh-issue-119727.dVkaZM.rst | 2 ++ 5 files changed, 46 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2024-05-29-15-28-08.gh-issue-119727.dVkaZM.rst diff --git a/Lib/test/libregrtest/cmdline.py b/Lib/test/libregrtest/cmdline.py index d4dac77b250ad6..2ff4715e82a41b 100644 --- a/Lib/test/libregrtest/cmdline.py +++ b/Lib/test/libregrtest/cmdline.py @@ -174,6 +174,7 @@ def __init__(self, **kwargs) -> None: self.tempdir = None self._add_python_opts = True self.xmlpath = None + self.single_process = False super().__init__(**kwargs) @@ -307,6 +308,12 @@ def 
_create_parser(): group.add_argument('-j', '--multiprocess', metavar='PROCESSES', dest='use_mp', type=int, help='run PROCESSES processes at once') + group.add_argument('--single-process', action='store_true', + dest='single_process', + help='always run all tests sequentially in ' + 'a single process, ignore -jN option, ' + 'and failed tests are also rerun sequentially ' + 'in the same process') group.add_argument('-T', '--coverage', action='store_true', dest='trace', help='turn on code coverage tracing using the trace ' @@ -435,6 +442,10 @@ def _parse_args(args, **kwargs): else: ns._add_python_opts = False + # --singleprocess overrides -jN option + if ns.single_process: + ns.use_mp = None + # When both --slow-ci and --fast-ci options are present, # --slow-ci has the priority if ns.slow_ci: diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py index 9e7a7d60880091..5148d3070513e8 100644 --- a/Lib/test/libregrtest/main.py +++ b/Lib/test/libregrtest/main.py @@ -89,12 +89,13 @@ def __init__(self, ns: Namespace, _add_python_opts: bool = False): self.cmdline_args: TestList = ns.args # Workers - if ns.use_mp is None: - num_workers = 0 # run sequentially + self.single_process: bool = ns.single_process + if self.single_process or ns.use_mp is None: + num_workers = 0 # run sequentially in a single process elif ns.use_mp <= 0: - num_workers = -1 # use the number of CPUs + num_workers = -1 # run in parallel, use the number of CPUs else: - num_workers = ns.use_mp + num_workers = ns.use_mp # run in parallel self.num_workers: int = num_workers self.worker_json: StrJSON | None = ns.worker_json @@ -236,7 +237,7 @@ def list_tests(tests: TestTuple): def _rerun_failed_tests(self, runtests: RunTests): # Configure the runner to re-run tests - if self.num_workers == 0: + if self.num_workers == 0 and not self.single_process: # Always run tests in fresh processes to have more deterministic # initial state. 
Don't re-run tests in parallel but limit to a # single worker process to have side effects (on the system load @@ -246,7 +247,6 @@ def _rerun_failed_tests(self, runtests: RunTests): tests, match_tests_dict = self.results.prepare_rerun() # Re-run failed tests - self.log(f"Re-running {len(tests)} failed tests in verbose mode in subprocesses") runtests = runtests.copy( tests=tests, rerun=True, @@ -256,7 +256,15 @@ def _rerun_failed_tests(self, runtests: RunTests): match_tests_dict=match_tests_dict, output_on_failure=False) self.logger.set_tests(runtests) - self._run_tests_mp(runtests, self.num_workers) + + msg = f"Re-running {len(tests)} failed tests in verbose mode" + if not self.single_process: + msg = f"{msg} in subprocesses" + self.log(msg) + self._run_tests_mp(runtests, self.num_workers) + else: + self.log(msg) + self.run_tests_sequentially(runtests) return runtests def rerun_failed_tests(self, runtests: RunTests): @@ -371,7 +379,7 @@ def run_tests_sequentially(self, runtests) -> None: tests = count(jobs, 'test') else: tests = 'tests' - msg = f"Run {tests} sequentially" + msg = f"Run {tests} sequentially in a single process" if runtests.timeout: msg += " (timeout: %s)" % format_duration(runtests.timeout) self.log(msg) @@ -599,7 +607,7 @@ def _add_cross_compile_opts(self, regrtest_opts): keep_environ = True if cross_compile and hostrunner: - if self.num_workers == 0: + if self.num_workers == 0 and not self.single_process: # For now use only two cores for cross-compiled builds; # hostrunner can be expensive. 
regrtest_opts.extend(['-j', '2']) diff --git a/Lib/test/libregrtest/worker.py b/Lib/test/libregrtest/worker.py index 15d32b5baa04d0..86cc30835fdbda 100644 --- a/Lib/test/libregrtest/worker.py +++ b/Lib/test/libregrtest/worker.py @@ -14,9 +14,9 @@ USE_PROCESS_GROUP = (hasattr(os, "setsid") and hasattr(os, "killpg")) -NEED_TTY = set(''' - test_ioctl -'''.split()) +NEED_TTY = { + 'test_ioctl', +} def create_worker_process(runtests: WorkerRunTests, output_fd: int, diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py index 17eff617a56aa4..97ce797f0f6acb 100644 --- a/Lib/test/test_regrtest.py +++ b/Lib/test/test_regrtest.py @@ -473,6 +473,19 @@ def test_verbose3_huntrleaks(self): self.assertEqual(regrtest.hunt_refleak.runs, 10) self.assertFalse(regrtest.output_on_failure) + def test_single_process(self): + args = ['-j2', '--single-process'] + with support.captured_stderr(): + regrtest = self.create_regrtest(args) + self.assertEqual(regrtest.num_workers, 0) + self.assertTrue(regrtest.single_process) + + args = ['--fast-ci', '--single-process'] + with support.captured_stderr(): + regrtest = self.create_regrtest(args) + self.assertEqual(regrtest.num_workers, 0) + self.assertTrue(regrtest.single_process) + @dataclasses.dataclass(slots=True) class Rerun: diff --git a/Misc/NEWS.d/next/Tests/2024-05-29-15-28-08.gh-issue-119727.dVkaZM.rst b/Misc/NEWS.d/next/Tests/2024-05-29-15-28-08.gh-issue-119727.dVkaZM.rst new file mode 100644 index 00000000000000..bf28d8bb77b8a2 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2024-05-29-15-28-08.gh-issue-119727.dVkaZM.rst @@ -0,0 +1,2 @@ +Add ``--single-process`` command line option to Python test runner (regrtest). +Patch by Victor Stinner. 
From 2e0aa731aebb8ef3d89ada82f5d39b1bbac65d1f Mon Sep 17 00:00:00 2001 From: Daniel Hollas Date: Mon, 3 Jun 2024 18:07:06 +0100 Subject: [PATCH 062/373] gh-118835: pyrepl: Fix prompt length computation for custom prompts containing ANSI escape codes (#119942) --- Lib/_pyrepl/reader.py | 10 ++++-- Lib/test/test_pyrepl/test_reader.py | 32 +++++++++++++++++++ ...-06-02-15-09-17.gh-issue-118835.KUAuz6.rst | 1 + 3 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-02-15-09-17.gh-issue-118835.KUAuz6.rst diff --git a/Lib/_pyrepl/reader.py b/Lib/_pyrepl/reader.py index 5401ae7b0ae32d..f2e68ef6f3ee66 100644 --- a/Lib/_pyrepl/reader.py +++ b/Lib/_pyrepl/reader.py @@ -28,7 +28,7 @@ from . import commands, console, input -from .utils import ANSI_ESCAPE_SEQUENCE, wlen +from .utils import ANSI_ESCAPE_SEQUENCE, wlen, str_width from .trace import trace @@ -339,7 +339,8 @@ def calc_complete_screen(self) -> list[str]: screeninfo.append((0, [])) return screen - def process_prompt(self, prompt: str) -> tuple[str, int]: + @staticmethod + def process_prompt(prompt: str) -> tuple[str, int]: """Process the prompt. This means calculate the length of the prompt. The character \x01 @@ -351,6 +352,11 @@ def process_prompt(self, prompt: str) -> tuple[str, int]: # sequences if they were not explicitly within \x01...\x02. # They are CSI (or ANSI) sequences ( ESC [ ... LETTER ) + # wlen from utils already excludes ANSI_ESCAPE_SEQUENCE chars, + # which breaks the logic below so we redefine it here. 
+ def wlen(s: str) -> int: + return sum(str_width(i) for i in s) + out_prompt = "" l = wlen(prompt) pos = 0 diff --git a/Lib/test/test_pyrepl/test_reader.py b/Lib/test/test_pyrepl/test_reader.py index c9b03d5e711539..9fb956b655594f 100644 --- a/Lib/test/test_pyrepl/test_reader.py +++ b/Lib/test/test_pyrepl/test_reader.py @@ -4,6 +4,7 @@ from .support import handle_all_events, handle_events_narrow_console, code_to_events, prepare_reader from _pyrepl.console import Event +from _pyrepl.reader import Reader class TestReader(TestCase): @@ -176,3 +177,34 @@ def test_newline_within_block_trailing_whitespace(self): ) self.assert_screen_equals(reader, expected) self.assertTrue(reader.finished) + + def test_prompt_length(self): + # Handles simple ASCII prompt + ps1 = ">>> " + prompt, l = Reader.process_prompt(ps1) + self.assertEqual(prompt, ps1) + self.assertEqual(l, 4) + + # Handles ANSI escape sequences + ps1 = "\033[0;32m>>> \033[0m" + prompt, l = Reader.process_prompt(ps1) + self.assertEqual(prompt, "\033[0;32m>>> \033[0m") + self.assertEqual(l, 4) + + # Handles ANSI escape sequences bracketed in \001 .. 
\002 + ps1 = "\001\033[0;32m\002>>> \001\033[0m\002" + prompt, l = Reader.process_prompt(ps1) + self.assertEqual(prompt, "\033[0;32m>>> \033[0m") + self.assertEqual(l, 4) + + # Handles wide characters in prompt + ps1 = "樂>> " + prompt, l = Reader.process_prompt(ps1) + self.assertEqual(prompt, ps1) + self.assertEqual(l, 5) + + # Handles wide characters AND ANSI sequences together + ps1 = "\001\033[0;32m\002樂>\001\033[0m\002> " + prompt, l = Reader.process_prompt(ps1) + self.assertEqual(prompt, "\033[0;32m樂>\033[0m> ") + self.assertEqual(l, 5) diff --git a/Misc/NEWS.d/next/Library/2024-06-02-15-09-17.gh-issue-118835.KUAuz6.rst b/Misc/NEWS.d/next/Library/2024-06-02-15-09-17.gh-issue-118835.KUAuz6.rst new file mode 100644 index 00000000000000..ec9ca20a487d76 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-02-15-09-17.gh-issue-118835.KUAuz6.rst @@ -0,0 +1 @@ +Fix _pyrepl crash when using custom prompt with ANSI escape codes. From 41c1cefbae71d687d1a935233b086473df65e15c Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 3 Jun 2024 13:42:27 -0400 Subject: [PATCH 063/373] gh-117657: Avoid `sem_clockwait` in TSAN (#119915) The `sem_clockwait` function is not currently instrumented, which leads to false positives. 
--- Python/parking_lot.c | 2 +- Tools/tsan/suppressions_free_threading.txt | 9 --------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/Python/parking_lot.c b/Python/parking_lot.c index e2def9e249cd56..841b1d71ea16cb 100644 --- a/Python/parking_lot.c +++ b/Python/parking_lot.c @@ -119,7 +119,7 @@ _PySemaphore_PlatformWait(_PySemaphore *sema, PyTime_t timeout) if (timeout >= 0) { struct timespec ts; -#if defined(CLOCK_MONOTONIC) && defined(HAVE_SEM_CLOCKWAIT) +#if defined(CLOCK_MONOTONIC) && defined(HAVE_SEM_CLOCKWAIT) && !defined(_Py_THREAD_SANITIZER) PyTime_t now; // silently ignore error: cannot report error to the caller (void)PyTime_MonotonicRaw(&now); diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index ff9c8036e92fe7..39cc5b080c8703 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -15,14 +15,10 @@ race:set_allocator_unlocked # These entries are for warnings that trigger in a library function, as called # by a CPython function. -# https://gist.github.com/swtaarrs/9d41251e603fa6dedd604191a6da820d -race:park_detached_threads # https://gist.github.com/swtaarrs/8e0e365e1d9cecece3269a2fb2f2b8b8 race:sock_recv_impl # https://gist.github.com/swtaarrs/08dfe7883b4c975c31ecb39388987a67 race:free_threadstate -# https://gist.github.com/swtaarrs/cd6aec2006e0c1b561b68d65e9f1a872 -race:_PyParkingLot_Park # These warnings trigger directly in a CPython function. 
@@ -33,8 +29,6 @@ race_top:_mi_heap_delayed_free_partial race_top:_PyEval_EvalFrameDefault race_top:_PyImport_AcquireLock race_top:_PyImport_ReleaseLock -# https://gist.github.com/mpage/0a24eb2dd458441ededb498e9b0e5de8 -race_top:_PyParkingLot_Park race_top:_PyType_HasFeature race_top:assign_version_tag race_top:insertdict @@ -47,8 +41,6 @@ race_top:set_contains_key # https://gist.github.com/colesbury/d13d033f413b4ad07929d044bed86c35 race_top:set_discard_entry race_top:set_inheritable -race_top:start_the_world -race_top:tstate_set_detached race_top:Py_SET_TYPE race_top:_PyDict_CheckConsistency race_top:_PyImport_AcquireLock @@ -64,7 +56,6 @@ race_top:list_get_item_ref race_top:make_pending_calls race_top:set_add_entry race_top:should_intern_string -race_top:llist_insert_tail race_top:_Py_slot_tp_getattr_hook race_top:add_threadstate race_top:dump_traceback From b8fde5db86334690da23343f5f4326adcd8160fb Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Mon, 3 Jun 2024 21:44:36 +0100 Subject: [PATCH 064/373] update CODEOWNERS (#120003) --- .github/CODEOWNERS | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index e08d6cc5719737..ca5c8aaa3a0bef 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -34,6 +34,7 @@ Python/ceval*.h @markshannon Python/compile.c @markshannon @iritkatriel Python/assemble.c @markshannon @iritkatriel Python/flowgraph.c @markshannon @iritkatriel +Python/instruction_sequence.c @iritkatriel Python/ast_opt.c @isidentical Python/bytecodes.c @markshannon Python/optimizer*.c @markshannon @@ -74,11 +75,8 @@ Programs/python.c @ericsnowcurrently Tools/build/generate_global_objects.py @ericsnowcurrently # Exceptions -Lib/traceback.py @iritkatriel Lib/test/test_except*.py @iritkatriel -Lib/test/test_traceback.py @iritkatriel Objects/exceptions.c @iritkatriel -Python/traceback.c @iritkatriel # Hashing **/*hashlib* @gpshead @tiran From 
47fb4327b5c405da6df066dcaa01b7c1aefab313 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 3 Jun 2024 16:58:41 -0400 Subject: [PATCH 065/373] gh-117657: Fix race involving immortalizing objects (#119927) The free-threaded build currently immortalizes objects that use deferred reference counting (see gh-117783). This typically happens once the first non-main thread is created, but the behavior can be suppressed for tests, in subinterpreters, or during a compile() call. This fixes a race condition involving the tracking of whether the behavior is suppressed. --- Include/internal/pycore_gc.h | 14 +++++-------- Lib/test/support/__init__.py | 4 ++-- Modules/_testinternalcapi.c | 24 ++++++++-------------- Objects/codeobject.c | 4 ++-- Objects/object.c | 2 +- Python/bltinmodule.c | 6 +++--- Python/gc_free_threading.c | 14 ++++++------- Python/pystate.c | 4 +--- Tools/tsan/suppressions_free_threading.txt | 2 -- 9 files changed, 30 insertions(+), 44 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 60582521db5bd7..ba8b8e1903f307 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -347,15 +347,11 @@ struct _gc_runtime_state { /* gh-117783: Deferred reference counting is not fully implemented yet, so as a temporary measure we treat objects using deferred referenence - counting as immortal. */ - struct { - /* Immortalize objects instead of marking them as using deferred - reference counting. */ - int enabled; - - /* Set enabled=1 when the first background thread is created. */ - int enable_on_thread_created; - } immortalize; + counting as immortal. 
The value may be zero, one, or a negative number: + 0: immortalize deferred RC objects once the first thread is created + 1: immortalize all deferred RC objects immediately + <0: suppressed; don't immortalize objects */ + int immortalize; #endif }; diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 5825efadffcb29..4b320b494bb8dd 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -529,11 +529,11 @@ def suppress_immortalization(suppress=True): yield return - old_values = _testinternalcapi.set_immortalize_deferred(False) + _testinternalcapi.suppress_immortalization(True) try: yield finally: - _testinternalcapi.set_immortalize_deferred(*old_values) + _testinternalcapi.suppress_immortalization(False) def skip_if_suppress_immortalization(): try: diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index d9b9c999603d5a..6d4a00c06ca9de 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1966,24 +1966,18 @@ get_py_thread_id(PyObject *self, PyObject *Py_UNUSED(ignored)) #endif static PyObject * -set_immortalize_deferred(PyObject *self, PyObject *value) +suppress_immortalization(PyObject *self, PyObject *value) { #ifdef Py_GIL_DISABLED - PyInterpreterState *interp = PyInterpreterState_Get(); - int old_enabled = interp->gc.immortalize.enabled; - int old_enabled_on_thread = interp->gc.immortalize.enable_on_thread_created; - int enabled_on_thread = 0; - if (!PyArg_ParseTuple(value, "i|i", - &interp->gc.immortalize.enabled, - &enabled_on_thread)) - { + int suppress = PyObject_IsTrue(value); + if (suppress < 0) { return NULL; } - interp->gc.immortalize.enable_on_thread_created = enabled_on_thread; - return Py_BuildValue("ii", old_enabled, old_enabled_on_thread); -#else - return Py_BuildValue("OO", Py_False, Py_False); + PyInterpreterState *interp = PyInterpreterState_Get(); + // Subtract two to suppress immortalization (so that 1 -> -1) + 
_Py_atomic_add_int(&interp->gc.immortalize, suppress ? -2 : 2); #endif + Py_RETURN_NONE; } static PyObject * @@ -1991,7 +1985,7 @@ get_immortalize_deferred(PyObject *self, PyObject *Py_UNUSED(ignored)) { #ifdef Py_GIL_DISABLED PyInterpreterState *interp = PyInterpreterState_Get(); - return PyBool_FromLong(interp->gc.immortalize.enable_on_thread_created); + return PyBool_FromLong(_Py_atomic_load_int(&interp->gc.immortalize) >= 0); #else Py_RETURN_FALSE; #endif @@ -2111,7 +2105,7 @@ static PyMethodDef module_functions[] = { #ifdef Py_GIL_DISABLED {"py_thread_id", get_py_thread_id, METH_NOARGS}, #endif - {"set_immortalize_deferred", set_immortalize_deferred, METH_VARARGS}, + {"suppress_immortalization", suppress_immortalization, METH_O}, {"get_immortalize_deferred", get_immortalize_deferred, METH_NOARGS}, #ifdef _Py_TIER2 {"uop_symbols_test", _Py_uop_symbols_test, METH_NOARGS}, diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 2ffda8dee07c90..e3e306bfe810c4 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -110,7 +110,7 @@ should_intern_string(PyObject *o) // unless we've disabled immortalizing objects that use deferred reference // counting. 
PyInterpreterState *interp = _PyInterpreterState_GET(); - if (interp->gc.immortalize.enable_on_thread_created) { + if (_Py_atomic_load_int(&interp->gc.immortalize) < 0) { return 1; } #endif @@ -240,7 +240,7 @@ intern_constants(PyObject *tuple, int *modified) PyThreadState *tstate = PyThreadState_GET(); if (!_Py_IsImmortal(v) && !PyCode_Check(v) && !PyUnicode_CheckExact(v) && - tstate->interp->gc.immortalize.enable_on_thread_created) + _Py_atomic_load_int(&tstate->interp->gc.immortalize) >= 0) { PyObject *interned = intern_one_constant(v); if (interned == NULL) { diff --git a/Objects/object.c b/Objects/object.c index d4fe14c5b3d1aa..5d53e9e5eaba4e 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2429,7 +2429,7 @@ _PyObject_SetDeferredRefcount(PyObject *op) assert(op->ob_ref_shared == 0); _PyObject_SET_GC_BITS(op, _PyGC_BITS_DEFERRED); PyInterpreterState *interp = _PyInterpreterState_GET(); - if (interp->gc.immortalize.enabled) { + if (_Py_atomic_load_int_relaxed(&interp->gc.immortalize) == 1) { // gh-117696: immortalize objects instead of using deferred reference // counting for now. _Py_SetImmortal(op); diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 2a02d8161591c6..c4d3ecbeeff0e6 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -870,15 +870,15 @@ builtin_compile_impl(PyObject *module, PyObject *source, PyObject *filename, // gh-118527: Disable immortalization of code constants for explicit // compile() calls to get consistent frozen outputs between the default // and free-threaded builds. 
+ // Subtract two to suppress immortalization (so that 1 -> -1) PyInterpreterState *interp = _PyInterpreterState_GET(); - int old_value = interp->gc.immortalize.enable_on_thread_created; - interp->gc.immortalize.enable_on_thread_created = 0; + _Py_atomic_add_int(&interp->gc.immortalize, -2); #endif result = Py_CompileStringObject(str, filename, start[compile_mode], &cf, optimize); #ifdef Py_GIL_DISABLED - interp->gc.immortalize.enable_on_thread_created = old_value; + _Py_atomic_add_int(&interp->gc.immortalize, 2); #endif Py_XDECREF(source_copy); diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index e6bd012c40ee82..d005b79ff40dbf 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -703,11 +703,9 @@ _PyGC_Init(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; - if (_Py_IsMainInterpreter(interp)) { - // gh-117783: immortalize objects that would use deferred refcounting - // once the first non-main thread is created. - gcstate->immortalize.enable_on_thread_created = 1; - } + // gh-117783: immortalize objects that would use deferred refcounting + // once the first non-main thread is created (but not in subinterpreters). + gcstate->immortalize = _Py_IsMainInterpreter(interp) ? 
0 : -1; gcstate->garbage = PyList_New(0); if (gcstate->garbage == NULL) { @@ -1808,8 +1806,10 @@ _PyGC_ImmortalizeDeferredObjects(PyInterpreterState *interp) { struct visitor_args args; _PyEval_StopTheWorld(interp); - gc_visit_heaps(interp, &immortalize_visitor, &args); - interp->gc.immortalize.enabled = 1; + if (interp->gc.immortalize == 0) { + gc_visit_heaps(interp, &immortalize_visitor, &args); + interp->gc.immortalize = 1; + } _PyEval_StartTheWorld(interp); } diff --git a/Python/pystate.c b/Python/pystate.c index 36e4206b4a282e..d0293915db7689 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1583,9 +1583,7 @@ new_threadstate(PyInterpreterState *interp, int whence) } else { #ifdef Py_GIL_DISABLED - if (interp->gc.immortalize.enable_on_thread_created && - !interp->gc.immortalize.enabled) - { + if (_Py_atomic_load_int(&interp->gc.immortalize) == 0) { // Immortalize objects marked as using deferred reference counting // the first time a non-main thread is created. _PyGC_ImmortalizeDeferredObjects(interp); diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index 39cc5b080c8703..d5fcac61f0db04 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -47,7 +47,6 @@ race_top:_PyImport_AcquireLock race_top:_Py_dict_lookup_threadsafe race_top:_imp_release_lock race_top:_multiprocessing_SemLock_acquire_impl -race_top:builtin_compile_impl race_top:dictiter_new race_top:dictresize race_top:insert_to_emptydict @@ -55,7 +54,6 @@ race_top:insertdict race_top:list_get_item_ref race_top:make_pending_calls race_top:set_add_entry -race_top:should_intern_string race_top:_Py_slot_tp_getattr_hook race_top:add_threadstate race_top:dump_traceback From d82a7ba041321e7b58a5a9bbc394670be6ceeb7c Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 3 Jun 2024 17:56:00 -0400 Subject: [PATCH 066/373] gh-117398: Add datetime Module State (gh-119810) I was able to make use of the existing 
datetime_state struct, but there was one tricky thing I had to sort out. We mostly aren't converting to heap types, so we can't use things like PyType_GetModuleByDef() to look up the module state. The solution I came up with is somewhat novel, but I consider it straightforward. Also, it shouldn't have much impact on performance. In summary, the main changes here are: * I've added some macros to help hide how various objects relate to module state * as a solution to the module state lookup problem, I've stored the last loaded module on the current interpreter's internal dict (actually a weakref) * if the static type method is used after the module has been deleted, it is reloaded * to avoid extra work when loading the module, we directly copy the objects (new refs only) from the old module state into the new state if the old module hasn't been deleted yet * during module init we set various objects on the static types' __dict__s; to simplify things, we only do that the first time; once those static types have a separate __dict__ per interpreter, we'll do it every time * we now clear the module state when the module is destroyed (before, we were leaking everything in _datetime_global_state) --- .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 3 + Modules/_datetimemodule.c | 532 ++++++++++++------ 5 files changed, 376 insertions(+), 162 deletions(-) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index b9fae11dfaa85c..b186408931c92e 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -829,6 +829,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_call)); _PyStaticObject_CheckRefcnt((PyObject
*)&_Py_ID(c_exception)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_return)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cached_datetime_module)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cached_statements)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cadata)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cafile)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index aa66b20859a472..e1808c85acfb2d 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -318,6 +318,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(c_call) STRUCT_FOR_ID(c_exception) STRUCT_FOR_ID(c_return) + STRUCT_FOR_ID(cached_datetime_module) STRUCT_FOR_ID(cached_statements) STRUCT_FOR_ID(cadata) STRUCT_FOR_ID(cafile) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index b27720e9ff6ecf..2dde6febc2cae4 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -827,6 +827,7 @@ extern "C" { INIT_ID(c_call), \ INIT_ID(c_exception), \ INIT_ID(c_return), \ + INIT_ID(cached_datetime_module), \ INIT_ID(cached_statements), \ INIT_ID(cadata), \ INIT_ID(cafile), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index c61c556b758769..b00119a1bad7ff 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -795,6 +795,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(c_return); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(cached_datetime_module); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(cached_statements); assert(_PyUnicode_CheckConsistency(string, 1)); 
_PyUnicode_InternInPlace(interp, &string); diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 466382b5148509..16bb4c6980aa08 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -25,17 +25,18 @@ # include /* struct timeval */ #endif + +/* forward declarations */ +static PyTypeObject PyDateTime_DateType; +static PyTypeObject PyDateTime_DateTimeType; +static PyTypeObject PyDateTime_TimeType; +static PyTypeObject PyDateTime_DeltaType; +static PyTypeObject PyDateTime_TZInfoType; +static PyTypeObject PyDateTime_TimeZoneType; + + typedef struct { - /* Static types exposed by the datetime C-API. */ - PyTypeObject *date_type; - PyTypeObject *datetime_type; - PyTypeObject *delta_type; - PyTypeObject *time_type; - PyTypeObject *tzinfo_type; - /* Exposed indirectly via TimeZone_UTC. */ - PyTypeObject *timezone_type; - - /* Other module classes. */ + /* Module heap types. */ PyTypeObject *isocalendar_date_type; /* Conversion factors. */ @@ -47,39 +48,182 @@ typedef struct { PyObject *us_per_week; // 1e6 * 3600 * 24 * 7 as Python int PyObject *seconds_per_day; // 3600 * 24 as Python int - /* The interned UTC timezone instance */ - PyObject *utc; - /* The interned Unix epoch datetime instance */ PyObject *epoch; - - /* While we use a global state, we ensure it's only initialized once */ - int initialized; } datetime_state; -static datetime_state _datetime_global_state; +/* The module has a fixed number of static objects, due to being exposed + * through the datetime C-API. There are five types exposed directly, + * one type exposed indirectly, and one singleton constant (UTC). + * + * Each of these objects is hidden behind a macro in the same way as + * the per-module objects stored in module state. The macros for the + * static objects don't need to be passed a state, but the consistency + * of doing so is more clear. We use a dedicated noop macro, NO_STATE, + * to make the special case obvious. 
*/ + +#define NO_STATE NULL + +#define DATE_TYPE(st) &PyDateTime_DateType +#define DATETIME_TYPE(st) &PyDateTime_DateTimeType +#define TIME_TYPE(st) &PyDateTime_TimeType +#define DELTA_TYPE(st) &PyDateTime_DeltaType +#define TZINFO_TYPE(st) &PyDateTime_TZInfoType +#define TIMEZONE_TYPE(st) &PyDateTime_TimeZoneType +#define ISOCALENDAR_DATE_TYPE(st) st->isocalendar_date_type + +#define PyDate_Check(op) PyObject_TypeCheck(op, DATE_TYPE(NO_STATE)) +#define PyDate_CheckExact(op) Py_IS_TYPE(op, DATE_TYPE(NO_STATE)) + +#define PyDateTime_Check(op) PyObject_TypeCheck(op, DATETIME_TYPE(NO_STATE)) +#define PyDateTime_CheckExact(op) Py_IS_TYPE(op, DATETIME_TYPE(NO_STATE)) + +#define PyTime_Check(op) PyObject_TypeCheck(op, TIME_TYPE(NO_STATE)) +#define PyTime_CheckExact(op) Py_IS_TYPE(op, TIME_TYPE(NO_STATE)) + +#define PyDelta_Check(op) PyObject_TypeCheck(op, DELTA_TYPE(NO_STATE)) +#define PyDelta_CheckExact(op) Py_IS_TYPE(op, DELTA_TYPE(NO_STATE)) + +#define PyTZInfo_Check(op) PyObject_TypeCheck(op, TZINFO_TYPE(NO_STATE)) +#define PyTZInfo_CheckExact(op) Py_IS_TYPE(op, TZINFO_TYPE(NO_STATE)) + +#define PyTimezone_Check(op) PyObject_TypeCheck(op, TIMEZONE_TYPE(NO_STATE)) + +#define CONST_US_PER_MS(st) st->us_per_ms +#define CONST_US_PER_SECOND(st) st->us_per_second +#define CONST_US_PER_MINUTE(st) st->us_per_minute +#define CONST_US_PER_HOUR(st) st->us_per_hour +#define CONST_US_PER_DAY(st) st->us_per_day +#define CONST_US_PER_WEEK(st) st->us_per_week +#define CONST_SEC_PER_DAY(st) st->seconds_per_day +#define CONST_EPOCH(st) st->epoch +#define CONST_UTC(st) ((PyObject *)&utc_timezone) + +static datetime_state * +get_module_state(PyObject *module) +{ + void *state = _PyModule_GetState(module); + assert(state != NULL); + return (datetime_state *)state; +} + + +#define INTERP_KEY ((PyObject *)&_Py_ID(cached_datetime_module)) + +static PyObject * +get_current_module(PyInterpreterState *interp) +{ + PyObject *dict = PyInterpreterState_GetDict(interp); + if (dict == NULL) { + 
return NULL; + } + PyObject *ref = NULL; + if (PyDict_GetItemRef(dict, INTERP_KEY, &ref) < 0) { + return NULL; + } + if (ref == NULL) { + return NULL; + } + PyObject *mod = NULL; + (void)PyWeakref_GetRef(ref, &mod); + if (mod == Py_None) { + Py_CLEAR(mod); + } + Py_DECREF(ref); + return mod; +} + +static PyModuleDef datetimemodule; + +static datetime_state * +_get_current_state(PyObject **p_mod) +{ + PyInterpreterState *interp = PyInterpreterState_Get(); + PyObject *mod = get_current_module(interp); + if (mod == NULL) { + assert(!PyErr_Occurred()); + if (PyErr_Occurred()) { + return NULL; + } + /* The static types can outlive the module, + * so we must re-import the module. */ + mod = PyImport_ImportModule("_datetime"); + if (mod == NULL) { + return NULL; + } + } + datetime_state *st = get_module_state(mod); + *p_mod = mod; + return st; +} + +#define GET_CURRENT_STATE(MOD_VAR) \ + _get_current_state(&MOD_VAR) +#define RELEASE_CURRENT_STATE(ST_VAR, MOD_VAR) \ + Py_DECREF(MOD_VAR) -static inline datetime_state* get_datetime_state(void) +static int +set_current_module(PyInterpreterState *interp, PyObject *mod) { - return &_datetime_global_state; + assert(mod != NULL); + PyObject *dict = PyInterpreterState_GetDict(interp); + if (dict == NULL) { + return -1; + } + PyObject *ref = PyWeakref_NewRef(mod, NULL); + if (ref == NULL) { + return -1; + } + int rc = PyDict_SetItem(dict, INTERP_KEY, ref); + Py_DECREF(ref); + return rc; } -#define PyDate_Check(op) PyObject_TypeCheck(op, get_datetime_state()->date_type) -#define PyDate_CheckExact(op) Py_IS_TYPE(op, get_datetime_state()->date_type) +static void +clear_current_module(PyInterpreterState *interp, PyObject *expected) +{ + PyObject *exc = PyErr_GetRaisedException(); + + PyObject *current = NULL; + + PyObject *dict = PyInterpreterState_GetDict(interp); + if (dict == NULL) { + goto error; + } + + if (expected != NULL) { + PyObject *ref = NULL; + if (PyDict_GetItemRef(dict, INTERP_KEY, &ref) < 0) { + goto error; + } + if 
(ref != NULL) { + int rc = PyWeakref_GetRef(ref, &current); + Py_DECREF(ref); + if (rc < 0) { + goto error; + } + if (current != expected) { + goto finally; + } + } + } -#define PyDateTime_Check(op) PyObject_TypeCheck(op, get_datetime_state()->datetime_type) -#define PyDateTime_CheckExact(op) Py_IS_TYPE(op, get_datetime_state()->datetime_type) + if (PyDict_DelItem(dict, INTERP_KEY) < 0) { + if (!PyErr_ExceptionMatches(PyExc_KeyError)) { + goto error; + } + } -#define PyTime_Check(op) PyObject_TypeCheck(op, get_datetime_state()->time_type) -#define PyTime_CheckExact(op) Py_IS_TYPE(op, get_datetime_state()->time_type) + goto finally; -#define PyDelta_Check(op) PyObject_TypeCheck(op, get_datetime_state()->delta_type) -#define PyDelta_CheckExact(op) Py_IS_TYPE(op, get_datetime_state()->delta_type) +error: + PyErr_Print(); -#define PyTZInfo_Check(op) PyObject_TypeCheck(op, get_datetime_state()->tzinfo_type) -#define PyTZInfo_CheckExact(op) Py_IS_TYPE(op, get_datetime_state()->tzinfo_type) +finally: + Py_XDECREF(current); + PyErr_SetRaisedException(exc); +} -#define PyTimezone_Check(op) PyObject_TypeCheck(op, get_datetime_state()->timezone_type) /* We require that C int be at least 32 bits, and use int virtually * everywhere.
In just a few cases we use a temp long, where a Python @@ -988,7 +1132,7 @@ new_date_ex(int year, int month, int day, PyTypeObject *type) } #define new_date(year, month, day) \ - new_date_ex(year, month, day, get_datetime_state()->date_type) + new_date_ex(year, month, day, DATE_TYPE(NO_STATE)) // Forward declaration static PyObject * @@ -998,13 +1142,12 @@ new_datetime_ex(int, int, int, int, int, int, int, PyObject *, PyTypeObject *); static PyObject * new_date_subclass_ex(int year, int month, int day, PyObject *cls) { - datetime_state *st = get_datetime_state(); PyObject *result; // We have "fast path" constructors for two subclasses: date and datetime - if ((PyTypeObject *)cls == st->date_type) { + if ((PyTypeObject *)cls == DATE_TYPE(NO_STATE)) { result = new_date_ex(year, month, day, (PyTypeObject *)cls); } - else if ((PyTypeObject *)cls == st->datetime_type) { + else if ((PyTypeObject *)cls == DATETIME_TYPE(NO_STATE)) { result = new_datetime_ex(year, month, day, 0, 0, 0, 0, Py_None, (PyTypeObject *)cls); } @@ -1058,8 +1201,7 @@ new_datetime_ex(int year, int month, int day, int hour, int minute, } #define new_datetime(y, m, d, hh, mm, ss, us, tzinfo, fold) \ - new_datetime_ex2(y, m, d, hh, mm, ss, us, tzinfo, fold, \ - get_datetime_state()->datetime_type) + new_datetime_ex2(y, m, d, hh, mm, ss, us, tzinfo, fold, DATETIME_TYPE(NO_STATE)) static PyObject * call_subclass_fold(PyObject *cls, int fold, const char *format, ...) 
@@ -1100,9 +1242,8 @@ new_datetime_subclass_fold_ex(int year, int month, int day, int hour, int minute int second, int usecond, PyObject *tzinfo, int fold, PyObject *cls) { - datetime_state *st = get_datetime_state(); PyObject* dt; - if ((PyTypeObject*)cls == st->datetime_type) { + if ((PyTypeObject*)cls == DATETIME_TYPE(NO_STATE)) { // Use the fast path constructor dt = new_datetime(year, month, day, hour, minute, second, usecond, tzinfo, fold); @@ -1163,16 +1304,15 @@ new_time_ex(int hour, int minute, int second, int usecond, return new_time_ex2(hour, minute, second, usecond, tzinfo, 0, type); } -#define new_time(hh, mm, ss, us, tzinfo, fold) \ - new_time_ex2(hh, mm, ss, us, tzinfo, fold, get_datetime_state()->time_type) +#define new_time(hh, mm, ss, us, tzinfo, fold) \ + new_time_ex2(hh, mm, ss, us, tzinfo, fold, TIME_TYPE(NO_STATE)) static PyObject * new_time_subclass_fold_ex(int hour, int minute, int second, int usecond, PyObject *tzinfo, int fold, PyObject *cls) { PyObject *t; - datetime_state *st = get_datetime_state(); - if ((PyTypeObject*)cls == st->time_type) { + if ((PyTypeObject*)cls == TIME_TYPE(NO_STATE)) { // Use the fast path constructor t = new_time(hour, minute, second, usecond, tzinfo, fold); } @@ -1224,7 +1364,7 @@ new_delta_ex(int days, int seconds, int microseconds, int normalize, } #define new_delta(d, s, us, normalize) \ - new_delta_ex(d, s, us, normalize, get_datetime_state()->delta_type) + new_delta_ex(d, s, us, normalize, DELTA_TYPE(NO_STATE)) typedef struct @@ -1244,8 +1384,7 @@ static PyObject * create_timezone(PyObject *offset, PyObject *name) { PyDateTime_TimeZone *self; - datetime_state *st = get_datetime_state(); - PyTypeObject *type = st->timezone_type; + PyTypeObject *type = TIMEZONE_TYPE(NO_STATE); assert(offset != NULL); assert(PyDelta_Check(offset)); @@ -1267,6 +1406,7 @@ create_timezone(PyObject *offset, PyObject *name) } static int delta_bool(PyDateTime_Delta *self); +static PyDateTime_TimeZone utc_timezone; static PyObject * 
new_timezone(PyObject *offset, PyObject *name) @@ -1276,8 +1416,7 @@ new_timezone(PyObject *offset, PyObject *name) assert(name == NULL || PyUnicode_Check(name)); if (name == NULL && delta_bool((PyDateTime_Delta *)offset) == 0) { - datetime_state *st = get_datetime_state(); - return Py_NewRef(st->utc); + return Py_NewRef(CONST_UTC(NO_STATE)); } if ((GET_TD_DAYS(offset) == -1 && GET_TD_SECONDS(offset) == 0 && @@ -1490,8 +1629,7 @@ tzinfo_from_isoformat_results(int rv, int tzoffset, int tz_useconds) if (rv == 1) { // Create a timezone from offset in seconds (0 returns UTC) if (tzoffset == 0) { - datetime_state *st = get_datetime_state(); - return Py_NewRef(st->utc); + return Py_NewRef(CONST_UTC(NO_STATE)); } PyObject *delta = new_delta(0, tzoffset, tz_useconds, 1); @@ -1920,11 +2058,13 @@ delta_to_microseconds(PyDateTime_Delta *self) PyObject *x3 = NULL; PyObject *result = NULL; + PyObject *current_mod = NULL; + datetime_state *st = GET_CURRENT_STATE(current_mod); + x1 = PyLong_FromLong(GET_TD_DAYS(self)); if (x1 == NULL) goto Done; - datetime_state *st = get_datetime_state(); - x2 = PyNumber_Multiply(x1, st->seconds_per_day); /* days in seconds */ + x2 = PyNumber_Multiply(x1, CONST_SEC_PER_DAY(st)); /* days in seconds */ if (x2 == NULL) goto Done; Py_SETREF(x1, NULL); @@ -1941,7 +2081,7 @@ delta_to_microseconds(PyDateTime_Delta *self) /* x1 = */ x2 = NULL; /* x3 has days+seconds in seconds */ - x1 = PyNumber_Multiply(x3, st->us_per_second); /* us */ + x1 = PyNumber_Multiply(x3, CONST_US_PER_SECOND(st)); /* us */ if (x1 == NULL) goto Done; Py_SETREF(x3, NULL); @@ -1957,6 +2097,7 @@ delta_to_microseconds(PyDateTime_Delta *self) Py_XDECREF(x1); Py_XDECREF(x2); Py_XDECREF(x3); + RELEASE_CURRENT_STATE(st, current_mod); return result; } @@ -1996,8 +2137,10 @@ microseconds_to_delta_ex(PyObject *pyus, PyTypeObject *type) PyObject *num = NULL; PyObject *result = NULL; - datetime_state *st = get_datetime_state(); - tuple = checked_divmod(pyus, st->us_per_second); + PyObject 
*current_mod = NULL; + datetime_state *st = GET_CURRENT_STATE(current_mod); + + tuple = checked_divmod(pyus, CONST_US_PER_SECOND(st)); if (tuple == NULL) { goto Done; } @@ -2015,7 +2158,7 @@ microseconds_to_delta_ex(PyObject *pyus, PyTypeObject *type) num = Py_NewRef(PyTuple_GET_ITEM(tuple, 0)); /* leftover seconds */ Py_DECREF(tuple); - tuple = checked_divmod(num, st->seconds_per_day); + tuple = checked_divmod(num, CONST_SEC_PER_DAY(st)); if (tuple == NULL) goto Done; Py_DECREF(num); @@ -2040,6 +2183,7 @@ microseconds_to_delta_ex(PyObject *pyus, PyTypeObject *type) Done: Py_XDECREF(tuple); Py_XDECREF(num); + RELEASE_CURRENT_STATE(st, current_mod); return result; BadDivmod: @@ -2049,7 +2193,7 @@ microseconds_to_delta_ex(PyObject *pyus, PyTypeObject *type) } #define microseconds_to_delta(pymicros) \ - microseconds_to_delta_ex(pymicros, get_datetime_state()->delta_type) + microseconds_to_delta_ex(pymicros, DELTA_TYPE(NO_STATE)) static PyObject * multiply_int_timedelta(PyObject *intobj, PyDateTime_Delta *delta) @@ -2577,6 +2721,9 @@ delta_new(PyTypeObject *type, PyObject *args, PyObject *kw) { PyObject *self = NULL; + PyObject *current_mod = NULL; + datetime_state *st = GET_CURRENT_STATE(current_mod); + /* Argument objects. 
*/ PyObject *day = NULL; PyObject *second = NULL; @@ -2615,29 +2762,28 @@ delta_new(PyTypeObject *type, PyObject *args, PyObject *kw) y = accum("microseconds", x, us, _PyLong_GetOne(), &leftover_us); CLEANUP; } - datetime_state *st = get_datetime_state(); if (ms) { - y = accum("milliseconds", x, ms, st->us_per_ms, &leftover_us); + y = accum("milliseconds", x, ms, CONST_US_PER_MS(st), &leftover_us); CLEANUP; } if (second) { - y = accum("seconds", x, second, st->us_per_second, &leftover_us); + y = accum("seconds", x, second, CONST_US_PER_SECOND(st), &leftover_us); CLEANUP; } if (minute) { - y = accum("minutes", x, minute, st->us_per_minute, &leftover_us); + y = accum("minutes", x, minute, CONST_US_PER_MINUTE(st), &leftover_us); CLEANUP; } if (hour) { - y = accum("hours", x, hour, st->us_per_hour, &leftover_us); + y = accum("hours", x, hour, CONST_US_PER_HOUR(st), &leftover_us); CLEANUP; } if (day) { - y = accum("days", x, day, st->us_per_day, &leftover_us); + y = accum("days", x, day, CONST_US_PER_DAY(st), &leftover_us); CLEANUP; } if (week) { - y = accum("weeks", x, week, st->us_per_week, &leftover_us); + y = accum("weeks", x, week, CONST_US_PER_WEEK(st), &leftover_us); CLEANUP; } if (leftover_us) { @@ -2679,7 +2825,9 @@ delta_new(PyTypeObject *type, PyObject *args, PyObject *kw) self = microseconds_to_delta_ex(x, type); Py_DECREF(x); + Done: + RELEASE_CURRENT_STATE(st, current_mod); return self; #undef CLEANUP @@ -2792,9 +2940,12 @@ delta_total_seconds(PyObject *self, PyObject *Py_UNUSED(ignored)) if (total_microseconds == NULL) return NULL; - datetime_state *st = get_datetime_state(); - total_seconds = PyNumber_TrueDivide(total_microseconds, st->us_per_second); + PyObject *current_mod = NULL; + datetime_state *st = GET_CURRENT_STATE(current_mod); + + total_seconds = PyNumber_TrueDivide(total_microseconds, CONST_US_PER_SECOND(st)); + RELEASE_CURRENT_STATE(st, current_mod); Py_DECREF(total_microseconds); return total_seconds; } @@ -3547,9 +3698,12 @@ 
date_isocalendar(PyDateTime_Date *self, PyObject *Py_UNUSED(ignored)) week = 0; } - datetime_state *st = get_datetime_state(); - PyObject *v = iso_calendar_date_new_impl(st->isocalendar_date_type, + PyObject *current_mod = NULL; + datetime_state *st = GET_CURRENT_STATE(current_mod); + + PyObject *v = iso_calendar_date_new_impl(ISOCALENDAR_DATE_TYPE(st), year, week + 1, day + 1); + RELEASE_CURRENT_STATE(st, current_mod); if (v == NULL) { return NULL; } @@ -4018,9 +4172,8 @@ timezone_new(PyTypeObject *type, PyObject *args, PyObject *kw) { PyObject *offset; PyObject *name = NULL; - datetime_state *st = get_datetime_state(); if (PyArg_ParseTupleAndKeywords(args, kw, "O!|U:timezone", timezone_kws, - st->delta_type, &offset, &name)) + DELTA_TYPE(NO_STATE), &offset, &name)) return new_timezone(offset, name); return NULL; @@ -4073,8 +4226,7 @@ timezone_repr(PyDateTime_TimeZone *self) to use Py_TYPE(self)->tp_name here. */ const char *type_name = Py_TYPE(self)->tp_name; - datetime_state *st = get_datetime_state(); - if (((PyObject *)self) == st->utc) { + if ((PyObject *)self == CONST_UTC(NO_STATE)) { return PyUnicode_FromFormat("%s.utc", type_name); } @@ -4096,8 +4248,7 @@ timezone_str(PyDateTime_TimeZone *self) if (self->name != NULL) { return Py_NewRef(self->name); } - datetime_state *st = get_datetime_state(); - if ((PyObject *)self == st->utc || + if ((PyObject *)self == CONST_UTC(NO_STATE) || (GET_TD_DAYS(self->offset) == 0 && GET_TD_SECONDS(self->offset) == 0 && GET_TD_MICROSECONDS(self->offset) == 0)) @@ -4260,7 +4411,7 @@ static PyDateTime_TimeZone * look_up_timezone(PyObject *offset, PyObject *name) { if (offset == utc_timezone.offset && name == NULL) { - return &utc_timezone; + return (PyDateTime_TimeZone *)CONST_UTC(NO_STATE); } return NULL; } @@ -4777,8 +4928,7 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) { } PyObject *t; - datetime_state *st = get_datetime_state(); - if ( (PyTypeObject *)cls == st->time_type) { + if ( (PyTypeObject *)cls == 
TIME_TYPE(NO_STATE)) { t = new_time(hour, minute, second, microsecond, tzinfo, 0); } else { t = PyObject_CallFunction(cls, "iiiiO", @@ -5376,10 +5526,9 @@ datetime_combine(PyObject *cls, PyObject *args, PyObject *kw) PyObject *tzinfo = NULL; PyObject *result = NULL; - datetime_state *st = get_datetime_state(); if (PyArg_ParseTupleAndKeywords(args, kw, "O!O!|O:combine", keywords, - st->date_type, &date, - st->time_type, &time, &tzinfo)) { + DATE_TYPE(NO_STATE), &date, + TIME_TYPE(NO_STATE), &time, &tzinfo)) { if (tzinfo == NULL) { if (HASTZINFO(time)) tzinfo = ((PyDateTime_Time *)time)->tzinfo; @@ -6209,7 +6358,6 @@ local_timezone_from_timestamp(time_t timestamp) delta = new_delta(0, local_time_tm.tm_gmtoff, 0, 1); #else /* HAVE_STRUCT_TM_TM_ZONE */ { - datetime_state *st = get_datetime_state(); PyObject *local_time, *utc_time; struct tm utc_time_tm; char buf[100]; @@ -6264,8 +6412,11 @@ local_timezone(PyDateTime_DateTime *utc_time) PyObject *one_second; PyObject *seconds; - datetime_state *st = get_datetime_state(); - delta = datetime_subtract((PyObject *)utc_time, st->epoch); + PyObject *current_mod = NULL; + datetime_state *st = GET_CURRENT_STATE(current_mod); + + delta = datetime_subtract((PyObject *)utc_time, CONST_EPOCH(st)); + RELEASE_CURRENT_STATE(st, current_mod); if (delta == NULL) return NULL; @@ -6378,7 +6529,6 @@ datetime_astimezone(PyDateTime_DateTime *self, PyObject *args, PyObject *kw) if (result == NULL) return NULL; - datetime_state *st = get_datetime_state(); /* Make sure result is aware and UTC. 
*/ if (!HASTZINFO(result)) { temp = (PyObject *)result; @@ -6390,7 +6540,7 @@ datetime_astimezone(PyDateTime_DateTime *self, PyObject *args, PyObject *kw) DATE_GET_MINUTE(result), DATE_GET_SECOND(result), DATE_GET_MICROSECOND(result), - st->utc, + CONST_UTC(NO_STATE), DATE_GET_FOLD(result), Py_TYPE(result)); Py_DECREF(temp); @@ -6399,7 +6549,7 @@ datetime_astimezone(PyDateTime_DateTime *self, PyObject *args, PyObject *kw) } else { /* Result is already aware - just replace tzinfo. */ - Py_SETREF(result->tzinfo, Py_NewRef(st->utc)); + Py_SETREF(result->tzinfo, Py_NewRef(CONST_UTC(NO_STATE))); } /* Attach new tzinfo and let fromutc() do the rest. */ @@ -6503,9 +6653,12 @@ datetime_timestamp(PyDateTime_DateTime *self, PyObject *Py_UNUSED(ignored)) PyObject *result; if (HASTZINFO(self) && self->tzinfo != Py_None) { - datetime_state *st = get_datetime_state(); + PyObject *current_mod = NULL; + datetime_state *st = GET_CURRENT_STATE(current_mod); + PyObject *delta; - delta = datetime_subtract((PyObject *)self, st->epoch); + delta = datetime_subtract((PyObject *)self, CONST_EPOCH(st)); + RELEASE_CURRENT_STATE(st, current_mod); if (delta == NULL) return NULL; result = delta_total_seconds(delta, NULL); @@ -6839,23 +6992,6 @@ get_datetime_capi(void) return &capi; } -static int -datetime_clear(PyObject *module) -{ - datetime_state *st = get_datetime_state(); - - Py_CLEAR(st->us_per_ms); - Py_CLEAR(st->us_per_second); - Py_CLEAR(st->us_per_minute); - Py_CLEAR(st->us_per_hour); - Py_CLEAR(st->us_per_day); - Py_CLEAR(st->us_per_week); - Py_CLEAR(st->seconds_per_day); - Py_CLEAR(st->utc); - Py_CLEAR(st->epoch); - return 0; -} - static PyObject * create_timezone_from_delta(int days, int sec, int ms, int normalize) { @@ -6869,25 +7005,39 @@ create_timezone_from_delta(int days, int sec, int ms, int normalize) } static int -init_state(datetime_state *st, PyTypeObject *PyDateTime_IsoCalendarDateType) -{ - // While datetime uses global module "state", we unly initialize it once. 
- // The PyLong objects created here (once per process) are not decref'd. - if (st->initialized) { +init_state(datetime_state *st, PyObject *module, PyObject *old_module) +{ + /* Each module gets its own heap types. */ +#define ADD_TYPE(FIELD, SPEC, BASE) \ + do { \ + PyObject *cls = PyType_FromModuleAndSpec( \ + module, SPEC, (PyObject *)BASE); \ + if (cls == NULL) { \ + return -1; \ + } \ + st->FIELD = (PyTypeObject *)cls; \ + } while (0) + + ADD_TYPE(isocalendar_date_type, &isocal_spec, &PyTuple_Type); +#undef ADD_TYPE + + if (old_module != NULL) { + assert(old_module != module); + datetime_state *st_old = get_module_state(old_module); + *st = (datetime_state){ + .isocalendar_date_type = st->isocalendar_date_type, + .us_per_ms = Py_NewRef(st_old->us_per_ms), + .us_per_second = Py_NewRef(st_old->us_per_second), + .us_per_minute = Py_NewRef(st_old->us_per_minute), + .us_per_hour = Py_NewRef(st_old->us_per_hour), + .us_per_day = Py_NewRef(st_old->us_per_day), + .us_per_week = Py_NewRef(st_old->us_per_week), + .seconds_per_day = Py_NewRef(st_old->seconds_per_day), + .epoch = Py_NewRef(st_old->epoch), + }; return 0; } - /* Static types exposed by the C-API. */ - st->date_type = &PyDateTime_DateType; - st->datetime_type = &PyDateTime_DateTimeType; - st->delta_type = &PyDateTime_DeltaType; - st->time_type = &PyDateTime_TimeType; - st->tzinfo_type = &PyDateTime_TZInfoType; - st->timezone_type = &PyDateTime_TimeZoneType; - - /* Per-module heap types. 
*/ - st->isocalendar_date_type = PyDateTime_IsoCalendarDateType; - st->us_per_ms = PyLong_FromLong(1000); if (st->us_per_ms == NULL) { return -1; @@ -6921,26 +7071,54 @@ init_state(datetime_state *st, PyTypeObject *PyDateTime_IsoCalendarDateType) return -1; } - /* Init UTC timezone */ - st->utc = create_timezone_from_delta(0, 0, 0, 0); - if (st->utc == NULL) { - return -1; - } - /* Init Unix epoch */ - st->epoch = new_datetime(1970, 1, 1, 0, 0, 0, 0, st->utc, 0); + st->epoch = new_datetime( + 1970, 1, 1, 0, 0, 0, 0, (PyObject *)&utc_timezone, 0); if (st->epoch == NULL) { return -1; } - st->initialized = 1; + return 0; +} + +static int +traverse_state(datetime_state *st, visitproc visit, void *arg) +{ + /* heap types */ + Py_VISIT(st->isocalendar_date_type); return 0; } +static int +clear_state(datetime_state *st) +{ + Py_CLEAR(st->isocalendar_date_type); + Py_CLEAR(st->us_per_ms); + Py_CLEAR(st->us_per_second); + Py_CLEAR(st->us_per_minute); + Py_CLEAR(st->us_per_hour); + Py_CLEAR(st->us_per_day); + Py_CLEAR(st->us_per_week); + Py_CLEAR(st->seconds_per_day); + Py_CLEAR(st->epoch); + return 0; +} + static int _datetime_exec(PyObject *module) { + int rc = -1; + datetime_state *st = get_module_state(module); + + PyInterpreterState *interp = PyInterpreterState_Get(); + PyObject *old_module = get_current_module(interp); + if (PyErr_Occurred()) { + assert(old_module == NULL); + goto error; + } + /* We actually set the "current" module right before a successful return. */ + // `&...` is not a constant expression according to a strict reading // of C standards. Fill tp_base at run-time rather than statically. // See https://bugs.python.org/issue40777 @@ -6953,6 +7131,7 @@ _datetime_exec(PyObject *module) &PyDateTime_TimeType, &PyDateTime_DeltaType, &PyDateTime_TZInfoType, + /* Indirectly, via the utc object. 
*/ &PyDateTime_TimeZoneType, }; @@ -6962,29 +7141,16 @@ _datetime_exec(PyObject *module) } } -#define CREATE_TYPE(VAR, SPEC, BASE) \ - do { \ - VAR = (PyTypeObject *)PyType_FromModuleAndSpec( \ - module, SPEC, (PyObject *)BASE); \ - if (VAR == NULL) { \ - goto error; \ - } \ - } while (0) - - PyTypeObject *PyDateTime_IsoCalendarDateType = NULL; - datetime_state *st = get_datetime_state(); - - if (!st->initialized) { - CREATE_TYPE(PyDateTime_IsoCalendarDateType, &isocal_spec, &PyTuple_Type); - } -#undef CREATE_TYPE - - if (init_state(st, PyDateTime_IsoCalendarDateType) < 0) { + if (init_state(st, module, old_module) < 0) { goto error; } + /* For now we only set the objects on the static types once. + * We will relax that once each types __dict__ is per-interpreter. */ #define DATETIME_ADD_MACRO(dict, c, value_expr) \ do { \ + if (PyDict_GetItemString(dict, c) == NULL) { \ + assert(!PyErr_Occurred()); \ PyObject *value = (value_expr); \ if (value == NULL) { \ goto error; \ @@ -6994,29 +7160,30 @@ _datetime_exec(PyObject *module) goto error; \ } \ Py_DECREF(value); \ + } \ } while(0) /* timedelta values */ - PyObject *d = st->delta_type->tp_dict; + PyObject *d = PyDateTime_DeltaType.tp_dict; DATETIME_ADD_MACRO(d, "resolution", new_delta(0, 0, 1, 0)); DATETIME_ADD_MACRO(d, "min", new_delta(-MAX_DELTA_DAYS, 0, 0, 0)); DATETIME_ADD_MACRO(d, "max", new_delta(MAX_DELTA_DAYS, 24*3600-1, 1000000-1, 0)); /* date values */ - d = st->date_type->tp_dict; + d = PyDateTime_DateType.tp_dict; DATETIME_ADD_MACRO(d, "min", new_date(1, 1, 1)); DATETIME_ADD_MACRO(d, "max", new_date(MAXYEAR, 12, 31)); DATETIME_ADD_MACRO(d, "resolution", new_delta(1, 0, 0, 0)); /* time values */ - d = st->time_type->tp_dict; + d = PyDateTime_TimeType.tp_dict; DATETIME_ADD_MACRO(d, "min", new_time(0, 0, 0, 0, Py_None, 0)); DATETIME_ADD_MACRO(d, "max", new_time(23, 59, 59, 999999, Py_None, 0)); DATETIME_ADD_MACRO(d, "resolution", new_delta(0, 0, 1, 0)); /* datetime values */ - d = 
st->datetime_type->tp_dict; + d = PyDateTime_DateTimeType.tp_dict; DATETIME_ADD_MACRO(d, "min", new_datetime(1, 1, 1, 0, 0, 0, 0, Py_None, 0)); DATETIME_ADD_MACRO(d, "max", new_datetime(MAXYEAR, 12, 31, 23, 59, 59, @@ -7024,8 +7191,8 @@ _datetime_exec(PyObject *module) DATETIME_ADD_MACRO(d, "resolution", new_delta(0, 0, 1, 0)); /* timezone values */ - d = st->timezone_type->tp_dict; - if (PyDict_SetItemString(d, "utc", st->utc) < 0) { + d = PyDateTime_TimeZoneType.tp_dict; + if (PyDict_SetItemString(d, "utc", (PyObject *)&utc_timezone) < 0) { goto error; } @@ -7034,12 +7201,13 @@ _datetime_exec(PyObject *module) * values. This may change in the future.*/ /* -23:59 */ - PyObject *min = create_timezone_from_delta(-1, 60, 0, 1); - DATETIME_ADD_MACRO(d, "min", min); + DATETIME_ADD_MACRO(d, "min", create_timezone_from_delta(-1, 60, 0, 1)); /* +23:59 */ - PyObject *max = create_timezone_from_delta(0, (23 * 60 + 59) * 60, 0, 0); - DATETIME_ADD_MACRO(d, "max", max); + DATETIME_ADD_MACRO( + d, "max", create_timezone_from_delta(0, (23 * 60 + 59) * 60, 0, 0)); + +#undef DATETIME_ADD_MACRO /* Add module level attributes */ if (PyModule_AddIntMacro(module, MINYEAR) < 0) { @@ -7048,7 +7216,7 @@ _datetime_exec(PyObject *module) if (PyModule_AddIntMacro(module, MAXYEAR) < 0) { goto error; } - if (PyModule_AddObjectRef(module, "UTC", st->utc) < 0) { + if (PyModule_AddObjectRef(module, "UTC", (PyObject *)&utc_timezone) < 0) { goto error; } @@ -7081,13 +7249,20 @@ _datetime_exec(PyObject *module) static_assert(DI100Y == 25 * DI4Y - 1, "DI100Y"); assert(DI100Y == days_before_year(100+1)); - return 0; + if (set_current_module(interp, module) < 0) { + goto error; + } + + rc = 0; + goto finally; error: - datetime_clear(module); - return -1; + clear_state(st); + +finally: + Py_XDECREF(old_module); + return rc; } -#undef DATETIME_ADD_MACRO static PyModuleDef_Slot module_slots[] = { {Py_mod_exec, _datetime_exec}, @@ -7096,13 +7271,46 @@ static PyModuleDef_Slot module_slots[] = { {0, NULL}, 
}; +static int +module_traverse(PyObject *mod, visitproc visit, void *arg) +{ + datetime_state *st = get_module_state(mod); + traverse_state(st, visit, arg); + return 0; +} + +static int +module_clear(PyObject *mod) +{ + datetime_state *st = get_module_state(mod); + clear_state(st); + + PyInterpreterState *interp = PyInterpreterState_Get(); + clear_current_module(interp, mod); + + return 0; +} + +static void +module_free(void *mod) +{ + datetime_state *st = get_module_state((PyObject *)mod); + clear_state(st); + + PyInterpreterState *interp = PyInterpreterState_Get(); + clear_current_module(interp, (PyObject *)mod); +} + static PyModuleDef datetimemodule = { .m_base = PyModuleDef_HEAD_INIT, .m_name = "_datetime", .m_doc = "Fast implementation of the datetime type.", - .m_size = 0, + .m_size = sizeof(datetime_state), .m_methods = module_methods, .m_slots = module_slots, + .m_traverse = module_traverse, + .m_clear = module_clear, + .m_free = module_free, }; PyMODINIT_FUNC From dba7a167dbbd50e83e58df351f3414b7a08e0188 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 3 Jun 2024 18:42:48 -0400 Subject: [PATCH 067/373] gh-117142: Support Importing ctypes in Isolated Interpreters (gh-119991) This makes the support official. Co-authored-by: Kirill Podoprigora --- .../next/Library/2024-06-03-11-18-16.gh-issue-117142.kWTXQo.rst | 2 ++ Modules/_ctypes/_ctypes.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-03-11-18-16.gh-issue-117142.kWTXQo.rst diff --git a/Misc/NEWS.d/next/Library/2024-06-03-11-18-16.gh-issue-117142.kWTXQo.rst b/Misc/NEWS.d/next/Library/2024-06-03-11-18-16.gh-issue-117142.kWTXQo.rst new file mode 100644 index 00000000000000..80734ef3946300 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-03-11-18-16.gh-issue-117142.kWTXQo.rst @@ -0,0 +1,2 @@ +The :mod:`ctypes` module may now be imported in all subinterpreters, including +those that have their own GIL. 
diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index 6c1e5f58b95657..1d9534671a4ee8 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -5939,7 +5939,7 @@ module_free(void *module) static PyModuleDef_Slot module_slots[] = { {Py_mod_exec, _ctypes_mod_exec}, - {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED}, + {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, {0, NULL} }; From 105f22ea46ac16866e6df18ebae2a8ba422b7f45 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 3 Jun 2024 19:09:18 -0400 Subject: [PATCH 068/373] gh-117398: Use Per-Interpreter State for the _datetime Static Types (gh-119929) We make use of the same mechanism that we use for the static builtin types. This required a few tweaks. The relevant code could use some cleanup but I opted to avoid the significant churn in this change. I'll tackle that separately. This change is the final piece needed to make _datetime support multiple interpreters. I've updated the module slot accordingly. 
--- Include/internal/pycore_object.h | 2 +- Include/internal/pycore_typeobject.h | 48 ++- ...-06-01-16-58-43.gh-issue-117398.kR0RW7.rst | 2 + Modules/_datetimemodule.c | 197 +++++++++---- Objects/exceptions.c | 2 +- Objects/object.c | 2 +- Objects/structseq.c | 2 +- Objects/typeobject.c | 278 ++++++++++++------ Objects/unicodeobject.c | 6 +- Objects/weakrefobject.c | 2 +- Python/crossinterp_exceptions.h | 4 +- Tools/c-analyzer/cpython/globals-to-fix.tsv | 1 + Tools/c-analyzer/cpython/ignored.tsv | 1 + 13 files changed, 381 insertions(+), 166 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-01-16-58-43.gh-issue-117398.kR0RW7.rst diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index f63e1da6fba025..6f133014ce06e2 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -589,7 +589,7 @@ _PyObject_GET_WEAKREFS_LISTPTR(PyObject *op) if (PyType_Check(op) && ((PyTypeObject *)op)->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); - static_builtin_state *state = _PyStaticType_GetState( + managed_static_type_state *state = _PyStaticType_GetState( interp, (PyTypeObject *)op); return _PyStaticType_GET_WEAKREFS_LISTPTR(state); } diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 7e533bd138469b..8664ae0e44533f 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -44,10 +44,12 @@ struct type_cache { /* For now we hard-code this to a value for which we are confident all the static builtin types will fit (for all builds). 
*/ -#define _Py_MAX_STATIC_BUILTIN_TYPES 200 +#define _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES 200 +#define _Py_MAX_MANAGED_STATIC_EXT_TYPES 10 typedef struct { PyTypeObject *type; + int isbuiltin; int readying; int ready; // XXX tp_dict can probably be statically allocated, @@ -59,7 +61,7 @@ typedef struct { are also some diagnostic uses for the list of weakrefs, so we still keep it. */ PyObject *tp_weaklist; -} static_builtin_state; +} managed_static_type_state; struct types_state { /* Used to set PyTypeObject.tp_version_tag. @@ -105,8 +107,16 @@ struct types_state { num_builtins_initialized is incremented once for each static builtin type. Once initialization is over for a subinterpreter, the value will be the same as for all other interpreters. */ - size_t num_builtins_initialized; - static_builtin_state builtins[_Py_MAX_STATIC_BUILTIN_TYPES]; + struct { + size_t num_initialized; + managed_static_type_state initialized[_Py_MAX_MANAGED_STATIC_BUILTIN_TYPES]; + } builtins; + /* We apply a similar strategy for managed extension modules. */ + struct { + size_t num_initialized; + size_t next_index; + managed_static_type_state initialized[_Py_MAX_MANAGED_STATIC_EXT_TYPES]; + } for_extensions; PyMutex mutex; }; @@ -130,12 +140,35 @@ typedef struct wrapperbase pytype_slotdef; static inline PyObject ** -_PyStaticType_GET_WEAKREFS_LISTPTR(static_builtin_state *state) +_PyStaticType_GET_WEAKREFS_LISTPTR(managed_static_type_state *state) { assert(state != NULL); return &state->tp_weaklist; } +extern int _PyStaticType_InitBuiltin( + PyInterpreterState *interp, + PyTypeObject *type); +extern void _PyStaticType_FiniBuiltin( + PyInterpreterState *interp, + PyTypeObject *type); +extern void _PyStaticType_ClearWeakRefs( + PyInterpreterState *interp, + PyTypeObject *type); +extern managed_static_type_state * _PyStaticType_GetState( + PyInterpreterState *interp, + PyTypeObject *type); + +// Export for '_datetime' shared extension. 
+PyAPI_FUNC(int) _PyStaticType_InitForExtension( + PyInterpreterState *interp, + PyTypeObject *self); +PyAPI_FUNC(void) _PyStaticType_FiniForExtension( + PyInterpreterState *interp, + PyTypeObject *self, + int final); + + /* Like PyType_GetModuleState, but skips verification * that type is a heap type with an associated module */ static inline void * @@ -151,11 +184,6 @@ _PyType_GetModuleState(PyTypeObject *type) } -extern int _PyStaticType_InitBuiltin(PyInterpreterState *, PyTypeObject *type); -extern static_builtin_state * _PyStaticType_GetState(PyInterpreterState *, PyTypeObject *); -extern void _PyStaticType_ClearWeakRefs(PyInterpreterState *, PyTypeObject *type); -extern void _PyStaticType_Dealloc(PyInterpreterState *, PyTypeObject *); - // Export for 'math' shared extension, used via _PyType_IsReady() static inline // function PyAPI_FUNC(PyObject *) _PyType_GetDict(PyTypeObject *); diff --git a/Misc/NEWS.d/next/Library/2024-06-01-16-58-43.gh-issue-117398.kR0RW7.rst b/Misc/NEWS.d/next/Library/2024-06-01-16-58-43.gh-issue-117398.kR0RW7.rst new file mode 100644 index 00000000000000..b0fe06663248f6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-01-16-58-43.gh-issue-117398.kR0RW7.rst @@ -0,0 +1,2 @@ +The ``_datetime`` module (C implementation for :mod:`datetime`) now supports +being imported in multiple interpreters. 
diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 16bb4c6980aa08..d6fa273c75e15e 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -111,26 +111,37 @@ get_module_state(PyObject *module) #define INTERP_KEY ((PyObject *)&_Py_ID(cached_datetime_module)) static PyObject * -get_current_module(PyInterpreterState *interp) +get_current_module(PyInterpreterState *interp, int *p_reloading) { + PyObject *mod = NULL; + int reloading = 0; + PyObject *dict = PyInterpreterState_GetDict(interp); if (dict == NULL) { - return NULL; + goto error; } PyObject *ref = NULL; if (PyDict_GetItemRef(dict, INTERP_KEY, &ref) < 0) { - return NULL; + goto error; } - if (ref == NULL) { - return NULL; + if (ref != NULL) { + reloading = 1; + if (ref != Py_None) { + (void)PyWeakref_GetRef(ref, &mod); + if (mod == Py_None) { + Py_CLEAR(mod); + } + Py_DECREF(ref); + } } - PyObject *mod = NULL; - (void)PyWeakref_GetRef(ref, &mod); - if (mod == Py_None) { - Py_CLEAR(mod); + if (p_reloading != NULL) { + *p_reloading = reloading; } - Py_DECREF(ref); return mod; + +error: + assert(PyErr_Occurred()); + return NULL; } static PyModuleDef datetimemodule; @@ -139,7 +150,7 @@ static datetime_state * _get_current_state(PyObject **p_mod) { PyInterpreterState *interp = PyInterpreterState_Get(); - PyObject *mod = get_current_module(interp); + PyObject *mod = get_current_module(interp, NULL); if (mod == NULL) { assert(!PyErr_Occurred()); if (PyErr_Occurred()) { @@ -184,8 +195,6 @@ clear_current_module(PyInterpreterState *interp, PyObject *expected) { PyObject *exc = PyErr_GetRaisedException(); - PyObject *current = NULL; - PyObject *dict = PyInterpreterState_GetDict(interp); if (dict == NULL) { goto error; @@ -197,7 +206,10 @@ clear_current_module(PyInterpreterState *interp, PyObject *expected) goto error; } if (ref != NULL) { + PyObject *current = NULL; int rc = PyWeakref_GetRef(ref, &current); + /* We only need "current" for pointer comparison.
*/ + Py_XDECREF(current); Py_DECREF(ref); if (rc < 0) { goto error; @@ -208,19 +220,17 @@ clear_current_module(PyInterpreterState *interp, PyObject *expected) } } - if (PyDict_DelItem(dict, INTERP_KEY) < 0) { - if (!PyErr_ExceptionMatches(PyExc_KeyError)) { - goto error; - } + /* We use None to identify that the module was previously loaded. */ + if (PyDict_SetItem(dict, INTERP_KEY, Py_None) < 0) { + goto error; } goto finally; error: - PyErr_Print(); + PyErr_WriteUnraisable(NULL); finally: - Py_XDECREF(current); PyErr_SetRaisedException(exc); } @@ -6947,14 +6957,19 @@ static PyTypeObject PyDateTime_DateTimeType = { }; /* --------------------------------------------------------------------------- - * Module methods and initialization. + * datetime C-API. */ -static PyMethodDef module_methods[] = { - {NULL, NULL} +static PyTypeObject * const capi_types[] = { + &PyDateTime_DateType, + &PyDateTime_DateTimeType, + &PyDateTime_TimeType, + &PyDateTime_DeltaType, + &PyDateTime_TZInfoType, + /* Indirectly, via the utc object. */ + &PyDateTime_TimeZoneType, }; - /* The C-API is process-global. This violates interpreter isolation * due to the objects stored here. Thus each of those objects must * be managed carefully. */ @@ -7004,6 +7019,11 @@ create_timezone_from_delta(int days, int sec, int ms, int normalize) return tz; } + +/* --------------------------------------------------------------------------- + * Module state lifecycle. + */ + static int init_state(datetime_state *st, PyObject *module, PyObject *old_module) { @@ -7105,38 +7125,110 @@ clear_state(datetime_state *st) return 0; } + +/* --------------------------------------------------------------------------- + * Global module state. + */ + +// If we make _PyStaticType_*ForExtension() public +// then all this should be managed by the runtime. 
+ +static struct { + PyMutex mutex; + int64_t interp_count; +} _globals = {0}; + +static void +callback_for_interp_exit(void *Py_UNUSED(data)) +{ + PyInterpreterState *interp = PyInterpreterState_Get(); + + assert(_globals.interp_count > 0); + PyMutex_Lock(&_globals.mutex); + _globals.interp_count -= 1; + int final = !_globals.interp_count; + PyMutex_Unlock(&_globals.mutex); + + /* They must be done in reverse order so subclasses are finalized + * before base classes. */ + for (size_t i = Py_ARRAY_LENGTH(capi_types); i > 0; i--) { + PyTypeObject *type = capi_types[i-1]; + _PyStaticType_FiniForExtension(interp, type, final); + } +} + +static int +init_static_types(PyInterpreterState *interp, int reloading) +{ + if (reloading) { + return 0; + } + + // `&...` is not a constant expression according to a strict reading + // of C standards. Fill tp_base at run-time rather than statically. + // See https://bugs.python.org/issue40777 + PyDateTime_TimeZoneType.tp_base = &PyDateTime_TZInfoType; + PyDateTime_DateTimeType.tp_base = &PyDateTime_DateType; + + /* Bases classes must be initialized before subclasses, + * so capi_types must have the types in the appropriate order. */ + for (size_t i = 0; i < Py_ARRAY_LENGTH(capi_types); i++) { + PyTypeObject *type = capi_types[i]; + if (_PyStaticType_InitForExtension(interp, type) < 0) { + return -1; + } + } + + PyMutex_Lock(&_globals.mutex); + assert(_globals.interp_count >= 0); + _globals.interp_count += 1; + PyMutex_Unlock(&_globals.mutex); + + /* It could make sense to add a separate callback + * for each of the types. However, for now we can take the simpler + * approach of a single callback. */ + if (PyUnstable_AtExit(interp, callback_for_interp_exit, NULL) < 0) { + callback_for_interp_exit(NULL); + return -1; + } + + return 0; +} + + +/* --------------------------------------------------------------------------- + * Module methods and initialization. 
+ */ + +static PyMethodDef module_methods[] = { + {NULL, NULL} +}; + + static int _datetime_exec(PyObject *module) { int rc = -1; datetime_state *st = get_module_state(module); + int reloading = 0; PyInterpreterState *interp = PyInterpreterState_Get(); - PyObject *old_module = get_current_module(interp); + PyObject *old_module = get_current_module(interp, &reloading); if (PyErr_Occurred()) { assert(old_module == NULL); goto error; } /* We actually set the "current" module right before a successful return. */ - // `&...` is not a constant expression according to a strict reading - // of C standards. Fill tp_base at run-time rather than statically. - // See https://bugs.python.org/issue40777 - PyDateTime_TimeZoneType.tp_base = &PyDateTime_TZInfoType; - PyDateTime_DateTimeType.tp_base = &PyDateTime_DateType; - - PyTypeObject *capi_types[] = { - &PyDateTime_DateType, - &PyDateTime_DateTimeType, - &PyDateTime_TimeType, - &PyDateTime_DeltaType, - &PyDateTime_TZInfoType, - /* Indirectly, via the utc object. */ - &PyDateTime_TimeZoneType, - }; + if (init_static_types(interp, reloading) < 0) { + goto error; + } for (size_t i = 0; i < Py_ARRAY_LENGTH(capi_types); i++) { - if (PyModule_AddType(module, capi_types[i]) < 0) { + PyTypeObject *type = capi_types[i]; + const char *name = _PyType_Name(type); + assert(name != NULL); + if (PyModule_AddObjectRef(module, name, (PyObject *)type) < 0) { goto error; } } @@ -7145,11 +7237,8 @@ _datetime_exec(PyObject *module) goto error; } - /* For now we only set the objects on the static types once. - * We will relax that once each types __dict__ is per-interpreter. 
*/ #define DATETIME_ADD_MACRO(dict, c, value_expr) \ do { \ - if (PyDict_GetItemString(dict, c) == NULL) { \ assert(!PyErr_Occurred()); \ PyObject *value = (value_expr); \ if (value == NULL) { \ @@ -7160,30 +7249,29 @@ _datetime_exec(PyObject *module) goto error; \ } \ Py_DECREF(value); \ - } \ } while(0) /* timedelta values */ - PyObject *d = PyDateTime_DeltaType.tp_dict; + PyObject *d = _PyType_GetDict(&PyDateTime_DeltaType); DATETIME_ADD_MACRO(d, "resolution", new_delta(0, 0, 1, 0)); DATETIME_ADD_MACRO(d, "min", new_delta(-MAX_DELTA_DAYS, 0, 0, 0)); DATETIME_ADD_MACRO(d, "max", new_delta(MAX_DELTA_DAYS, 24*3600-1, 1000000-1, 0)); /* date values */ - d = PyDateTime_DateType.tp_dict; + d = _PyType_GetDict(&PyDateTime_DateType); DATETIME_ADD_MACRO(d, "min", new_date(1, 1, 1)); DATETIME_ADD_MACRO(d, "max", new_date(MAXYEAR, 12, 31)); DATETIME_ADD_MACRO(d, "resolution", new_delta(1, 0, 0, 0)); /* time values */ - d = PyDateTime_TimeType.tp_dict; + d = _PyType_GetDict(&PyDateTime_TimeType); DATETIME_ADD_MACRO(d, "min", new_time(0, 0, 0, 0, Py_None, 0)); DATETIME_ADD_MACRO(d, "max", new_time(23, 59, 59, 999999, Py_None, 0)); DATETIME_ADD_MACRO(d, "resolution", new_delta(0, 0, 1, 0)); /* datetime values */ - d = PyDateTime_DateTimeType.tp_dict; + d = _PyType_GetDict(&PyDateTime_DateTimeType); DATETIME_ADD_MACRO(d, "min", new_datetime(1, 1, 1, 0, 0, 0, 0, Py_None, 0)); DATETIME_ADD_MACRO(d, "max", new_datetime(MAXYEAR, 12, 31, 23, 59, 59, @@ -7191,7 +7279,7 @@ _datetime_exec(PyObject *module) DATETIME_ADD_MACRO(d, "resolution", new_delta(0, 0, 1, 0)); /* timezone values */ - d = PyDateTime_TimeZoneType.tp_dict; + d = _PyType_GetDict(&PyDateTime_TimeZoneType); if (PyDict_SetItemString(d, "utc", (PyObject *)&utc_timezone) < 0) { goto error; } @@ -7266,7 +7354,7 @@ _datetime_exec(PyObject *module) static PyModuleDef_Slot module_slots[] = { {Py_mod_exec, _datetime_exec}, - {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}, + 
{Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, {0, NULL}, }; @@ -7288,17 +7376,16 @@ module_clear(PyObject *mod) PyInterpreterState *interp = PyInterpreterState_Get(); clear_current_module(interp, mod); + // We take care of the static types via an interpreter atexit hook. + // See callback_for_interp_exit() above. + return 0; } static void module_free(void *mod) { - datetime_state *st = get_module_state((PyObject *)mod); - clear_state(st); - - PyInterpreterState *interp = PyInterpreterState_Get(); - clear_current_module(interp, (PyObject *)mod); + (void)module_clear((PyObject *)mod); } static PyModuleDef datetimemodule = { diff --git a/Objects/exceptions.c b/Objects/exceptions.c index f9cd577c1c16be..3a72cce1dff0c7 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -3725,7 +3725,7 @@ _PyExc_FiniTypes(PyInterpreterState *interp) { for (Py_ssize_t i=Py_ARRAY_LENGTH(static_exceptions) - 1; i >= 0; i--) { PyTypeObject *exc = static_exceptions[i].exc; - _PyStaticType_Dealloc(interp, exc); + _PyStaticType_FiniBuiltin(interp, exc); } } diff --git a/Objects/object.c b/Objects/object.c index 5d53e9e5eaba4e..2e9962f4651e1c 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2355,7 +2355,7 @@ _PyTypes_FiniTypes(PyInterpreterState *interp) // their base classes. for (Py_ssize_t i=Py_ARRAY_LENGTH(static_types)-1; i>=0; i--) { PyTypeObject *type = static_types[i]; - _PyStaticType_Dealloc(interp, type); + _PyStaticType_FiniBuiltin(interp, type); } } diff --git a/Objects/structseq.c b/Objects/structseq.c index ec5c5ab45ba813..d8289f2638db0f 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -718,7 +718,7 @@ _PyStructSequence_FiniBuiltin(PyInterpreterState *interp, PyTypeObject *type) return; } - _PyStaticType_Dealloc(interp, type); + _PyStaticType_FiniBuiltin(interp, type); if (_Py_IsMainInterpreter(interp)) { // Undo _PyStructSequence_InitBuiltinWithFlags(). 
diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 0095a79a2cafec..880ac6b9c009fe 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -131,93 +131,159 @@ type_from_ref(PyObject *ref) #ifndef NDEBUG static inline int -static_builtin_index_is_set(PyTypeObject *self) +managed_static_type_index_is_set(PyTypeObject *self) { return self->tp_subclasses != NULL; } #endif static inline size_t -static_builtin_index_get(PyTypeObject *self) +managed_static_type_index_get(PyTypeObject *self) { - assert(static_builtin_index_is_set(self)); + assert(managed_static_type_index_is_set(self)); /* We store a 1-based index so 0 can mean "not initialized". */ return (size_t)self->tp_subclasses - 1; } static inline void -static_builtin_index_set(PyTypeObject *self, size_t index) +managed_static_type_index_set(PyTypeObject *self, size_t index) { - assert(index < _Py_MAX_STATIC_BUILTIN_TYPES); + assert(index < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES); /* We store a 1-based index so 0 can mean "not initialized". */ self->tp_subclasses = (PyObject *)(index + 1); } static inline void -static_builtin_index_clear(PyTypeObject *self) +managed_static_type_index_clear(PyTypeObject *self) { self->tp_subclasses = NULL; } -static inline static_builtin_state * +static inline managed_static_type_state * static_builtin_state_get(PyInterpreterState *interp, PyTypeObject *self) { - return &(interp->types.builtins[static_builtin_index_get(self)]); + return &(interp->types.builtins.initialized[ + managed_static_type_index_get(self)]); +} + +static inline managed_static_type_state * +static_ext_type_state_get(PyInterpreterState *interp, PyTypeObject *self) +{ + return &(interp->types.for_extensions.initialized[ + managed_static_type_index_get(self)]); +} + +static managed_static_type_state * +managed_static_type_state_get(PyInterpreterState *interp, PyTypeObject *self) +{ + // It's probably a builtin type. 
+ size_t index = managed_static_type_index_get(self); + managed_static_type_state *state = + &(interp->types.builtins.initialized[index]); + if (state->type == self) { + return state; + } + if (index > _Py_MAX_MANAGED_STATIC_EXT_TYPES) { + return state; + } + return &(interp->types.for_extensions.initialized[index]); } /* For static types we store some state in an array on each interpreter. */ -static_builtin_state * +managed_static_type_state * _PyStaticType_GetState(PyInterpreterState *interp, PyTypeObject *self) { assert(self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN); - return static_builtin_state_get(interp, self); + return managed_static_type_state_get(interp, self); } /* Set the type's per-interpreter state. */ static void -static_builtin_state_init(PyInterpreterState *interp, PyTypeObject *self) +managed_static_type_state_init(PyInterpreterState *interp, PyTypeObject *self, + int isbuiltin, int initial) { - if (_Py_IsMainInterpreter(interp)) { - assert(!static_builtin_index_is_set(self)); - static_builtin_index_set(self, interp->types.num_builtins_initialized); + size_t index; + if (initial) { + assert(!managed_static_type_index_is_set(self)); + if (isbuiltin) { + index = interp->types.builtins.num_initialized; + assert(index < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES); + } + else { + PyMutex_Lock(&interp->types.mutex); + index = interp->types.for_extensions.next_index; + interp->types.for_extensions.next_index++; + PyMutex_Unlock(&interp->types.mutex); + assert(index < _Py_MAX_MANAGED_STATIC_EXT_TYPES); + } + managed_static_type_index_set(self, index); } else { - assert(static_builtin_index_get(self) == - interp->types.num_builtins_initialized); + index = managed_static_type_index_get(self); + if (isbuiltin) { + assert(index == interp->types.builtins.num_initialized); + assert(index < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES); + } + else { + assert(index < _Py_MAX_MANAGED_STATIC_EXT_TYPES); + } } - static_builtin_state *state = static_builtin_state_get(interp, self); 
- /* It should only be called once for each builtin type. */ + managed_static_type_state *state = isbuiltin + ? &(interp->types.builtins.initialized[index]) + : &(interp->types.for_extensions.initialized[index]); + + /* It should only be called once for each builtin type per interpreter. */ assert(state->type == NULL); state->type = self; + state->isbuiltin = isbuiltin; /* state->tp_subclasses is left NULL until init_subclasses() sets it. */ /* state->tp_weaklist is left NULL until insert_head() or insert_after() (in weakrefobject.c) sets it. */ - interp->types.num_builtins_initialized++; + if (isbuiltin) { + interp->types.builtins.num_initialized++; + } + else { + interp->types.for_extensions.num_initialized++; + } } /* Reset the type's per-interpreter state. - This basically undoes what static_builtin_state_init() did. */ + This basically undoes what managed_static_type_state_init() did. */ static void -static_builtin_state_clear(PyInterpreterState *interp, PyTypeObject *self) +managed_static_type_state_clear(PyInterpreterState *interp, PyTypeObject *self, + int isbuiltin, int final) { - static_builtin_state *state = static_builtin_state_get(interp, self); + managed_static_type_state *state = isbuiltin + ? static_builtin_state_get(interp, self) + : static_ext_type_state_get(interp, self); assert(state->type != NULL); state->type = NULL; assert(state->tp_weaklist == NULL); // It was already cleared out. 
- if (_Py_IsMainInterpreter(interp)) { - static_builtin_index_clear(self); + if (final) { + managed_static_type_index_clear(self); } - assert(interp->types.num_builtins_initialized > 0); - interp->types.num_builtins_initialized--; + if (isbuiltin) { + assert(interp->types.builtins.num_initialized > 0); + interp->types.builtins.num_initialized--; + } + else { + PyMutex_Lock(&interp->types.mutex); + assert(interp->types.for_extensions.num_initialized > 0); + interp->types.for_extensions.num_initialized--; + if (interp->types.for_extensions.num_initialized == 0) { + interp->types.for_extensions.next_index = 0; + } + PyMutex_Unlock(&interp->types.mutex); + } } -// Also see _PyStaticType_InitBuiltin() and _PyStaticType_Dealloc(). +// Also see _PyStaticType_InitBuiltin() and _PyStaticType_FiniBuiltin(). /* end static builtin helpers */ @@ -227,7 +293,7 @@ start_readying(PyTypeObject *type) { if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); - static_builtin_state *state = static_builtin_state_get(interp, type); + managed_static_type_state *state = managed_static_type_state_get(interp, type); assert(state != NULL); assert(!state->readying); state->readying = 1; @@ -242,7 +308,7 @@ stop_readying(PyTypeObject *type) { if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); - static_builtin_state *state = static_builtin_state_get(interp, type); + managed_static_type_state *state = managed_static_type_state_get(interp, type); assert(state != NULL); assert(state->readying); state->readying = 0; @@ -257,7 +323,7 @@ is_readying(PyTypeObject *type) { if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); - static_builtin_state *state = static_builtin_state_get(interp, type); + managed_static_type_state *state = managed_static_type_state_get(interp, type); assert(state != NULL); return state->readying; } @@ -272,7 +338,7 @@ 
lookup_tp_dict(PyTypeObject *self) { if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); - static_builtin_state *state = _PyStaticType_GetState(interp, self); + managed_static_type_state *state = _PyStaticType_GetState(interp, self); assert(state != NULL); return state->tp_dict; } @@ -298,7 +364,7 @@ set_tp_dict(PyTypeObject *self, PyObject *dict) { if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); - static_builtin_state *state = _PyStaticType_GetState(interp, self); + managed_static_type_state *state = _PyStaticType_GetState(interp, self); assert(state != NULL); state->tp_dict = dict; return; @@ -311,7 +377,7 @@ clear_tp_dict(PyTypeObject *self) { if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); - static_builtin_state *state = _PyStaticType_GetState(interp, self); + managed_static_type_state *state = _PyStaticType_GetState(interp, self); assert(state != NULL); Py_CLEAR(state->tp_dict); return; @@ -340,13 +406,13 @@ _PyType_GetBases(PyTypeObject *self) } static inline void -set_tp_bases(PyTypeObject *self, PyObject *bases) +set_tp_bases(PyTypeObject *self, PyObject *bases, int initial) { assert(PyTuple_CheckExact(bases)); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { // XXX tp_bases can probably be statically allocated for each // static builtin type. 
- assert(_Py_IsMainInterpreter(_PyInterpreterState_GET())); + assert(initial); assert(self->tp_bases == NULL); if (PyTuple_GET_SIZE(bases) == 0) { assert(self->tp_base == NULL); @@ -363,10 +429,10 @@ set_tp_bases(PyTypeObject *self, PyObject *bases) } static inline void -clear_tp_bases(PyTypeObject *self) +clear_tp_bases(PyTypeObject *self, int final) { if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { - if (_Py_IsMainInterpreter(_PyInterpreterState_GET())) { + if (final) { if (self->tp_bases != NULL) { if (PyTuple_GET_SIZE(self->tp_bases) == 0) { Py_CLEAR(self->tp_bases); @@ -413,13 +479,13 @@ _PyType_GetMRO(PyTypeObject *self) } static inline void -set_tp_mro(PyTypeObject *self, PyObject *mro) +set_tp_mro(PyTypeObject *self, PyObject *mro, int initial) { assert(PyTuple_CheckExact(mro)); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { // XXX tp_mro can probably be statically allocated for each // static builtin type. - assert(_Py_IsMainInterpreter(_PyInterpreterState_GET())); + assert(initial); assert(self->tp_mro == NULL); /* Other checks are done via set_tp_bases. */ _Py_SetImmortal(mro); @@ -428,10 +494,10 @@ set_tp_mro(PyTypeObject *self, PyObject *mro) } static inline void -clear_tp_mro(PyTypeObject *self) +clear_tp_mro(PyTypeObject *self, int final) { if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { - if (_Py_IsMainInterpreter(_PyInterpreterState_GET())) { + if (final) { if (self->tp_mro != NULL) { if (PyTuple_GET_SIZE(self->tp_mro) == 0) { Py_CLEAR(self->tp_mro); @@ -457,7 +523,7 @@ init_tp_subclasses(PyTypeObject *self) } if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); - static_builtin_state *state = _PyStaticType_GetState(interp, self); + managed_static_type_state *state = _PyStaticType_GetState(interp, self); state->tp_subclasses = subclasses; return subclasses; } @@ -473,7 +539,7 @@ clear_tp_subclasses(PyTypeObject *self) has no subclass. 
*/ if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); - static_builtin_state *state = _PyStaticType_GetState(interp, self); + managed_static_type_state *state = _PyStaticType_GetState(interp, self); Py_CLEAR(state->tp_subclasses); return; } @@ -485,7 +551,7 @@ lookup_tp_subclasses(PyTypeObject *self) { if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); - static_builtin_state *state = _PyStaticType_GetState(interp, self); + managed_static_type_state *state = _PyStaticType_GetState(interp, self); assert(state != NULL); return state->tp_subclasses; } @@ -774,10 +840,10 @@ _PyTypes_Fini(PyInterpreterState *interp) struct type_cache *cache = &interp->types.type_cache; type_cache_clear(cache, NULL); - assert(interp->types.num_builtins_initialized == 0); + assert(interp->types.builtins.num_initialized == 0); // All the static builtin types should have been finalized already. - for (size_t i = 0; i < _Py_MAX_STATIC_BUILTIN_TYPES; i++) { - assert(interp->types.builtins[i].type == NULL); + for (size_t i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) { + assert(interp->types.builtins.initialized[i].type == NULL); } } @@ -1444,7 +1510,7 @@ mro_hierarchy(PyTypeObject *type, PyObject *temp) Py_XDECREF(tuple); if (res < 0) { - set_tp_mro(type, old_mro); + set_tp_mro(type, old_mro, 0); Py_DECREF(new_mro); return -1; } @@ -1545,7 +1611,7 @@ type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases, void *context) assert(old_bases != NULL); PyTypeObject *old_base = type->tp_base; - set_tp_bases(type, Py_NewRef(new_bases)); + set_tp_bases(type, Py_NewRef(new_bases), 0); type->tp_base = (PyTypeObject *)Py_NewRef(new_base); PyObject *temp = PyList_New(0); @@ -1593,7 +1659,7 @@ type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases, void *context) "", 2, 3, &cls, &new_mro, &old_mro); /* Do not rollback if cls has a newer version of MRO. 
*/ if (lookup_tp_mro(cls) == new_mro) { - set_tp_mro(cls, Py_XNewRef(old_mro)); + set_tp_mro(cls, Py_XNewRef(old_mro), 0); Py_DECREF(new_mro); } } @@ -1603,7 +1669,7 @@ type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases, void *context) if (lookup_tp_bases(type) == new_bases) { assert(type->tp_base == new_base); - set_tp_bases(type, old_bases); + set_tp_bases(type, old_bases, 0); type->tp_base = old_base; Py_DECREF(new_bases); @@ -3084,7 +3150,7 @@ mro_invoke(PyTypeObject *type) - Returns -1 in case of an error. */ static int -mro_internal_unlocked(PyTypeObject *type, PyObject **p_old_mro) +mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro) { ASSERT_TYPE_LOCK_HELD(); @@ -3107,7 +3173,7 @@ mro_internal_unlocked(PyTypeObject *type, PyObject **p_old_mro) return 0; } - set_tp_mro(type, new_mro); + set_tp_mro(type, new_mro, initial); type_mro_modified(type, new_mro); /* corner case: the super class might have been hidden @@ -3137,7 +3203,7 @@ mro_internal(PyTypeObject *type, PyObject **p_old_mro) { int res; BEGIN_TYPE_LOCK() - res = mro_internal_unlocked(type, p_old_mro); + res = mro_internal_unlocked(type, 0, p_old_mro); END_TYPE_LOCK() return res; } @@ -3732,7 +3798,7 @@ type_new_alloc(type_new_ctx *ctx) type->tp_as_mapping = &et->as_mapping; type->tp_as_buffer = &et->as_buffer; - set_tp_bases(type, Py_NewRef(ctx->bases)); + set_tp_bases(type, Py_NewRef(ctx->bases), 1); type->tp_base = (PyTypeObject *)Py_NewRef(ctx->base); type->tp_dealloc = subtype_dealloc; @@ -4722,7 +4788,7 @@ _PyType_FromMetaclass_impl( /* Set slots we have prepared */ type->tp_base = (PyTypeObject *)Py_NewRef(base); - set_tp_bases(type, bases); + set_tp_bases(type, bases, 1); bases = NULL; // We give our reference to bases to the type type->tp_doc = tp_doc; @@ -5627,7 +5693,7 @@ type_dealloc_common(PyTypeObject *type) static void -clear_static_tp_subclasses(PyTypeObject *type) +clear_static_tp_subclasses(PyTypeObject *type, int isbuiltin) { PyObject 
*subclasses = lookup_tp_subclasses(type); if (subclasses == NULL) { @@ -5664,47 +5730,64 @@ clear_static_tp_subclasses(PyTypeObject *type) continue; } // All static builtin subtypes should have been finalized already. - assert(!(subclass->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)); + assert(!isbuiltin || !(subclass->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)); Py_DECREF(subclass); } +#else + (void)isbuiltin; #endif clear_tp_subclasses(type); } static void -clear_static_type_objects(PyInterpreterState *interp, PyTypeObject *type) +clear_static_type_objects(PyInterpreterState *interp, PyTypeObject *type, + int isbuiltin, int final) { - if (_Py_IsMainInterpreter(interp)) { + if (final) { Py_CLEAR(type->tp_cache); } clear_tp_dict(type); - clear_tp_bases(type); - clear_tp_mro(type); - clear_static_tp_subclasses(type); + clear_tp_bases(type, final); + clear_tp_mro(type, final); + clear_static_tp_subclasses(type, isbuiltin); } -void -_PyStaticType_Dealloc(PyInterpreterState *interp, PyTypeObject *type) + +static void +fini_static_type(PyInterpreterState *interp, PyTypeObject *type, + int isbuiltin, int final) { assert(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN); assert(_Py_IsImmortal((PyObject *)type)); type_dealloc_common(type); - clear_static_type_objects(interp, type); + clear_static_type_objects(interp, type, isbuiltin, final); - if (_Py_IsMainInterpreter(interp)) { + if (final) { type->tp_flags &= ~Py_TPFLAGS_READY; type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG; type->tp_version_tag = 0; } _PyStaticType_ClearWeakRefs(interp, type); - static_builtin_state_clear(interp, type); + managed_static_type_state_clear(interp, type, isbuiltin, final); /* We leave _Py_TPFLAGS_STATIC_BUILTIN set on tp_flags. 
*/ } +void +_PyStaticType_FiniForExtension(PyInterpreterState *interp, PyTypeObject *type, int final) +{ + fini_static_type(interp, type, 0, final); +} + +void +_PyStaticType_FiniBuiltin(PyInterpreterState *interp, PyTypeObject *type) +{ + fini_static_type(interp, type, 1, _Py_IsMainInterpreter(interp)); +} + static void type_dealloc(PyObject *self) @@ -7758,10 +7841,10 @@ type_ready_set_type(PyTypeObject *type) } static int -type_ready_set_bases(PyTypeObject *type) +type_ready_set_bases(PyTypeObject *type, int initial) { if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { - if (!_Py_IsMainInterpreter(_PyInterpreterState_GET())) { + if (!initial) { assert(lookup_tp_bases(type) != NULL); return 0; } @@ -7780,7 +7863,7 @@ type_ready_set_bases(PyTypeObject *type) if (bases == NULL) { return -1; } - set_tp_bases(type, bases); + set_tp_bases(type, bases, 1); } return 0; } @@ -7890,12 +7973,12 @@ type_ready_preheader(PyTypeObject *type) } static int -type_ready_mro(PyTypeObject *type) +type_ready_mro(PyTypeObject *type, int initial) { ASSERT_TYPE_LOCK_HELD(); if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { - if (!_Py_IsMainInterpreter(_PyInterpreterState_GET())) { + if (!initial) { assert(lookup_tp_mro(type) != NULL); return 0; } @@ -7903,7 +7986,7 @@ type_ready_mro(PyTypeObject *type) } /* Calculate method resolution order */ - if (mro_internal_unlocked(type, NULL) < 0) { + if (mro_internal_unlocked(type, initial, NULL) < 0) { return -1; } PyObject *mro = lookup_tp_mro(type); @@ -8058,7 +8141,7 @@ type_ready_add_subclasses(PyTypeObject *type) // Set tp_new and the "__new__" key in the type dictionary. // Use the Py_TPFLAGS_DISALLOW_INSTANTIATION flag. static int -type_ready_set_new(PyTypeObject *type, int rerunbuiltin) +type_ready_set_new(PyTypeObject *type, int initial) { PyTypeObject *base = type->tp_base; /* The condition below could use some explanation. 
@@ -8080,7 +8163,7 @@ type_ready_set_new(PyTypeObject *type, int rerunbuiltin) if (!(type->tp_flags & Py_TPFLAGS_DISALLOW_INSTANTIATION)) { if (type->tp_new != NULL) { - if (!rerunbuiltin || base == NULL || type->tp_new != base->tp_new) { + if (initial || base == NULL || type->tp_new != base->tp_new) { // If "__new__" key does not exists in the type dictionary, // set it to tp_new_wrapper(). if (add_tp_new_wrapper(type) < 0) { @@ -8162,7 +8245,7 @@ type_ready_post_checks(PyTypeObject *type) static int -type_ready(PyTypeObject *type, int rerunbuiltin) +type_ready(PyTypeObject *type, int initial) { ASSERT_TYPE_LOCK_HELD(); @@ -8192,19 +8275,19 @@ type_ready(PyTypeObject *type, int rerunbuiltin) if (type_ready_set_type(type) < 0) { goto error; } - if (type_ready_set_bases(type) < 0) { + if (type_ready_set_bases(type, initial) < 0) { goto error; } - if (type_ready_mro(type) < 0) { + if (type_ready_mro(type, initial) < 0) { goto error; } - if (type_ready_set_new(type, rerunbuiltin) < 0) { + if (type_ready_set_new(type, initial) < 0) { goto error; } if (type_ready_fill_dict(type) < 0) { goto error; } - if (!rerunbuiltin) { + if (initial) { if (type_ready_inherit(type) < 0) { goto error; } @@ -8218,7 +8301,7 @@ type_ready(PyTypeObject *type, int rerunbuiltin) if (type_ready_add_subclasses(type) < 0) { goto error; } - if (!rerunbuiltin) { + if (initial) { if (type_ready_managed_dict(type) < 0) { goto error; } @@ -8258,7 +8341,7 @@ PyType_Ready(PyTypeObject *type) int res; BEGIN_TYPE_LOCK() if (!(type->tp_flags & Py_TPFLAGS_READY)) { - res = type_ready(type, 0); + res = type_ready(type, 1); } else { res = 0; assert(_PyType_CheckConsistency(type)); @@ -8267,17 +8350,18 @@ PyType_Ready(PyTypeObject *type) return res; } -int -_PyStaticType_InitBuiltin(PyInterpreterState *interp, PyTypeObject *self) + +static int +init_static_type(PyInterpreterState *interp, PyTypeObject *self, + int isbuiltin, int initial) { assert(_Py_IsImmortal((PyObject *)self)); assert(!(self->tp_flags & 
Py_TPFLAGS_HEAPTYPE)); assert(!(self->tp_flags & Py_TPFLAGS_MANAGED_DICT)); assert(!(self->tp_flags & Py_TPFLAGS_MANAGED_WEAKREF)); - int ismain = _Py_IsMainInterpreter(interp); if ((self->tp_flags & Py_TPFLAGS_READY) == 0) { - assert(ismain); + assert(initial); self->tp_flags |= _Py_TPFLAGS_STATIC_BUILTIN; self->tp_flags |= Py_TPFLAGS_IMMUTABLETYPE; @@ -8287,24 +8371,36 @@ _PyStaticType_InitBuiltin(PyInterpreterState *interp, PyTypeObject *self) self->tp_flags |= Py_TPFLAGS_VALID_VERSION_TAG; } else { - assert(!ismain); + assert(!initial); assert(self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN); assert(self->tp_flags & Py_TPFLAGS_VALID_VERSION_TAG); } - static_builtin_state_init(interp, self); + managed_static_type_state_init(interp, self, isbuiltin, initial); int res; BEGIN_TYPE_LOCK(); - res = type_ready(self, !ismain); + res = type_ready(self, initial); END_TYPE_LOCK() if (res < 0) { _PyStaticType_ClearWeakRefs(interp, self); - static_builtin_state_clear(interp, self); + managed_static_type_state_clear(interp, self, isbuiltin, initial); } return res; } +int +_PyStaticType_InitForExtension(PyInterpreterState *interp, PyTypeObject *self) +{ + return init_static_type(interp, self, 0, ((self->tp_flags & Py_TPFLAGS_READY) == 0)); +} + +int +_PyStaticType_InitBuiltin(PyInterpreterState *interp, PyTypeObject *self) +{ + return init_static_type(interp, self, 1, _Py_IsMainInterpreter(interp)); +} + static int add_subclass(PyTypeObject *base, PyTypeObject *type) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 53160f1799f2cc..3b0b4173408724 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15522,9 +15522,9 @@ unicode_is_finalizing(void) void _PyUnicode_FiniTypes(PyInterpreterState *interp) { - _PyStaticType_Dealloc(interp, &EncodingMapType); - _PyStaticType_Dealloc(interp, &PyFieldNameIter_Type); - _PyStaticType_Dealloc(interp, &PyFormatterIter_Type); + _PyStaticType_FiniBuiltin(interp, &EncodingMapType); + 
_PyStaticType_FiniBuiltin(interp, &PyFieldNameIter_Type); + _PyStaticType_FiniBuiltin(interp, &PyFormatterIter_Type); } diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c index 88afaec86827ed..3b027e1b518ba6 100644 --- a/Objects/weakrefobject.c +++ b/Objects/weakrefobject.c @@ -1066,7 +1066,7 @@ PyObject_ClearWeakRefs(PyObject *object) void _PyStaticType_ClearWeakRefs(PyInterpreterState *interp, PyTypeObject *type) { - static_builtin_state *state = _PyStaticType_GetState(interp, type); + managed_static_type_state *state = _PyStaticType_GetState(interp, type); PyObject **list = _PyStaticType_GET_WEAKREFS_LISTPTR(state); // This is safe to do without holding the lock in free-threaded builds; // there is only one thread running and no new threads can be created. diff --git a/Python/crossinterp_exceptions.h b/Python/crossinterp_exceptions.h index 6ecc10c7955fd8..278511da615c75 100644 --- a/Python/crossinterp_exceptions.h +++ b/Python/crossinterp_exceptions.h @@ -90,6 +90,6 @@ static void fini_exceptions(PyInterpreterState *interp) { // Likewise with _fini_not_shareable_error_type(). 
- _PyStaticType_Dealloc(interp, &_PyExc_InterpreterNotFoundError); - _PyStaticType_Dealloc(interp, &_PyExc_InterpreterError); + _PyStaticType_FiniBuiltin(interp, &_PyExc_InterpreterNotFoundError); + _PyStaticType_FiniBuiltin(interp, &_PyExc_InterpreterError); } diff --git a/Tools/c-analyzer/cpython/globals-to-fix.tsv b/Tools/c-analyzer/cpython/globals-to-fix.tsv index 711ae343a8d876..4586a59f6ac2ef 100644 --- a/Tools/c-analyzer/cpython/globals-to-fix.tsv +++ b/Tools/c-analyzer/cpython/globals-to-fix.tsv @@ -307,6 +307,7 @@ Python/crossinterp_exceptions.h - PyExc_InterpreterNotFoundError - Modules/_datetimemodule.c - zero_delta - Modules/_datetimemodule.c - utc_timezone - Modules/_datetimemodule.c - capi - +Modules/_datetimemodule.c - _globals - Objects/boolobject.c - _Py_FalseStruct - Objects/boolobject.c - _Py_TrueStruct - Objects/dictobject.c - empty_keys_struct - diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index a3bdf0396fd3e1..466f25daa14dc6 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -217,6 +217,7 @@ Modules/_datetimemodule.c - max_fold_seconds - Modules/_datetimemodule.c datetime_isoformat specs - Modules/_datetimemodule.c parse_hh_mm_ss_ff correction - Modules/_datetimemodule.c time_isoformat specs - +Modules/_datetimemodule.c - capi_types - Modules/_decimal/_decimal.c - cond_map_template - Modules/_decimal/_decimal.c - dec_signal_string - Modules/_decimal/_decimal.c - dflt_ctx - From 31a4fb3c74a0284436343858803b54471e2dc9c7 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 4 Jun 2024 03:10:15 +0200 Subject: [PATCH 069/373] gh-119724: Revert "bpo-45759: Better error messages for non-matching 'elif'/'else' statements (#29513)" (#119974) This reverts commit 1c8f912ebdfdb146cd7dd2d7a3a67d2c5045ddb0. 
--- Grammar/python.gram | 5 - Lib/test/test_syntax.py | 61 +- ...-06-03-13-25-04.gh-issue-119724.EH1dkA.rst | 3 + Parser/parser.c | 998 ++++++++---------- 4 files changed, 455 insertions(+), 612 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-03-13-25-04.gh-issue-119724.EH1dkA.rst diff --git a/Grammar/python.gram b/Grammar/python.gram index 1734479276dd5b..b14e5dd096cdf4 100644 --- a/Grammar/python.gram +++ b/Grammar/python.gram @@ -127,7 +127,6 @@ simple_stmt[stmt_ty] (memo): | &'nonlocal' nonlocal_stmt compound_stmt[stmt_ty]: - | invalid_compound_stmt | &('def' | '@' | 'async') function_def | &'if' if_stmt | &('class' | '@') class_def @@ -1323,10 +1322,6 @@ invalid_import_from_targets: | token=NEWLINE { RAISE_SYNTAX_ERROR_STARTING_FROM(token, "Expected one or more names after 'import'") } -invalid_compound_stmt: - | a='elif' named_expression ':' { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "'elif' must match an if-statement here") } - | a='else' ':' { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "'else' must match a valid statement here") } - invalid_with_stmt: | ['async'] 'with' ','.(expression ['as' star_target])+ NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") } | ['async'] 'with' '(' ','.(expressions ['as' star_target])+ ','? ')' NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") } diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index b978838ea7003f..491f7fd7908e97 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -1853,28 +1853,6 @@ Traceback (most recent call last): SyntaxError: positional patterns follow keyword patterns -Non-matching 'elif'/'else' statements: - - >>> if a == b: - ... ... - ... elif a == c: - Traceback (most recent call last): - SyntaxError: 'elif' must match an if-statement here - - >>> if x == y: - ... ... - ... 
else: - Traceback (most recent call last): - SyntaxError: 'else' must match a valid statement here - - >>> elif m == n: - Traceback (most recent call last): - SyntaxError: 'elif' must match an if-statement here - - >>> else: - Traceback (most recent call last): - SyntaxError: 'else' must match a valid statement here - Uses of the star operator which should fail: A[:*b] @@ -2167,8 +2145,8 @@ def _check_error(self, code, errtext, lineno=None, offset=None, end_lineno=None, end_offset=None): """Check that compiling code raises SyntaxError with errtext. - errtext is a regular expression that must be present in the - test of the exception raised. If subclass is specified, it + errtest is a regular expression that must be present in the + test of the exception raised. If subclass is specified it is the expected subclass of SyntaxError (e.g. IndentationError). """ try: @@ -2192,22 +2170,6 @@ def _check_error(self, code, errtext, else: self.fail("compile() did not raise SyntaxError") - def _check_noerror(self, code, - errtext="compile() raised unexpected SyntaxError", - filename="", mode="exec", subclass=None): - """Check that compiling code does not raise a SyntaxError. - - errtext is the message passed to self.fail if there is - a SyntaxError. If the subclass parameter is specified, - it is the subclass of SyntaxError (e.g. IndentationError) - that the raised error is checked against. 
- """ - try: - compile(code, filename, mode) - except SyntaxError as err: - if (not subclass) or isinstance(err, subclass): - self.fail(errtext) - def test_expression_with_assignment(self): self._check_error( "print(end1 + end2 = ' ')", @@ -2609,25 +2571,6 @@ def test_syntax_error_on_deeply_nested_blocks(self): """ self._check_error(source, "too many statically nested blocks") - def test_syntax_error_non_matching_elif_else_statements(self): - # Check bpo-45759: 'elif' statements that doesn't match an - # if-statement or 'else' statements that doesn't match any - # valid else-able statement (e.g. 'while') - self._check_error( - "elif m == n:\n ...", - "'elif' must match an if-statement here") - self._check_error( - "else:\n ...", - "'else' must match a valid statement here") - self._check_noerror("if a == b:\n ...\nelif a == c:\n ...") - self._check_noerror("if x == y:\n ...\nelse:\n ...") - self._check_error( - "else = 123", - "invalid syntax") - self._check_error( - "elif 55 = 123", - "cannot assign to literal here") - @support.cpython_only def test_error_on_parser_stack_overflow(self): source = "-" * 100000 + "4" diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-03-13-25-04.gh-issue-119724.EH1dkA.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-03-13-25-04.gh-issue-119724.EH1dkA.rst new file mode 100644 index 00000000000000..78dc48da934cf6 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-03-13-25-04.gh-issue-119724.EH1dkA.rst @@ -0,0 +1,3 @@ +Reverted improvements to error messages for ``elif``/``else`` statements not +matching any valid statements, which made in hard to locate the syntax +errors inside those ``elif``/``else`` blocks. 
diff --git a/Parser/parser.c b/Parser/parser.c index fec3353d30cb4d..05cd93c2c92f29 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -21,28 +21,28 @@ static KeywordToken *reserved_keywords[] = { (KeywordToken[]) {{NULL, -1}}, (KeywordToken[]) {{NULL, -1}}, (KeywordToken[]) { - {"if", 662}, - {"as", 660}, - {"in", 673}, + {"if", 660}, + {"as", 658}, + {"in", 671}, {"or", 581}, {"is", 589}, {NULL, -1}, }, (KeywordToken[]) { {"del", 613}, - {"def", 677}, - {"for", 672}, - {"try", 644}, + {"def", 675}, + {"for", 670}, + {"try", 642}, {"and", 582}, - {"not", 681}, + {"not", 679}, {NULL, -1}, }, (KeywordToken[]) { {"from", 621}, {"pass", 504}, - {"with", 635}, - {"elif", 664}, - {"else", 665}, + {"with", 633}, + {"elif", 662}, + {"else", 663}, {"None", 611}, {"True", 610}, {NULL, -1}, @@ -51,9 +51,9 @@ static KeywordToken *reserved_keywords[] = { {"raise", 525}, {"yield", 580}, {"break", 508}, - {"async", 676}, - {"class", 679}, - {"while", 667}, + {"async", 674}, + {"class", 677}, + {"while", 665}, {"False", 612}, {"await", 590}, {NULL, -1}, @@ -63,12 +63,12 @@ static KeywordToken *reserved_keywords[] = { {"import", 622}, {"assert", 529}, {"global", 526}, - {"except", 657}, + {"except", 655}, {"lambda", 609}, {NULL, -1}, }, (KeywordToken[]) { - {"finally", 653}, + {"finally", 651}, {NULL, -1}, }, (KeywordToken[]) { @@ -308,316 +308,315 @@ static char *soft_keywords[] = { #define invalid_group_type 1221 #define invalid_import_type 1222 #define invalid_import_from_targets_type 1223 -#define invalid_compound_stmt_type 1224 -#define invalid_with_stmt_type 1225 -#define invalid_with_stmt_indent_type 1226 -#define invalid_try_stmt_type 1227 -#define invalid_except_stmt_type 1228 -#define invalid_finally_stmt_type 1229 -#define invalid_except_stmt_indent_type 1230 -#define invalid_except_star_stmt_indent_type 1231 -#define invalid_match_stmt_type 1232 -#define invalid_case_block_type 1233 -#define invalid_as_pattern_type 1234 -#define invalid_class_pattern_type 1235 
-#define invalid_class_argument_pattern_type 1236 -#define invalid_if_stmt_type 1237 -#define invalid_elif_stmt_type 1238 -#define invalid_else_stmt_type 1239 -#define invalid_while_stmt_type 1240 -#define invalid_for_stmt_type 1241 -#define invalid_def_raw_type 1242 -#define invalid_class_def_raw_type 1243 -#define invalid_double_starred_kvpairs_type 1244 -#define invalid_kvpair_type 1245 -#define invalid_starred_expression_unpacking_type 1246 -#define invalid_starred_expression_type 1247 -#define invalid_replacement_field_type 1248 -#define invalid_conversion_character_type 1249 -#define invalid_arithmetic_type 1250 -#define invalid_factor_type 1251 -#define invalid_type_params_type 1252 -#define _loop0_1_type 1253 -#define _loop0_2_type 1254 -#define _loop1_3_type 1255 -#define _loop0_5_type 1256 -#define _gather_4_type 1257 -#define _tmp_6_type 1258 -#define _tmp_7_type 1259 -#define _tmp_8_type 1260 -#define _tmp_9_type 1261 -#define _tmp_10_type 1262 -#define _tmp_11_type 1263 -#define _tmp_12_type 1264 -#define _tmp_13_type 1265 -#define _loop1_14_type 1266 -#define _tmp_15_type 1267 -#define _tmp_16_type 1268 -#define _tmp_17_type 1269 -#define _loop0_19_type 1270 -#define _gather_18_type 1271 -#define _loop0_21_type 1272 -#define _gather_20_type 1273 -#define _tmp_22_type 1274 -#define _tmp_23_type 1275 -#define _loop0_24_type 1276 -#define _loop1_25_type 1277 -#define _loop0_27_type 1278 -#define _gather_26_type 1279 -#define _tmp_28_type 1280 -#define _loop0_30_type 1281 -#define _gather_29_type 1282 -#define _tmp_31_type 1283 -#define _loop1_32_type 1284 -#define _tmp_33_type 1285 -#define _tmp_34_type 1286 -#define _tmp_35_type 1287 -#define _loop0_36_type 1288 -#define _loop0_37_type 1289 -#define _loop0_38_type 1290 -#define _loop1_39_type 1291 -#define _loop0_40_type 1292 -#define _loop1_41_type 1293 -#define _loop1_42_type 1294 -#define _loop1_43_type 1295 -#define _loop0_44_type 1296 -#define _loop1_45_type 1297 -#define _loop0_46_type 1298 
-#define _loop1_47_type 1299 -#define _loop0_48_type 1300 -#define _loop0_49_type 1301 -#define _loop1_50_type 1302 -#define _loop0_52_type 1303 -#define _gather_51_type 1304 -#define _loop0_54_type 1305 -#define _gather_53_type 1306 -#define _loop0_56_type 1307 -#define _gather_55_type 1308 -#define _loop0_58_type 1309 -#define _gather_57_type 1310 -#define _tmp_59_type 1311 -#define _loop1_60_type 1312 -#define _loop1_61_type 1313 -#define _tmp_62_type 1314 -#define _tmp_63_type 1315 -#define _loop1_64_type 1316 -#define _loop0_66_type 1317 -#define _gather_65_type 1318 -#define _tmp_67_type 1319 -#define _tmp_68_type 1320 -#define _tmp_69_type 1321 -#define _tmp_70_type 1322 -#define _loop0_72_type 1323 -#define _gather_71_type 1324 -#define _loop0_74_type 1325 -#define _gather_73_type 1326 -#define _tmp_75_type 1327 -#define _loop0_77_type 1328 -#define _gather_76_type 1329 -#define _loop0_79_type 1330 -#define _gather_78_type 1331 -#define _loop0_81_type 1332 -#define _gather_80_type 1333 -#define _loop1_82_type 1334 -#define _loop1_83_type 1335 -#define _loop0_85_type 1336 -#define _gather_84_type 1337 -#define _loop1_86_type 1338 -#define _loop1_87_type 1339 -#define _loop1_88_type 1340 -#define _tmp_89_type 1341 -#define _loop0_91_type 1342 -#define _gather_90_type 1343 -#define _tmp_92_type 1344 -#define _tmp_93_type 1345 -#define _tmp_94_type 1346 -#define _tmp_95_type 1347 -#define _tmp_96_type 1348 -#define _tmp_97_type 1349 -#define _loop0_98_type 1350 -#define _loop0_99_type 1351 -#define _loop0_100_type 1352 -#define _loop1_101_type 1353 -#define _loop0_102_type 1354 -#define _loop1_103_type 1355 -#define _loop1_104_type 1356 -#define _loop1_105_type 1357 -#define _loop0_106_type 1358 -#define _loop1_107_type 1359 -#define _loop0_108_type 1360 -#define _loop1_109_type 1361 -#define _loop0_110_type 1362 -#define _loop1_111_type 1363 -#define _loop0_112_type 1364 -#define _loop0_113_type 1365 -#define _loop1_114_type 1366 -#define _tmp_115_type 1367 
-#define _loop0_117_type 1368 -#define _gather_116_type 1369 -#define _loop1_118_type 1370 -#define _loop0_119_type 1371 -#define _loop0_120_type 1372 -#define _tmp_121_type 1373 -#define _loop0_123_type 1374 -#define _gather_122_type 1375 -#define _tmp_124_type 1376 -#define _loop0_126_type 1377 -#define _gather_125_type 1378 -#define _loop0_128_type 1379 -#define _gather_127_type 1380 -#define _loop0_130_type 1381 -#define _gather_129_type 1382 -#define _loop0_132_type 1383 -#define _gather_131_type 1384 -#define _loop0_133_type 1385 -#define _loop0_135_type 1386 -#define _gather_134_type 1387 -#define _loop1_136_type 1388 -#define _tmp_137_type 1389 -#define _loop0_139_type 1390 -#define _gather_138_type 1391 -#define _loop0_141_type 1392 -#define _gather_140_type 1393 -#define _loop0_143_type 1394 -#define _gather_142_type 1395 -#define _loop0_145_type 1396 -#define _gather_144_type 1397 -#define _loop0_147_type 1398 -#define _gather_146_type 1399 -#define _tmp_148_type 1400 -#define _tmp_149_type 1401 -#define _loop0_151_type 1402 -#define _gather_150_type 1403 -#define _tmp_152_type 1404 -#define _tmp_153_type 1405 -#define _tmp_154_type 1406 -#define _tmp_155_type 1407 -#define _tmp_156_type 1408 -#define _tmp_157_type 1409 -#define _tmp_158_type 1410 -#define _tmp_159_type 1411 -#define _tmp_160_type 1412 -#define _tmp_161_type 1413 -#define _loop0_162_type 1414 -#define _loop0_163_type 1415 -#define _loop0_164_type 1416 -#define _tmp_165_type 1417 -#define _tmp_166_type 1418 -#define _tmp_167_type 1419 -#define _tmp_168_type 1420 -#define _loop0_169_type 1421 -#define _loop0_170_type 1422 -#define _loop0_171_type 1423 -#define _loop1_172_type 1424 -#define _tmp_173_type 1425 -#define _loop0_174_type 1426 -#define _tmp_175_type 1427 -#define _loop0_176_type 1428 -#define _loop1_177_type 1429 -#define _tmp_178_type 1430 -#define _tmp_179_type 1431 -#define _tmp_180_type 1432 -#define _loop0_181_type 1433 -#define _tmp_182_type 1434 -#define _tmp_183_type 
1435 -#define _loop1_184_type 1436 -#define _tmp_185_type 1437 -#define _loop0_186_type 1438 -#define _loop0_187_type 1439 -#define _loop0_188_type 1440 -#define _loop0_190_type 1441 -#define _gather_189_type 1442 -#define _tmp_191_type 1443 -#define _loop0_192_type 1444 -#define _tmp_193_type 1445 -#define _loop0_194_type 1446 -#define _loop1_195_type 1447 -#define _loop1_196_type 1448 -#define _tmp_197_type 1449 -#define _tmp_198_type 1450 -#define _loop0_199_type 1451 -#define _tmp_200_type 1452 -#define _tmp_201_type 1453 -#define _tmp_202_type 1454 -#define _tmp_203_type 1455 -#define _loop0_205_type 1456 -#define _gather_204_type 1457 -#define _loop0_207_type 1458 -#define _gather_206_type 1459 -#define _loop0_209_type 1460 -#define _gather_208_type 1461 -#define _loop0_211_type 1462 -#define _gather_210_type 1463 -#define _loop0_213_type 1464 -#define _gather_212_type 1465 -#define _tmp_214_type 1466 -#define _loop0_215_type 1467 -#define _loop1_216_type 1468 -#define _tmp_217_type 1469 -#define _loop0_218_type 1470 -#define _loop1_219_type 1471 -#define _tmp_220_type 1472 -#define _tmp_221_type 1473 -#define _tmp_222_type 1474 -#define _tmp_223_type 1475 -#define _tmp_224_type 1476 -#define _tmp_225_type 1477 -#define _tmp_226_type 1478 -#define _tmp_227_type 1479 -#define _tmp_228_type 1480 -#define _tmp_229_type 1481 -#define _tmp_230_type 1482 -#define _loop0_232_type 1483 -#define _gather_231_type 1484 -#define _tmp_233_type 1485 -#define _tmp_234_type 1486 -#define _tmp_235_type 1487 -#define _tmp_236_type 1488 -#define _tmp_237_type 1489 -#define _tmp_238_type 1490 -#define _tmp_239_type 1491 -#define _loop0_240_type 1492 -#define _tmp_241_type 1493 -#define _tmp_242_type 1494 -#define _tmp_243_type 1495 -#define _tmp_244_type 1496 -#define _tmp_245_type 1497 -#define _tmp_246_type 1498 -#define _tmp_247_type 1499 -#define _tmp_248_type 1500 -#define _tmp_249_type 1501 -#define _tmp_250_type 1502 -#define _tmp_251_type 1503 -#define _tmp_252_type 1504 
-#define _tmp_253_type 1505 -#define _tmp_254_type 1506 -#define _tmp_255_type 1507 -#define _tmp_256_type 1508 -#define _tmp_257_type 1509 -#define _tmp_258_type 1510 -#define _tmp_259_type 1511 -#define _tmp_260_type 1512 -#define _tmp_261_type 1513 -#define _tmp_262_type 1514 -#define _tmp_263_type 1515 -#define _tmp_264_type 1516 -#define _tmp_265_type 1517 -#define _loop0_266_type 1518 -#define _tmp_267_type 1519 -#define _tmp_268_type 1520 -#define _tmp_269_type 1521 -#define _tmp_270_type 1522 -#define _tmp_271_type 1523 -#define _tmp_272_type 1524 -#define _loop0_274_type 1525 -#define _gather_273_type 1526 -#define _tmp_275_type 1527 -#define _tmp_276_type 1528 -#define _tmp_277_type 1529 -#define _tmp_278_type 1530 -#define _tmp_279_type 1531 -#define _tmp_280_type 1532 -#define _tmp_281_type 1533 +#define invalid_with_stmt_type 1224 +#define invalid_with_stmt_indent_type 1225 +#define invalid_try_stmt_type 1226 +#define invalid_except_stmt_type 1227 +#define invalid_finally_stmt_type 1228 +#define invalid_except_stmt_indent_type 1229 +#define invalid_except_star_stmt_indent_type 1230 +#define invalid_match_stmt_type 1231 +#define invalid_case_block_type 1232 +#define invalid_as_pattern_type 1233 +#define invalid_class_pattern_type 1234 +#define invalid_class_argument_pattern_type 1235 +#define invalid_if_stmt_type 1236 +#define invalid_elif_stmt_type 1237 +#define invalid_else_stmt_type 1238 +#define invalid_while_stmt_type 1239 +#define invalid_for_stmt_type 1240 +#define invalid_def_raw_type 1241 +#define invalid_class_def_raw_type 1242 +#define invalid_double_starred_kvpairs_type 1243 +#define invalid_kvpair_type 1244 +#define invalid_starred_expression_unpacking_type 1245 +#define invalid_starred_expression_type 1246 +#define invalid_replacement_field_type 1247 +#define invalid_conversion_character_type 1248 +#define invalid_arithmetic_type 1249 +#define invalid_factor_type 1250 +#define invalid_type_params_type 1251 +#define _loop0_1_type 1252 
+#define _loop0_2_type 1253 +#define _loop1_3_type 1254 +#define _loop0_5_type 1255 +#define _gather_4_type 1256 +#define _tmp_6_type 1257 +#define _tmp_7_type 1258 +#define _tmp_8_type 1259 +#define _tmp_9_type 1260 +#define _tmp_10_type 1261 +#define _tmp_11_type 1262 +#define _tmp_12_type 1263 +#define _tmp_13_type 1264 +#define _loop1_14_type 1265 +#define _tmp_15_type 1266 +#define _tmp_16_type 1267 +#define _tmp_17_type 1268 +#define _loop0_19_type 1269 +#define _gather_18_type 1270 +#define _loop0_21_type 1271 +#define _gather_20_type 1272 +#define _tmp_22_type 1273 +#define _tmp_23_type 1274 +#define _loop0_24_type 1275 +#define _loop1_25_type 1276 +#define _loop0_27_type 1277 +#define _gather_26_type 1278 +#define _tmp_28_type 1279 +#define _loop0_30_type 1280 +#define _gather_29_type 1281 +#define _tmp_31_type 1282 +#define _loop1_32_type 1283 +#define _tmp_33_type 1284 +#define _tmp_34_type 1285 +#define _tmp_35_type 1286 +#define _loop0_36_type 1287 +#define _loop0_37_type 1288 +#define _loop0_38_type 1289 +#define _loop1_39_type 1290 +#define _loop0_40_type 1291 +#define _loop1_41_type 1292 +#define _loop1_42_type 1293 +#define _loop1_43_type 1294 +#define _loop0_44_type 1295 +#define _loop1_45_type 1296 +#define _loop0_46_type 1297 +#define _loop1_47_type 1298 +#define _loop0_48_type 1299 +#define _loop0_49_type 1300 +#define _loop1_50_type 1301 +#define _loop0_52_type 1302 +#define _gather_51_type 1303 +#define _loop0_54_type 1304 +#define _gather_53_type 1305 +#define _loop0_56_type 1306 +#define _gather_55_type 1307 +#define _loop0_58_type 1308 +#define _gather_57_type 1309 +#define _tmp_59_type 1310 +#define _loop1_60_type 1311 +#define _loop1_61_type 1312 +#define _tmp_62_type 1313 +#define _tmp_63_type 1314 +#define _loop1_64_type 1315 +#define _loop0_66_type 1316 +#define _gather_65_type 1317 +#define _tmp_67_type 1318 +#define _tmp_68_type 1319 +#define _tmp_69_type 1320 +#define _tmp_70_type 1321 +#define _loop0_72_type 1322 +#define 
_gather_71_type 1323 +#define _loop0_74_type 1324 +#define _gather_73_type 1325 +#define _tmp_75_type 1326 +#define _loop0_77_type 1327 +#define _gather_76_type 1328 +#define _loop0_79_type 1329 +#define _gather_78_type 1330 +#define _loop0_81_type 1331 +#define _gather_80_type 1332 +#define _loop1_82_type 1333 +#define _loop1_83_type 1334 +#define _loop0_85_type 1335 +#define _gather_84_type 1336 +#define _loop1_86_type 1337 +#define _loop1_87_type 1338 +#define _loop1_88_type 1339 +#define _tmp_89_type 1340 +#define _loop0_91_type 1341 +#define _gather_90_type 1342 +#define _tmp_92_type 1343 +#define _tmp_93_type 1344 +#define _tmp_94_type 1345 +#define _tmp_95_type 1346 +#define _tmp_96_type 1347 +#define _tmp_97_type 1348 +#define _loop0_98_type 1349 +#define _loop0_99_type 1350 +#define _loop0_100_type 1351 +#define _loop1_101_type 1352 +#define _loop0_102_type 1353 +#define _loop1_103_type 1354 +#define _loop1_104_type 1355 +#define _loop1_105_type 1356 +#define _loop0_106_type 1357 +#define _loop1_107_type 1358 +#define _loop0_108_type 1359 +#define _loop1_109_type 1360 +#define _loop0_110_type 1361 +#define _loop1_111_type 1362 +#define _loop0_112_type 1363 +#define _loop0_113_type 1364 +#define _loop1_114_type 1365 +#define _tmp_115_type 1366 +#define _loop0_117_type 1367 +#define _gather_116_type 1368 +#define _loop1_118_type 1369 +#define _loop0_119_type 1370 +#define _loop0_120_type 1371 +#define _tmp_121_type 1372 +#define _loop0_123_type 1373 +#define _gather_122_type 1374 +#define _tmp_124_type 1375 +#define _loop0_126_type 1376 +#define _gather_125_type 1377 +#define _loop0_128_type 1378 +#define _gather_127_type 1379 +#define _loop0_130_type 1380 +#define _gather_129_type 1381 +#define _loop0_132_type 1382 +#define _gather_131_type 1383 +#define _loop0_133_type 1384 +#define _loop0_135_type 1385 +#define _gather_134_type 1386 +#define _loop1_136_type 1387 +#define _tmp_137_type 1388 +#define _loop0_139_type 1389 +#define _gather_138_type 1390 
+#define _loop0_141_type 1391 +#define _gather_140_type 1392 +#define _loop0_143_type 1393 +#define _gather_142_type 1394 +#define _loop0_145_type 1395 +#define _gather_144_type 1396 +#define _loop0_147_type 1397 +#define _gather_146_type 1398 +#define _tmp_148_type 1399 +#define _tmp_149_type 1400 +#define _loop0_151_type 1401 +#define _gather_150_type 1402 +#define _tmp_152_type 1403 +#define _tmp_153_type 1404 +#define _tmp_154_type 1405 +#define _tmp_155_type 1406 +#define _tmp_156_type 1407 +#define _tmp_157_type 1408 +#define _tmp_158_type 1409 +#define _tmp_159_type 1410 +#define _tmp_160_type 1411 +#define _tmp_161_type 1412 +#define _loop0_162_type 1413 +#define _loop0_163_type 1414 +#define _loop0_164_type 1415 +#define _tmp_165_type 1416 +#define _tmp_166_type 1417 +#define _tmp_167_type 1418 +#define _tmp_168_type 1419 +#define _loop0_169_type 1420 +#define _loop0_170_type 1421 +#define _loop0_171_type 1422 +#define _loop1_172_type 1423 +#define _tmp_173_type 1424 +#define _loop0_174_type 1425 +#define _tmp_175_type 1426 +#define _loop0_176_type 1427 +#define _loop1_177_type 1428 +#define _tmp_178_type 1429 +#define _tmp_179_type 1430 +#define _tmp_180_type 1431 +#define _loop0_181_type 1432 +#define _tmp_182_type 1433 +#define _tmp_183_type 1434 +#define _loop1_184_type 1435 +#define _tmp_185_type 1436 +#define _loop0_186_type 1437 +#define _loop0_187_type 1438 +#define _loop0_188_type 1439 +#define _loop0_190_type 1440 +#define _gather_189_type 1441 +#define _tmp_191_type 1442 +#define _loop0_192_type 1443 +#define _tmp_193_type 1444 +#define _loop0_194_type 1445 +#define _loop1_195_type 1446 +#define _loop1_196_type 1447 +#define _tmp_197_type 1448 +#define _tmp_198_type 1449 +#define _loop0_199_type 1450 +#define _tmp_200_type 1451 +#define _tmp_201_type 1452 +#define _tmp_202_type 1453 +#define _tmp_203_type 1454 +#define _loop0_205_type 1455 +#define _gather_204_type 1456 +#define _loop0_207_type 1457 +#define _gather_206_type 1458 +#define 
_loop0_209_type 1459 +#define _gather_208_type 1460 +#define _loop0_211_type 1461 +#define _gather_210_type 1462 +#define _loop0_213_type 1463 +#define _gather_212_type 1464 +#define _tmp_214_type 1465 +#define _loop0_215_type 1466 +#define _loop1_216_type 1467 +#define _tmp_217_type 1468 +#define _loop0_218_type 1469 +#define _loop1_219_type 1470 +#define _tmp_220_type 1471 +#define _tmp_221_type 1472 +#define _tmp_222_type 1473 +#define _tmp_223_type 1474 +#define _tmp_224_type 1475 +#define _tmp_225_type 1476 +#define _tmp_226_type 1477 +#define _tmp_227_type 1478 +#define _tmp_228_type 1479 +#define _tmp_229_type 1480 +#define _tmp_230_type 1481 +#define _loop0_232_type 1482 +#define _gather_231_type 1483 +#define _tmp_233_type 1484 +#define _tmp_234_type 1485 +#define _tmp_235_type 1486 +#define _tmp_236_type 1487 +#define _tmp_237_type 1488 +#define _tmp_238_type 1489 +#define _tmp_239_type 1490 +#define _loop0_240_type 1491 +#define _tmp_241_type 1492 +#define _tmp_242_type 1493 +#define _tmp_243_type 1494 +#define _tmp_244_type 1495 +#define _tmp_245_type 1496 +#define _tmp_246_type 1497 +#define _tmp_247_type 1498 +#define _tmp_248_type 1499 +#define _tmp_249_type 1500 +#define _tmp_250_type 1501 +#define _tmp_251_type 1502 +#define _tmp_252_type 1503 +#define _tmp_253_type 1504 +#define _tmp_254_type 1505 +#define _tmp_255_type 1506 +#define _tmp_256_type 1507 +#define _tmp_257_type 1508 +#define _tmp_258_type 1509 +#define _tmp_259_type 1510 +#define _tmp_260_type 1511 +#define _tmp_261_type 1512 +#define _tmp_262_type 1513 +#define _tmp_263_type 1514 +#define _tmp_264_type 1515 +#define _tmp_265_type 1516 +#define _loop0_266_type 1517 +#define _tmp_267_type 1518 +#define _tmp_268_type 1519 +#define _tmp_269_type 1520 +#define _tmp_270_type 1521 +#define _tmp_271_type 1522 +#define _tmp_272_type 1523 +#define _loop0_274_type 1524 +#define _gather_273_type 1525 +#define _tmp_275_type 1526 +#define _tmp_276_type 1527 +#define _tmp_277_type 1528 +#define 
_tmp_278_type 1529 +#define _tmp_279_type 1530 +#define _tmp_280_type 1531 +#define _tmp_281_type 1532 static mod_ty file_rule(Parser *p); static mod_ty interactive_rule(Parser *p); @@ -843,7 +842,6 @@ static void *invalid_for_target_rule(Parser *p); static void *invalid_group_rule(Parser *p); static void *invalid_import_rule(Parser *p); static void *invalid_import_from_targets_rule(Parser *p); -static void *invalid_compound_stmt_rule(Parser *p); static void *invalid_with_stmt_rule(Parser *p); static void *invalid_with_stmt_indent_rule(Parser *p); static void *invalid_try_stmt_rule(Parser *p); @@ -2062,7 +2060,6 @@ simple_stmt_rule(Parser *p) } // compound_stmt: -// | invalid_compound_stmt // | &('def' | '@' | 'async') function_def // | &'if' if_stmt // | &('class' | '@') class_def @@ -2083,25 +2080,6 @@ compound_stmt_rule(Parser *p) } stmt_ty _res = NULL; int _mark = p->mark; - if (p->call_invalid_rules) { // invalid_compound_stmt - if (p->error_indicator) { - p->level--; - return NULL; - } - D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "invalid_compound_stmt")); - void *invalid_compound_stmt_var; - if ( - (invalid_compound_stmt_var = invalid_compound_stmt_rule(p)) // invalid_compound_stmt - ) - { - D(fprintf(stderr, "%*c+ compound_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "invalid_compound_stmt")); - _res = invalid_compound_stmt_var; - goto done; - } - p->mark = _mark; - D(fprintf(stderr, "%*c%s compound_stmt[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" 
: "-", _mark, p->mark, "invalid_compound_stmt")); - } { // &('def' | '@' | 'async') function_def if (p->error_indicator) { p->level--; @@ -2131,7 +2109,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'if' if_stmt")); stmt_ty if_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 662) // token='if' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 660) // token='if' && (if_stmt_var = if_stmt_rule(p)) // if_stmt ) @@ -2215,7 +2193,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'try' try_stmt")); stmt_ty try_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 644) // token='try' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 642) // token='try' && (try_stmt_var = try_stmt_rule(p)) // try_stmt ) @@ -2236,7 +2214,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'while' while_stmt")); stmt_ty while_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 667) // token='while' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 665) // token='while' && (while_stmt_var = while_stmt_rule(p)) // while_stmt ) @@ -4376,7 +4354,7 @@ class_def_raw_rule(Parser *p) asdl_stmt_seq* c; void *t; if ( - (_keyword = _PyPegen_expect_token(p, 679)) // token='class' + (_keyword = _PyPegen_expect_token(p, 677)) // token='class' && (a = _PyPegen_name_token(p)) // NAME && @@ -4543,7 +4521,7 @@ function_def_raw_rule(Parser *p) void *t; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 677)) // token='def' + (_keyword = _PyPegen_expect_token(p, 675)) // token='def' && (n = _PyPegen_name_token(p)) // NAME && @@ -4604,9 +4582,9 @@ function_def_raw_rule(Parser *p) void *t; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 676)) // token='async' + (_keyword = _PyPegen_expect_token(p, 
674)) // token='async' && - (_keyword_1 = _PyPegen_expect_token(p, 677)) // token='def' + (_keyword_1 = _PyPegen_expect_token(p, 675)) // token='def' && (n = _PyPegen_name_token(p)) // NAME && @@ -5944,7 +5922,7 @@ if_stmt_rule(Parser *p) asdl_stmt_seq* b; stmt_ty c; if ( - (_keyword = _PyPegen_expect_token(p, 662)) // token='if' + (_keyword = _PyPegen_expect_token(p, 660)) // token='if' && (a = named_expression_rule(p)) // named_expression && @@ -5989,7 +5967,7 @@ if_stmt_rule(Parser *p) asdl_stmt_seq* b; void *c; if ( - (_keyword = _PyPegen_expect_token(p, 662)) // token='if' + (_keyword = _PyPegen_expect_token(p, 660)) // token='if' && (a = named_expression_rule(p)) // named_expression && @@ -6084,7 +6062,7 @@ elif_stmt_rule(Parser *p) asdl_stmt_seq* b; stmt_ty c; if ( - (_keyword = _PyPegen_expect_token(p, 664)) // token='elif' + (_keyword = _PyPegen_expect_token(p, 662)) // token='elif' && (a = named_expression_rule(p)) // named_expression && @@ -6129,7 +6107,7 @@ elif_stmt_rule(Parser *p) asdl_stmt_seq* b; void *c; if ( - (_keyword = _PyPegen_expect_token(p, 664)) // token='elif' + (_keyword = _PyPegen_expect_token(p, 662)) // token='elif' && (a = named_expression_rule(p)) // named_expression && @@ -6210,7 +6188,7 @@ else_block_rule(Parser *p) Token * _literal; asdl_stmt_seq* b; if ( - (_keyword = _PyPegen_expect_token(p, 665)) // token='else' + (_keyword = _PyPegen_expect_token(p, 663)) // token='else' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -6289,7 +6267,7 @@ while_stmt_rule(Parser *p) asdl_stmt_seq* b; void *c; if ( - (_keyword = _PyPegen_expect_token(p, 667)) // token='while' + (_keyword = _PyPegen_expect_token(p, 665)) // token='while' && (a = named_expression_rule(p)) // named_expression && @@ -6389,11 +6367,11 @@ for_stmt_rule(Parser *p) expr_ty t; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 672)) // token='for' + (_keyword = _PyPegen_expect_token(p, 670)) // token='for' && (t = star_targets_rule(p)) 
// star_targets && - (_keyword_1 = _PyPegen_expect_token(p, 673)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 671)) // token='in' && (_cut_var = 1) && @@ -6451,13 +6429,13 @@ for_stmt_rule(Parser *p) expr_ty t; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 676)) // token='async' + (_keyword = _PyPegen_expect_token(p, 674)) // token='async' && - (_keyword_1 = _PyPegen_expect_token(p, 672)) // token='for' + (_keyword_1 = _PyPegen_expect_token(p, 670)) // token='for' && (t = star_targets_rule(p)) // star_targets && - (_keyword_2 = _PyPegen_expect_token(p, 673)) // token='in' + (_keyword_2 = _PyPegen_expect_token(p, 671)) // token='in' && (_cut_var = 1) && @@ -6586,7 +6564,7 @@ with_stmt_rule(Parser *p) asdl_stmt_seq* b; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 635)) // token='with' + (_keyword = _PyPegen_expect_token(p, 633)) // token='with' && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && @@ -6637,7 +6615,7 @@ with_stmt_rule(Parser *p) asdl_stmt_seq* b; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 635)) // token='with' + (_keyword = _PyPegen_expect_token(p, 633)) // token='with' && (a = (asdl_withitem_seq*)_gather_53_rule(p)) // ','.with_item+ && @@ -6686,9 +6664,9 @@ with_stmt_rule(Parser *p) asdl_withitem_seq* a; asdl_stmt_seq* b; if ( - (_keyword = _PyPegen_expect_token(p, 676)) // token='async' + (_keyword = _PyPegen_expect_token(p, 674)) // token='async' && - (_keyword_1 = _PyPegen_expect_token(p, 635)) // token='with' + (_keyword_1 = _PyPegen_expect_token(p, 633)) // token='with' && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && @@ -6738,9 +6716,9 @@ with_stmt_rule(Parser *p) asdl_stmt_seq* b; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 676)) // token='async' + (_keyword = _PyPegen_expect_token(p, 674)) // token='async' && - (_keyword_1 = _PyPegen_expect_token(p, 635)) // token='with' + (_keyword_1 = _PyPegen_expect_token(p, 633)) // token='with' && (a = 
(asdl_withitem_seq*)_gather_57_rule(p)) // ','.with_item+ && @@ -6826,7 +6804,7 @@ with_item_rule(Parser *p) if ( (e = expression_rule(p)) // expression && - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (t = star_target_rule(p)) // star_target && @@ -6951,7 +6929,7 @@ try_stmt_rule(Parser *p) asdl_stmt_seq* b; asdl_stmt_seq* f; if ( - (_keyword = _PyPegen_expect_token(p, 644)) // token='try' + (_keyword = _PyPegen_expect_token(p, 642)) // token='try' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -6995,7 +6973,7 @@ try_stmt_rule(Parser *p) asdl_excepthandler_seq* ex; void *f; if ( - (_keyword = _PyPegen_expect_token(p, 644)) // token='try' + (_keyword = _PyPegen_expect_token(p, 642)) // token='try' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -7043,7 +7021,7 @@ try_stmt_rule(Parser *p) asdl_excepthandler_seq* ex; void *f; if ( - (_keyword = _PyPegen_expect_token(p, 644)) // token='try' + (_keyword = _PyPegen_expect_token(p, 642)) // token='try' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -7141,7 +7119,7 @@ except_block_rule(Parser *p) expr_ty e; void *t; if ( - (_keyword = _PyPegen_expect_token(p, 657)) // token='except' + (_keyword = _PyPegen_expect_token(p, 655)) // token='except' && (e = expression_rule(p)) // expression && @@ -7184,7 +7162,7 @@ except_block_rule(Parser *p) Token * _literal; asdl_stmt_seq* b; if ( - (_keyword = _PyPegen_expect_token(p, 657)) // token='except' + (_keyword = _PyPegen_expect_token(p, 655)) // token='except' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -7295,7 +7273,7 @@ except_star_block_rule(Parser *p) expr_ty e; void *t; if ( - (_keyword = _PyPegen_expect_token(p, 657)) // token='except' + (_keyword = _PyPegen_expect_token(p, 655)) // token='except' && (_literal = _PyPegen_expect_token(p, 16)) // token='*' && @@ -7397,7 +7375,7 
@@ finally_block_rule(Parser *p) Token * _literal; asdl_stmt_seq* a; if ( - (_keyword = _PyPegen_expect_token(p, 653)) // token='finally' + (_keyword = _PyPegen_expect_token(p, 651)) // token='finally' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -7705,7 +7683,7 @@ guard_rule(Parser *p) Token * _keyword; expr_ty guard; if ( - (_keyword = _PyPegen_expect_token(p, 662)) // token='if' + (_keyword = _PyPegen_expect_token(p, 660)) // token='if' && (guard = named_expression_rule(p)) // named_expression ) @@ -7900,7 +7878,7 @@ as_pattern_rule(Parser *p) if ( (pattern = or_pattern_rule(p)) // or_pattern && - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (target = pattern_capture_target_rule(p)) // pattern_capture_target ) @@ -11195,11 +11173,11 @@ expression_rule(Parser *p) if ( (a = disjunction_rule(p)) // disjunction && - (_keyword = _PyPegen_expect_token(p, 662)) // token='if' + (_keyword = _PyPegen_expect_token(p, 660)) // token='if' && (b = disjunction_rule(p)) // disjunction && - (_keyword_1 = _PyPegen_expect_token(p, 665)) // token='else' + (_keyword_1 = _PyPegen_expect_token(p, 663)) // token='else' && (c = expression_rule(p)) // expression ) @@ -12081,7 +12059,7 @@ inversion_rule(Parser *p) Token * _keyword; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 681)) // token='not' + (_keyword = _PyPegen_expect_token(p, 679)) // token='not' && (a = inversion_rule(p)) // inversion ) @@ -12735,9 +12713,9 @@ notin_bitwise_or_rule(Parser *p) Token * _keyword_1; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 681)) // token='not' + (_keyword = _PyPegen_expect_token(p, 679)) // token='not' && - (_keyword_1 = _PyPegen_expect_token(p, 673)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 671)) // token='in' && (a = bitwise_or_rule(p)) // bitwise_or ) @@ -12783,7 +12761,7 @@ in_bitwise_or_rule(Parser *p) Token * _keyword; expr_ty a; if ( - (_keyword 
= _PyPegen_expect_token(p, 673)) // token='in' + (_keyword = _PyPegen_expect_token(p, 671)) // token='in' && (a = bitwise_or_rule(p)) // bitwise_or ) @@ -12832,7 +12810,7 @@ isnot_bitwise_or_rule(Parser *p) if ( (_keyword = _PyPegen_expect_token(p, 589)) // token='is' && - (_keyword_1 = _PyPegen_expect_token(p, 681)) // token='not' + (_keyword_1 = _PyPegen_expect_token(p, 679)) // token='not' && (a = bitwise_or_rule(p)) // bitwise_or ) @@ -17003,13 +16981,13 @@ for_if_clause_rule(Parser *p) expr_ty b; asdl_expr_seq* c; if ( - (_keyword = _PyPegen_expect_token(p, 676)) // token='async' + (_keyword = _PyPegen_expect_token(p, 674)) // token='async' && - (_keyword_1 = _PyPegen_expect_token(p, 672)) // token='for' + (_keyword_1 = _PyPegen_expect_token(p, 670)) // token='for' && (a = star_targets_rule(p)) // star_targets && - (_keyword_2 = _PyPegen_expect_token(p, 673)) // token='in' + (_keyword_2 = _PyPegen_expect_token(p, 671)) // token='in' && (_cut_var = 1) && @@ -17048,11 +17026,11 @@ for_if_clause_rule(Parser *p) expr_ty b; asdl_expr_seq* c; if ( - (_keyword = _PyPegen_expect_token(p, 672)) // token='for' + (_keyword = _PyPegen_expect_token(p, 670)) // token='for' && (a = star_targets_rule(p)) // star_targets && - (_keyword_1 = _PyPegen_expect_token(p, 673)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 671)) // token='in' && (_cut_var = 1) && @@ -20353,11 +20331,11 @@ expression_without_invalid_rule(Parser *p) if ( (a = disjunction_rule(p)) // disjunction && - (_keyword = _PyPegen_expect_token(p, 662)) // token='if' + (_keyword = _PyPegen_expect_token(p, 660)) // token='if' && (b = disjunction_rule(p)) // disjunction && - (_keyword_1 = _PyPegen_expect_token(p, 665)) // token='else' + (_keyword_1 = _PyPegen_expect_token(p, 663)) // token='else' && (c = expression_rule(p)) // expression ) @@ -20623,7 +20601,7 @@ invalid_expression_rule(Parser *p) if ( (a = disjunction_rule(p)) // disjunction && - (_keyword = _PyPegen_expect_token(p, 662)) // token='if' + 
(_keyword = _PyPegen_expect_token(p, 660)) // token='if' && (b = disjunction_rule(p)) // disjunction && @@ -22561,7 +22539,7 @@ invalid_with_item_rule(Parser *p) if ( (expression_var = expression_rule(p)) // expression && - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (a = expression_rule(p)) // expression && @@ -22611,13 +22589,13 @@ invalid_for_if_clause_rule(Parser *p) UNUSED(_opt_var); // Silence compiler warnings void *_tmp_203_var; if ( - (_opt_var = _PyPegen_expect_token(p, 676), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 674), !p->error_indicator) // 'async'? && - (_keyword = _PyPegen_expect_token(p, 672)) // token='for' + (_keyword = _PyPegen_expect_token(p, 670)) // token='for' && (_tmp_203_var = _tmp_203_rule(p)) // bitwise_or ((',' bitwise_or))* ','? && - _PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, 673) // token='in' + _PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, 671) // token='in' ) { D(fprintf(stderr, "%*c+ invalid_for_if_clause[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'async'? 'for' (bitwise_or ((',' bitwise_or))* ','?) !'in'")); @@ -22663,9 +22641,9 @@ invalid_for_target_rule(Parser *p) UNUSED(_opt_var); // Silence compiler warnings expr_ty a; if ( - (_opt_var = _PyPegen_expect_token(p, 676), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 674), !p->error_indicator) // 'async'? 
&& - (_keyword = _PyPegen_expect_token(p, 672)) // token='for' + (_keyword = _PyPegen_expect_token(p, 670)) // token='for' && (a = star_expressions_rule(p)) // star_expressions ) @@ -22923,82 +22901,6 @@ invalid_import_from_targets_rule(Parser *p) return _res; } -// invalid_compound_stmt: 'elif' named_expression ':' | 'else' ':' -static void * -invalid_compound_stmt_rule(Parser *p) -{ - if (p->level++ == MAXSTACK) { - _Pypegen_stack_overflow(p); - } - if (p->error_indicator) { - p->level--; - return NULL; - } - void * _res = NULL; - int _mark = p->mark; - { // 'elif' named_expression ':' - if (p->error_indicator) { - p->level--; - return NULL; - } - D(fprintf(stderr, "%*c> invalid_compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'elif' named_expression ':'")); - Token * _literal; - Token * a; - expr_ty named_expression_var; - if ( - (a = _PyPegen_expect_token(p, 664)) // token='elif' - && - (named_expression_var = named_expression_rule(p)) // named_expression - && - (_literal = _PyPegen_expect_token(p, 11)) // token=':' - ) - { - D(fprintf(stderr, "%*c+ invalid_compound_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'elif' named_expression ':'")); - _res = RAISE_SYNTAX_ERROR_STARTING_FROM ( a , "'elif' must match an if-statement here" ); - if (_res == NULL && PyErr_Occurred()) { - p->error_indicator = 1; - p->level--; - return NULL; - } - goto done; - } - p->mark = _mark; - D(fprintf(stderr, "%*c%s invalid_compound_stmt[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" 
: "-", _mark, p->mark, "'elif' named_expression ':'")); - } - { // 'else' ':' - if (p->error_indicator) { - p->level--; - return NULL; - } - D(fprintf(stderr, "%*c> invalid_compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'else' ':'")); - Token * _literal; - Token * a; - if ( - (a = _PyPegen_expect_token(p, 665)) // token='else' - && - (_literal = _PyPegen_expect_token(p, 11)) // token=':' - ) - { - D(fprintf(stderr, "%*c+ invalid_compound_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'else' ':'")); - _res = RAISE_SYNTAX_ERROR_STARTING_FROM ( a , "'else' must match a valid statement here" ); - if (_res == NULL && PyErr_Occurred()) { - p->error_indicator = 1; - p->level--; - return NULL; - } - goto done; - } - p->mark = _mark; - D(fprintf(stderr, "%*c%s invalid_compound_stmt[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'else' ':'")); - } - _res = NULL; - done: - p->level--; - return _res; -} - // invalid_with_stmt: // | 'async'? 'with' ','.(expression ['as' star_target])+ NEWLINE // | 'async'? 'with' '(' ','.(expressions ['as' star_target])+ ','? ')' NEWLINE @@ -23026,9 +22928,9 @@ invalid_with_stmt_rule(Parser *p) UNUSED(_opt_var); // Silence compiler warnings Token * newline_var; if ( - (_opt_var = _PyPegen_expect_token(p, 676), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 674), !p->error_indicator) // 'async'? && - (_keyword = _PyPegen_expect_token(p, 635)) // token='with' + (_keyword = _PyPegen_expect_token(p, 633)) // token='with' && (_gather_206_var = _gather_206_rule(p)) // ','.(expression ['as' star_target])+ && @@ -23064,9 +22966,9 @@ invalid_with_stmt_rule(Parser *p) UNUSED(_opt_var_1); // Silence compiler warnings Token * newline_var; if ( - (_opt_var = _PyPegen_expect_token(p, 676), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 674), !p->error_indicator) // 'async'? 
&& - (_keyword = _PyPegen_expect_token(p, 635)) // token='with' + (_keyword = _PyPegen_expect_token(p, 633)) // token='with' && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && @@ -23126,9 +23028,9 @@ invalid_with_stmt_indent_rule(Parser *p) Token * a; Token * newline_var; if ( - (_opt_var = _PyPegen_expect_token(p, 676), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 674), !p->error_indicator) // 'async'? && - (a = _PyPegen_expect_token(p, 635)) // token='with' + (a = _PyPegen_expect_token(p, 633)) // token='with' && (_gather_210_var = _gather_210_rule(p)) // ','.(expression ['as' star_target])+ && @@ -23169,9 +23071,9 @@ invalid_with_stmt_indent_rule(Parser *p) Token * a; Token * newline_var; if ( - (_opt_var = _PyPegen_expect_token(p, 676), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 674), !p->error_indicator) // 'async'? && - (a = _PyPegen_expect_token(p, 635)) // token='with' + (a = _PyPegen_expect_token(p, 633)) // token='with' && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && @@ -23234,7 +23136,7 @@ invalid_try_stmt_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 644)) // token='try' + (a = _PyPegen_expect_token(p, 642)) // token='try' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -23266,7 +23168,7 @@ invalid_try_stmt_rule(Parser *p) Token * _literal; asdl_stmt_seq* block_var; if ( - (_keyword = _PyPegen_expect_token(p, 644)) // token='try' + (_keyword = _PyPegen_expect_token(p, 642)) // token='try' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -23305,7 +23207,7 @@ invalid_try_stmt_rule(Parser *p) Token * b; expr_ty expression_var; if ( - (_keyword = _PyPegen_expect_token(p, 644)) // token='try' + (_keyword = _PyPegen_expect_token(p, 642)) // token='try' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -23313,7 +23215,7 @@ invalid_try_stmt_rule(Parser *p) && (_loop1_216_var = _loop1_216_rule(p)) // 
except_block+ && - (a = _PyPegen_expect_token(p, 657)) // token='except' + (a = _PyPegen_expect_token(p, 655)) // token='except' && (b = _PyPegen_expect_token(p, 16)) // token='*' && @@ -23352,7 +23254,7 @@ invalid_try_stmt_rule(Parser *p) UNUSED(_opt_var); // Silence compiler warnings Token * a; if ( - (_keyword = _PyPegen_expect_token(p, 644)) // token='try' + (_keyword = _PyPegen_expect_token(p, 642)) // token='try' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -23360,7 +23262,7 @@ invalid_try_stmt_rule(Parser *p) && (_loop1_219_var = _loop1_219_rule(p)) // except_star_block+ && - (a = _PyPegen_expect_token(p, 657)) // token='except' + (a = _PyPegen_expect_token(p, 655)) // token='except' && (_opt_var = _tmp_220_rule(p), !p->error_indicator) // [expression ['as' NAME]] && @@ -23419,7 +23321,7 @@ invalid_except_stmt_rule(Parser *p) expr_ty a; expr_ty expressions_var; if ( - (_keyword = _PyPegen_expect_token(p, 657)) // token='except' + (_keyword = _PyPegen_expect_token(p, 655)) // token='except' && (_opt_var = _PyPegen_expect_token(p, 16), !p->error_indicator) // '*'? && @@ -23461,7 +23363,7 @@ invalid_except_stmt_rule(Parser *p) expr_ty expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 657)) // token='except' + (a = _PyPegen_expect_token(p, 655)) // token='except' && (_opt_var = _PyPegen_expect_token(p, 16), !p->error_indicator) // '*'? 
&& @@ -23494,7 +23396,7 @@ invalid_except_stmt_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 657)) // token='except' + (a = _PyPegen_expect_token(p, 655)) // token='except' && (newline_var = _PyPegen_expect_token(p, NEWLINE)) // token='NEWLINE' ) @@ -23522,7 +23424,7 @@ invalid_except_stmt_rule(Parser *p) void *_tmp_223_var; Token * a; if ( - (a = _PyPegen_expect_token(p, 657)) // token='except' + (a = _PyPegen_expect_token(p, 655)) // token='except' && (_literal = _PyPegen_expect_token(p, 16)) // token='*' && @@ -23571,7 +23473,7 @@ invalid_finally_stmt_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 653)) // token='finally' + (a = _PyPegen_expect_token(p, 651)) // token='finally' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -23627,7 +23529,7 @@ invalid_except_stmt_indent_rule(Parser *p) expr_ty expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 657)) // token='except' + (a = _PyPegen_expect_token(p, 655)) // token='except' && (expression_var = expression_rule(p)) // expression && @@ -23663,7 +23565,7 @@ invalid_except_stmt_indent_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 657)) // token='except' + (a = _PyPegen_expect_token(p, 655)) // token='except' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -23719,7 +23621,7 @@ invalid_except_star_stmt_indent_rule(Parser *p) expr_ty expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 657)) // token='except' + (a = _PyPegen_expect_token(p, 655)) // token='except' && (_literal = _PyPegen_expect_token(p, 16)) // token='*' && @@ -23958,7 +23860,7 @@ invalid_as_pattern_rule(Parser *p) if ( (or_pattern_var = or_pattern_rule(p)) // or_pattern && - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (a = _PyPegen_expect_soft_keyword(p, "_")) // soft_keyword='"_"' ) @@ 
-23988,7 +23890,7 @@ invalid_as_pattern_rule(Parser *p) if ( (or_pattern_var = or_pattern_rule(p)) // or_pattern && - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && _PyPegen_lookahead_with_name(0, _PyPegen_name_token, p) && @@ -24142,7 +24044,7 @@ invalid_if_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (_keyword = _PyPegen_expect_token(p, 662)) // token='if' + (_keyword = _PyPegen_expect_token(p, 660)) // token='if' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -24173,7 +24075,7 @@ invalid_if_stmt_rule(Parser *p) expr_ty a_1; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 662)) // token='if' + (a = _PyPegen_expect_token(p, 660)) // token='if' && (a_1 = named_expression_rule(p)) // named_expression && @@ -24228,7 +24130,7 @@ invalid_elif_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (_keyword = _PyPegen_expect_token(p, 664)) // token='elif' + (_keyword = _PyPegen_expect_token(p, 662)) // token='elif' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -24259,7 +24161,7 @@ invalid_elif_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 664)) // token='elif' + (a = _PyPegen_expect_token(p, 662)) // token='elif' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -24312,7 +24214,7 @@ invalid_else_stmt_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 665)) // token='else' + (a = _PyPegen_expect_token(p, 663)) // token='else' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -24365,7 +24267,7 @@ invalid_while_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (_keyword = _PyPegen_expect_token(p, 667)) // token='while' + (_keyword = _PyPegen_expect_token(p, 665)) // token='while' && (named_expression_var = 
named_expression_rule(p)) // named_expression && @@ -24396,7 +24298,7 @@ invalid_while_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 667)) // token='while' + (a = _PyPegen_expect_token(p, 665)) // token='while' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -24455,13 +24357,13 @@ invalid_for_stmt_rule(Parser *p) expr_ty star_expressions_var; expr_ty star_targets_var; if ( - (_opt_var = _PyPegen_expect_token(p, 676), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 674), !p->error_indicator) // 'async'? && - (_keyword = _PyPegen_expect_token(p, 672)) // token='for' + (_keyword = _PyPegen_expect_token(p, 670)) // token='for' && (star_targets_var = star_targets_rule(p)) // star_targets && - (_keyword_1 = _PyPegen_expect_token(p, 673)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 671)) // token='in' && (star_expressions_var = star_expressions_rule(p)) // star_expressions && @@ -24496,13 +24398,13 @@ invalid_for_stmt_rule(Parser *p) expr_ty star_expressions_var; expr_ty star_targets_var; if ( - (_opt_var = _PyPegen_expect_token(p, 676), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 674), !p->error_indicator) // 'async'? && - (a = _PyPegen_expect_token(p, 672)) // token='for' + (a = _PyPegen_expect_token(p, 670)) // token='for' && (star_targets_var = star_targets_rule(p)) // star_targets && - (_keyword = _PyPegen_expect_token(p, 673)) // token='in' + (_keyword = _PyPegen_expect_token(p, 671)) // token='in' && (star_expressions_var = star_expressions_rule(p)) // star_expressions && @@ -24568,9 +24470,9 @@ invalid_def_raw_rule(Parser *p) expr_ty name_var; Token * newline_var; if ( - (_opt_var = _PyPegen_expect_token(p, 676), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 674), !p->error_indicator) // 'async'? 
&& - (a = _PyPegen_expect_token(p, 677)) // token='def' + (a = _PyPegen_expect_token(p, 675)) // token='def' && (name_var = _PyPegen_name_token(p)) // NAME && @@ -24627,9 +24529,9 @@ invalid_def_raw_rule(Parser *p) asdl_stmt_seq* block_var; expr_ty name_var; if ( - (_opt_var = _PyPegen_expect_token(p, 676), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 674), !p->error_indicator) // 'async'? && - (_keyword = _PyPegen_expect_token(p, 677)) // token='def' + (_keyword = _PyPegen_expect_token(p, 675)) // token='def' && (name_var = _PyPegen_name_token(p)) // NAME && @@ -24693,7 +24595,7 @@ invalid_class_def_raw_rule(Parser *p) expr_ty name_var; Token * newline_var; if ( - (_keyword = _PyPegen_expect_token(p, 679)) // token='class' + (_keyword = _PyPegen_expect_token(p, 677)) // token='class' && (name_var = _PyPegen_name_token(p)) // NAME && @@ -24732,7 +24634,7 @@ invalid_class_def_raw_rule(Parser *p) expr_ty name_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 679)) // token='class' + (a = _PyPegen_expect_token(p, 677)) // token='class' && (name_var = _PyPegen_name_token(p)) // NAME && @@ -25550,7 +25452,7 @@ invalid_arithmetic_rule(Parser *p) && (_tmp_243_var = _tmp_243_rule(p)) // '+' | '-' | '*' | '/' | '%' | '//' | '@' && - (a = _PyPegen_expect_token(p, 681)) // token='not' + (a = _PyPegen_expect_token(p, 679)) // token='not' && (b = inversion_rule(p)) // inversion ) @@ -25599,7 +25501,7 @@ invalid_factor_rule(Parser *p) if ( (_tmp_244_var = _tmp_244_rule(p)) // '+' | '-' | '~' && - (a = _PyPegen_expect_token(p, 681)) // token='not' + (a = _PyPegen_expect_token(p, 679)) // token='not' && (b = factor_rule(p)) // factor ) @@ -26070,7 +25972,7 @@ _tmp_7_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_7[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'def'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 677)) // token='def' + (_keyword = _PyPegen_expect_token(p, 675)) // token='def' ) { D(fprintf(stderr, "%*c+ 
_tmp_7[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'def'")); @@ -26108,7 +26010,7 @@ _tmp_7_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_7[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'async'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 676)) // token='async' + (_keyword = _PyPegen_expect_token(p, 674)) // token='async' ) { D(fprintf(stderr, "%*c+ _tmp_7[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'async'")); @@ -26146,7 +26048,7 @@ _tmp_8_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_8[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'class'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 679)) // token='class' + (_keyword = _PyPegen_expect_token(p, 677)) // token='class' ) { D(fprintf(stderr, "%*c+ _tmp_8[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'class'")); @@ -26203,7 +26105,7 @@ _tmp_9_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_9[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'with'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 635)) // token='with' + (_keyword = _PyPegen_expect_token(p, 633)) // token='with' ) { D(fprintf(stderr, "%*c+ _tmp_9[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'with'")); @@ -26222,7 +26124,7 @@ _tmp_9_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_9[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'async'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 676)) // token='async' + (_keyword = _PyPegen_expect_token(p, 674)) // token='async' ) { D(fprintf(stderr, "%*c+ _tmp_9[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'async'")); @@ -26260,7 +26162,7 @@ _tmp_10_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_10[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'for'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 672)) // token='for' + (_keyword = _PyPegen_expect_token(p, 670)) // token='for' ) { D(fprintf(stderr, "%*c+ _tmp_10[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, 
"'for'")); @@ -26279,7 +26181,7 @@ _tmp_10_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_10[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'async'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 676)) // token='async' + (_keyword = _PyPegen_expect_token(p, 674)) // token='async' ) { D(fprintf(stderr, "%*c+ _tmp_10[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'async'")); @@ -27303,7 +27205,7 @@ _tmp_28_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (z = _PyPegen_name_token(p)) // NAME ) @@ -27466,7 +27368,7 @@ _tmp_31_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (z = _PyPegen_name_token(p)) // NAME ) @@ -29453,7 +29355,7 @@ _tmp_62_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (z = _PyPegen_name_token(p)) // NAME ) @@ -29499,7 +29401,7 @@ _tmp_63_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (z = _PyPegen_name_token(p)) // NAME ) @@ -35341,7 +35243,7 @@ _tmp_158_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_158[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'else'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 665)) // token='else' + (_keyword = _PyPegen_expect_token(p, 663)) // token='else' ) { D(fprintf(stderr, "%*c+ _tmp_158[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'else'")); @@ -38922,7 +38824,7 @@ _tmp_214_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_214[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'except'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 657)) // token='except' + 
(_keyword = _PyPegen_expect_token(p, 655)) // token='except' ) { D(fprintf(stderr, "%*c+ _tmp_214[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'except'")); @@ -38941,7 +38843,7 @@ _tmp_214_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_214[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'finally'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 653)) // token='finally' + (_keyword = _PyPegen_expect_token(p, 651)) // token='finally' ) { D(fprintf(stderr, "%*c+ _tmp_214[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'finally'")); @@ -39119,7 +39021,7 @@ _tmp_217_rule(Parser *p) Token * _keyword; expr_ty name_var; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (name_var = _PyPegen_name_token(p)) // NAME ) @@ -39341,7 +39243,7 @@ _tmp_221_rule(Parser *p) Token * _keyword; expr_ty name_var; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (name_var = _PyPegen_name_token(p)) // NAME ) @@ -39382,7 +39284,7 @@ _tmp_222_rule(Parser *p) Token * _keyword; expr_ty name_var; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (name_var = _PyPegen_name_token(p)) // NAME ) @@ -39480,7 +39382,7 @@ _tmp_224_rule(Parser *p) Token * _keyword; expr_ty name_var; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (name_var = _PyPegen_name_token(p)) // NAME ) @@ -39521,7 +39423,7 @@ _tmp_225_rule(Parser *p) Token * _keyword; expr_ty name_var; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (name_var = _PyPegen_name_token(p)) // NAME ) @@ -41216,7 +41118,7 @@ _tmp_255_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 662)) // token='if' 
+ (_keyword = _PyPegen_expect_token(p, 660)) // token='if' && (z = disjunction_rule(p)) // disjunction ) @@ -41262,7 +41164,7 @@ _tmp_256_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 662)) // token='if' + (_keyword = _PyPegen_expect_token(p, 660)) // token='if' && (z = disjunction_rule(p)) // disjunction ) @@ -41975,7 +41877,7 @@ _tmp_271_rule(Parser *p) Token * _keyword; expr_ty name_var; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (name_var = _PyPegen_name_token(p)) // NAME ) @@ -42234,7 +42136,7 @@ _tmp_276_rule(Parser *p) Token * _keyword; expr_ty star_target_var; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (star_target_var = star_target_rule(p)) // star_target ) @@ -42275,7 +42177,7 @@ _tmp_277_rule(Parser *p) Token * _keyword; expr_ty star_target_var; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (star_target_var = star_target_rule(p)) // star_target ) @@ -42316,7 +42218,7 @@ _tmp_278_rule(Parser *p) Token * _keyword; expr_ty star_target_var; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (star_target_var = star_target_rule(p)) // star_target ) @@ -42357,7 +42259,7 @@ _tmp_279_rule(Parser *p) Token * _keyword; expr_ty star_target_var; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (star_target_var = star_target_rule(p)) // star_target ) From 8d63c8d47b9edd8ac2f0b395b2fa0ae5f571252d Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Tue, 4 Jun 2024 02:36:28 -0400 Subject: [PATCH 070/373] gh-106531: Apply changes from importlib_resources 6.3.2 (#117054) Apply changes from importlib_resources 6.3.2. 
--- Lib/importlib/resources/_common.py | 2 + Lib/importlib/resources/readers.py | 54 ++++++- .../resources/data01/subdirectory/binary.file | Bin 4 -> 4 bytes .../namespacedata01/subdirectory/binary.file | 1 + .../test_importlib/resources/test_contents.py | 2 +- .../test_importlib/resources/test_custom.py | 6 +- .../test_importlib/resources/test_files.py | 14 +- .../test_importlib/resources/test_open.py | 6 +- .../test_importlib/resources/test_path.py | 12 +- .../test_importlib/resources/test_read.py | 29 +++- .../test_importlib/resources/test_reader.py | 29 ++-- .../test_importlib/resources/test_resource.py | 134 ++++++++---------- Lib/test/test_importlib/resources/util.py | 51 +++---- Lib/test/test_importlib/resources/zip.py | 30 ++++ Makefile.pre.in | 1 + ...-03-19-21-41-31.gh-issue-106531.Mgd--6.rst | 6 + 16 files changed, 231 insertions(+), 146 deletions(-) create mode 100644 Lib/test/test_importlib/resources/namespacedata01/subdirectory/binary.file create mode 100755 Lib/test/test_importlib/resources/zip.py create mode 100644 Misc/NEWS.d/next/Library/2024-03-19-21-41-31.gh-issue-106531.Mgd--6.rst diff --git a/Lib/importlib/resources/_common.py b/Lib/importlib/resources/_common.py index e18082fb3d26a0..ca5b06743b46a6 100644 --- a/Lib/importlib/resources/_common.py +++ b/Lib/importlib/resources/_common.py @@ -25,6 +25,8 @@ def package_to_anchor(func): >>> files('a', 'b') Traceback (most recent call last): TypeError: files() takes from 0 to 1 positional arguments but 2 were given + + Remove this compatibility in Python 3.14. """ undefined = object() diff --git a/Lib/importlib/resources/readers.py b/Lib/importlib/resources/readers.py index c3cdf769cbecb0..b86cdeff57c4c2 100644 --- a/Lib/importlib/resources/readers.py +++ b/Lib/importlib/resources/readers.py @@ -1,7 +1,10 @@ import collections +import contextlib import itertools import pathlib import operator +import re +import warnings import zipfile from . 
import abc @@ -62,7 +65,7 @@ class MultiplexedPath(abc.Traversable): """ def __init__(self, *paths): - self._paths = list(map(pathlib.Path, remove_duplicates(paths))) + self._paths = list(map(_ensure_traversable, remove_duplicates(paths))) if not self._paths: message = 'MultiplexedPath must contain at least one path' raise FileNotFoundError(message) @@ -130,7 +133,36 @@ class NamespaceReader(abc.TraversableResources): def __init__(self, namespace_path): if 'NamespacePath' not in str(namespace_path): raise ValueError('Invalid path') - self.path = MultiplexedPath(*list(namespace_path)) + self.path = MultiplexedPath(*map(self._resolve, namespace_path)) + + @classmethod + def _resolve(cls, path_str) -> abc.Traversable: + r""" + Given an item from a namespace path, resolve it to a Traversable. + + path_str might be a directory on the filesystem or a path to a + zipfile plus the path within the zipfile, e.g. ``/foo/bar`` or + ``/foo/baz.zip/inner_dir`` or ``foo\baz.zip\inner_dir\sub``. + """ + (dir,) = (cand for cand in cls._candidate_paths(path_str) if cand.is_dir()) + return dir + + @classmethod + def _candidate_paths(cls, path_str): + yield pathlib.Path(path_str) + yield from cls._resolve_zip_path(path_str) + + @staticmethod + def _resolve_zip_path(path_str): + for match in reversed(list(re.finditer(r'[\\/]', path_str))): + with contextlib.suppress( + FileNotFoundError, + IsADirectoryError, + NotADirectoryError, + PermissionError, + ): + inner = path_str[match.end() :].replace('\\', '/') + '/' + yield zipfile.Path(path_str[: match.start()], inner.lstrip('/')) def resource_path(self, resource): """ @@ -142,3 +174,21 @@ def resource_path(self, resource): def files(self): return self.path + + +def _ensure_traversable(path): + """ + Convert deprecated string arguments to traversables (pathlib.Path). + + Remove with Python 3.15. + """ + if not isinstance(path, str): + return path + + warnings.warn( + "String arguments are deprecated. 
Pass a Traversable instead.", + DeprecationWarning, + stacklevel=3, + ) + + return pathlib.Path(path) diff --git a/Lib/test/test_importlib/resources/data01/subdirectory/binary.file b/Lib/test/test_importlib/resources/data01/subdirectory/binary.file index eaf36c1daccfdf325514461cd1a2ffbc139b5464..5bd8bb897b13225c93a1d26baa88c96b7bd5d817 100644 GIT binary patch literal 4 LcmZQ!Wn%{b05$*@ literal 4 LcmZQzWMT#Y01f~L diff --git a/Lib/test/test_importlib/resources/namespacedata01/subdirectory/binary.file b/Lib/test/test_importlib/resources/namespacedata01/subdirectory/binary.file new file mode 100644 index 00000000000000..100f50643d8d21 --- /dev/null +++ b/Lib/test/test_importlib/resources/namespacedata01/subdirectory/binary.file @@ -0,0 +1 @@ +  \ No newline at end of file diff --git a/Lib/test/test_importlib/resources/test_contents.py b/Lib/test/test_importlib/resources/test_contents.py index 1a13f043a86f03..beab67ccc21680 100644 --- a/Lib/test/test_importlib/resources/test_contents.py +++ b/Lib/test/test_importlib/resources/test_contents.py @@ -31,8 +31,8 @@ class ContentsZipTests(ContentsTests, util.ZipSetup, unittest.TestCase): class ContentsNamespaceTests(ContentsTests, unittest.TestCase): expected = { # no __init__ because of namespace design - # no subdirectory as incidental difference in fixture 'binary.file', + 'subdirectory', 'utf-16.file', 'utf-8.file', } diff --git a/Lib/test/test_importlib/resources/test_custom.py b/Lib/test/test_importlib/resources/test_custom.py index 73127209a2761b..640f90fc0dd91a 100644 --- a/Lib/test/test_importlib/resources/test_custom.py +++ b/Lib/test/test_importlib/resources/test_custom.py @@ -5,6 +5,7 @@ from test.support import os_helper from importlib import resources +from importlib.resources import abc from importlib.resources.abc import TraversableResources, ResourceReader from . 
import util @@ -39,8 +40,9 @@ def setUp(self): self.addCleanup(self.fixtures.close) def test_custom_loader(self): - temp_dir = self.fixtures.enter_context(os_helper.temp_dir()) + temp_dir = pathlib.Path(self.fixtures.enter_context(os_helper.temp_dir())) loader = SimpleLoader(MagicResources(temp_dir)) pkg = util.create_package_from_loader(loader) files = resources.files(pkg) - assert files is temp_dir + assert isinstance(files, abc.Traversable) + assert list(files.iterdir()) == [] diff --git a/Lib/test/test_importlib/resources/test_files.py b/Lib/test/test_importlib/resources/test_files.py index 26c8b04e44c3b9..7df6d03ead7480 100644 --- a/Lib/test/test_importlib/resources/test_files.py +++ b/Lib/test/test_importlib/resources/test_files.py @@ -1,4 +1,3 @@ -import typing import textwrap import unittest import warnings @@ -32,13 +31,14 @@ def test_read_text(self): actual = files.joinpath('utf-8.file').read_text(encoding='utf-8') assert actual == 'Hello, UTF-8 world!\n' - @unittest.skipUnless( - hasattr(typing, 'runtime_checkable'), - "Only suitable when typing supports runtime_checkable", - ) def test_traversable(self): assert isinstance(resources.files(self.data), Traversable) + def test_joinpath_with_multiple_args(self): + files = resources.files(self.data) + binfile = files.joinpath('subdirectory', 'binary.file') + self.assertTrue(binfile.is_file()) + def test_old_parameter(self): """ Files used to take a 'package' parameter. 
Make sure anyone @@ -64,6 +64,10 @@ def setUp(self): self.data = namespacedata01 +class OpenNamespaceZipTests(FilesTests, util.ZipSetup, unittest.TestCase): + ZIP_MODULE = 'namespacedata01' + + class SiteDir: def setUp(self): self.fixtures = contextlib.ExitStack() diff --git a/Lib/test/test_importlib/resources/test_open.py b/Lib/test/test_importlib/resources/test_open.py index 86becb4bfaad37..3b6b2142ef47b1 100644 --- a/Lib/test/test_importlib/resources/test_open.py +++ b/Lib/test/test_importlib/resources/test_open.py @@ -24,7 +24,7 @@ def test_open_binary(self): target = resources.files(self.data) / 'binary.file' with target.open('rb') as fp: result = fp.read() - self.assertEqual(result, b'\x00\x01\x02\x03') + self.assertEqual(result, bytes(range(4))) def test_open_text_default_encoding(self): target = resources.files(self.data) / 'utf-8.file' @@ -81,5 +81,9 @@ class OpenZipTests(OpenTests, util.ZipSetup, unittest.TestCase): pass +class OpenNamespaceZipTests(OpenTests, util.ZipSetup, unittest.TestCase): + ZIP_MODULE = 'namespacedata01' + + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_importlib/resources/test_path.py b/Lib/test/test_importlib/resources/test_path.py index 34a6bdd2d58b91..90b22905ab8692 100644 --- a/Lib/test/test_importlib/resources/test_path.py +++ b/Lib/test/test_importlib/resources/test_path.py @@ -1,4 +1,5 @@ import io +import pathlib import unittest from importlib import resources @@ -15,18 +16,13 @@ def execute(self, package, path): class PathTests: def test_reading(self): """ - Path should be readable. - - Test also implicitly verifies the returned object is a pathlib.Path - instance. + Path should be readable and a pathlib.Path instance. """ target = resources.files(self.data) / 'utf-8.file' with resources.as_file(target) as path: + self.assertIsInstance(path, pathlib.Path) self.assertTrue(path.name.endswith("utf-8.file"), repr(path)) - # pathlib.Path.read_text() was introduced in Python 3.5. 
- with path.open('r', encoding='utf-8') as file: - text = file.read() - self.assertEqual('Hello, UTF-8 world!\n', text) + self.assertEqual('Hello, UTF-8 world!\n', path.read_text(encoding='utf-8')) class PathDiskTests(PathTests, unittest.TestCase): diff --git a/Lib/test/test_importlib/resources/test_read.py b/Lib/test/test_importlib/resources/test_read.py index 088982681e8b0c..984feecbb9ed69 100644 --- a/Lib/test/test_importlib/resources/test_read.py +++ b/Lib/test/test_importlib/resources/test_read.py @@ -18,7 +18,7 @@ def execute(self, package, path): class ReadTests: def test_read_bytes(self): result = resources.files(self.data).joinpath('binary.file').read_bytes() - self.assertEqual(result, b'\0\1\2\3') + self.assertEqual(result, bytes(range(4))) def test_read_text_default_encoding(self): result = ( @@ -57,17 +57,15 @@ class ReadDiskTests(ReadTests, unittest.TestCase): class ReadZipTests(ReadTests, util.ZipSetup, unittest.TestCase): def test_read_submodule_resource(self): - submodule = import_module('ziptestdata.subdirectory') + submodule = import_module('data01.subdirectory') result = resources.files(submodule).joinpath('binary.file').read_bytes() - self.assertEqual(result, b'\0\1\2\3') + self.assertEqual(result, bytes(range(4, 8))) def test_read_submodule_resource_by_name(self): result = ( - resources.files('ziptestdata.subdirectory') - .joinpath('binary.file') - .read_bytes() + resources.files('data01.subdirectory').joinpath('binary.file').read_bytes() ) - self.assertEqual(result, b'\0\1\2\3') + self.assertEqual(result, bytes(range(4, 8))) class ReadNamespaceTests(ReadTests, unittest.TestCase): @@ -77,5 +75,22 @@ def setUp(self): self.data = namespacedata01 +class ReadNamespaceZipTests(ReadTests, util.ZipSetup, unittest.TestCase): + ZIP_MODULE = 'namespacedata01' + + def test_read_submodule_resource(self): + submodule = import_module('namespacedata01.subdirectory') + result = resources.files(submodule).joinpath('binary.file').read_bytes() + 
self.assertEqual(result, bytes(range(12, 16))) + + def test_read_submodule_resource_by_name(self): + result = ( + resources.files('namespacedata01.subdirectory') + .joinpath('binary.file') + .read_bytes() + ) + self.assertEqual(result, bytes(range(12, 16))) + + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_importlib/resources/test_reader.py b/Lib/test/test_importlib/resources/test_reader.py index 8670f72a334585..dac9c2a892ffd2 100644 --- a/Lib/test/test_importlib/resources/test_reader.py +++ b/Lib/test/test_importlib/resources/test_reader.py @@ -10,8 +10,7 @@ class MultiplexedPathTest(unittest.TestCase): @classmethod def setUpClass(cls): - path = pathlib.Path(__file__).parent / 'namespacedata01' - cls.folder = str(path) + cls.folder = pathlib.Path(__file__).parent / 'namespacedata01' def test_init_no_paths(self): with self.assertRaises(FileNotFoundError): @@ -19,7 +18,7 @@ def test_init_no_paths(self): def test_init_file(self): with self.assertRaises(NotADirectoryError): - MultiplexedPath(os.path.join(self.folder, 'binary.file')) + MultiplexedPath(self.folder / 'binary.file') def test_iterdir(self): contents = {path.name for path in MultiplexedPath(self.folder).iterdir()} @@ -27,10 +26,12 @@ def test_iterdir(self): contents.remove('__pycache__') except (KeyError, ValueError): pass - self.assertEqual(contents, {'binary.file', 'utf-16.file', 'utf-8.file'}) + self.assertEqual( + contents, {'subdirectory', 'binary.file', 'utf-16.file', 'utf-8.file'} + ) def test_iterdir_duplicate(self): - data01 = os.path.abspath(os.path.join(__file__, '..', 'data01')) + data01 = pathlib.Path(__file__).parent.joinpath('data01') contents = { path.name for path in MultiplexedPath(self.folder, data01).iterdir() } @@ -60,17 +61,17 @@ def test_open_file(self): path.open() def test_join_path(self): - prefix = os.path.abspath(os.path.join(__file__, '..')) - data01 = os.path.join(prefix, 'data01') + data01 = pathlib.Path(__file__).parent.joinpath('data01') + prefix = 
str(data01.parent) path = MultiplexedPath(self.folder, data01) self.assertEqual( str(path.joinpath('binary.file'))[len(prefix) + 1 :], os.path.join('namespacedata01', 'binary.file'), ) - self.assertEqual( - str(path.joinpath('subdirectory'))[len(prefix) + 1 :], - os.path.join('data01', 'subdirectory'), - ) + sub = path.joinpath('subdirectory') + assert isinstance(sub, MultiplexedPath) + assert 'namespacedata01' in str(sub) + assert 'data01' in str(sub) self.assertEqual( str(path.joinpath('imaginary'))[len(prefix) + 1 :], os.path.join('namespacedata01', 'imaginary'), @@ -82,9 +83,9 @@ def test_join_path_compound(self): assert not path.joinpath('imaginary/foo.py').exists() def test_join_path_common_subdir(self): - prefix = os.path.abspath(os.path.join(__file__, '..')) - data01 = os.path.join(prefix, 'data01') - data02 = os.path.join(prefix, 'data02') + data01 = pathlib.Path(__file__).parent.joinpath('data01') + data02 = pathlib.Path(__file__).parent.joinpath('data02') + prefix = str(data01.parent) path = MultiplexedPath(data01, data02) self.assertIsInstance(path.joinpath('subdirectory'), MultiplexedPath) self.assertEqual( diff --git a/Lib/test/test_importlib/resources/test_resource.py b/Lib/test/test_importlib/resources/test_resource.py index 6f75cf57f03d02..d1d45d9b4617f3 100644 --- a/Lib/test/test_importlib/resources/test_resource.py +++ b/Lib/test/test_importlib/resources/test_resource.py @@ -1,15 +1,10 @@ -import contextlib import sys import unittest -import uuid import pathlib from . import data01 -from . import zipdata01, zipdata02 from . 
import util from importlib import resources, import_module -from test.support import import_helper, os_helper -from test.support.os_helper import unlink class ResourceTests: @@ -89,34 +84,32 @@ def test_package_has_no_reader_fallback(self): class ResourceFromZipsTest01(util.ZipSetupBase, unittest.TestCase): - ZIP_MODULE = zipdata01 # type: ignore + ZIP_MODULE = 'data01' def test_is_submodule_resource(self): - submodule = import_module('ziptestdata.subdirectory') + submodule = import_module('data01.subdirectory') self.assertTrue(resources.files(submodule).joinpath('binary.file').is_file()) def test_read_submodule_resource_by_name(self): self.assertTrue( - resources.files('ziptestdata.subdirectory') - .joinpath('binary.file') - .is_file() + resources.files('data01.subdirectory').joinpath('binary.file').is_file() ) def test_submodule_contents(self): - submodule = import_module('ziptestdata.subdirectory') + submodule = import_module('data01.subdirectory') self.assertEqual( names(resources.files(submodule)), {'__init__.py', 'binary.file'} ) def test_submodule_contents_by_name(self): self.assertEqual( - names(resources.files('ziptestdata.subdirectory')), + names(resources.files('data01.subdirectory')), {'__init__.py', 'binary.file'}, ) def test_as_file_directory(self): - with resources.as_file(resources.files('ziptestdata')) as data: - assert data.name == 'ziptestdata' + with resources.as_file(resources.files('data01')) as data: + assert data.name == 'data01' assert data.is_dir() assert data.joinpath('subdirectory').is_dir() assert len(list(data.iterdir())) @@ -124,7 +117,7 @@ def test_as_file_directory(self): class ResourceFromZipsTest02(util.ZipSetupBase, unittest.TestCase): - ZIP_MODULE = zipdata02 # type: ignore + ZIP_MODULE = 'data02' def test_unrelated_contents(self): """ @@ -132,93 +125,48 @@ def test_unrelated_contents(self): distinct resources. Ref python/importlib_resources#44. 
""" self.assertEqual( - names(resources.files('ziptestdata.one')), + names(resources.files('data02.one')), {'__init__.py', 'resource1.txt'}, ) self.assertEqual( - names(resources.files('ziptestdata.two')), + names(resources.files('data02.two')), {'__init__.py', 'resource2.txt'}, ) -@contextlib.contextmanager -def zip_on_path(dir): - data_path = pathlib.Path(zipdata01.__file__) - source_zip_path = data_path.parent.joinpath('ziptestdata.zip') - zip_path = pathlib.Path(dir) / f'{uuid.uuid4()}.zip' - zip_path.write_bytes(source_zip_path.read_bytes()) - sys.path.append(str(zip_path)) - import_module('ziptestdata') - - try: - yield - finally: - with contextlib.suppress(ValueError): - sys.path.remove(str(zip_path)) - - with contextlib.suppress(KeyError): - del sys.path_importer_cache[str(zip_path)] - del sys.modules['ziptestdata'] - - with contextlib.suppress(OSError): - unlink(zip_path) - - -class DeletingZipsTest(unittest.TestCase): +class DeletingZipsTest(util.ZipSetupBase, unittest.TestCase): """Having accessed resources in a zip file should not keep an open reference to the zip. 
""" - def setUp(self): - self.fixtures = contextlib.ExitStack() - self.addCleanup(self.fixtures.close) - - modules = import_helper.modules_setup() - self.addCleanup(import_helper.modules_cleanup, *modules) - - temp_dir = self.fixtures.enter_context(os_helper.temp_dir()) - self.fixtures.enter_context(zip_on_path(temp_dir)) - def test_iterdir_does_not_keep_open(self): - [item.name for item in resources.files('ziptestdata').iterdir()] + [item.name for item in resources.files('data01').iterdir()] def test_is_file_does_not_keep_open(self): - resources.files('ziptestdata').joinpath('binary.file').is_file() + resources.files('data01').joinpath('binary.file').is_file() def test_is_file_failure_does_not_keep_open(self): - resources.files('ziptestdata').joinpath('not-present').is_file() + resources.files('data01').joinpath('not-present').is_file() @unittest.skip("Desired but not supported.") def test_as_file_does_not_keep_open(self): # pragma: no cover - resources.as_file(resources.files('ziptestdata') / 'binary.file') + resources.as_file(resources.files('data01') / 'binary.file') def test_entered_path_does_not_keep_open(self): """ Mimic what certifi does on import to make its bundle available for the process duration. 
""" - resources.as_file(resources.files('ziptestdata') / 'binary.file').__enter__() + resources.as_file(resources.files('data01') / 'binary.file').__enter__() def test_read_binary_does_not_keep_open(self): - resources.files('ziptestdata').joinpath('binary.file').read_bytes() + resources.files('data01').joinpath('binary.file').read_bytes() def test_read_text_does_not_keep_open(self): - resources.files('ziptestdata').joinpath('utf-8.file').read_text( - encoding='utf-8' - ) + resources.files('data01').joinpath('utf-8.file').read_text(encoding='utf-8') -class ResourceFromNamespaceTest01(unittest.TestCase): - site_dir = str(pathlib.Path(__file__).parent) - - @classmethod - def setUpClass(cls): - sys.path.append(cls.site_dir) - - @classmethod - def tearDownClass(cls): - sys.path.remove(cls.site_dir) - +class ResourceFromNamespaceTests: def test_is_submodule_resource(self): self.assertTrue( resources.files(import_module('namespacedata01')) @@ -237,7 +185,9 @@ def test_submodule_contents(self): contents.remove('__pycache__') except KeyError: pass - self.assertEqual(contents, {'binary.file', 'utf-8.file', 'utf-16.file'}) + self.assertEqual( + contents, {'subdirectory', 'binary.file', 'utf-8.file', 'utf-16.file'} + ) def test_submodule_contents_by_name(self): contents = names(resources.files('namespacedata01')) @@ -245,7 +195,45 @@ def test_submodule_contents_by_name(self): contents.remove('__pycache__') except KeyError: pass - self.assertEqual(contents, {'binary.file', 'utf-8.file', 'utf-16.file'}) + self.assertEqual( + contents, {'subdirectory', 'binary.file', 'utf-8.file', 'utf-16.file'} + ) + + def test_submodule_sub_contents(self): + contents = names(resources.files(import_module('namespacedata01.subdirectory'))) + try: + contents.remove('__pycache__') + except KeyError: + pass + self.assertEqual(contents, {'binary.file'}) + + def test_submodule_sub_contents_by_name(self): + contents = names(resources.files('namespacedata01.subdirectory')) + try: + 
contents.remove('__pycache__') + except KeyError: + pass + self.assertEqual(contents, {'binary.file'}) + + +class ResourceFromNamespaceDiskTests(ResourceFromNamespaceTests, unittest.TestCase): + site_dir = str(pathlib.Path(__file__).parent) + + @classmethod + def setUpClass(cls): + sys.path.append(cls.site_dir) + + @classmethod + def tearDownClass(cls): + sys.path.remove(cls.site_dir) + + +class ResourceFromNamespaceZipTests( + util.ZipSetupBase, + ResourceFromNamespaceTests, + unittest.TestCase, +): + ZIP_MODULE = 'namespacedata01' if __name__ == '__main__': diff --git a/Lib/test/test_importlib/resources/util.py b/Lib/test/test_importlib/resources/util.py index dbe6ee81476699..d4bf3e6cc5dfdc 100644 --- a/Lib/test/test_importlib/resources/util.py +++ b/Lib/test/test_importlib/resources/util.py @@ -4,11 +4,12 @@ import sys import types import pathlib +import contextlib from . import data01 -from . import zipdata01 from importlib.resources.abc import ResourceReader -from test.support import import_helper +from test.support import import_helper, os_helper +from . 
import zip as zip_ from importlib.machinery import ModuleSpec @@ -141,39 +142,23 @@ def test_useless_loader(self): class ZipSetupBase: - ZIP_MODULE = None - - @classmethod - def setUpClass(cls): - data_path = pathlib.Path(cls.ZIP_MODULE.__file__) - data_dir = data_path.parent - cls._zip_path = str(data_dir / 'ziptestdata.zip') - sys.path.append(cls._zip_path) - cls.data = importlib.import_module('ziptestdata') - - @classmethod - def tearDownClass(cls): - try: - sys.path.remove(cls._zip_path) - except ValueError: - pass - - try: - del sys.path_importer_cache[cls._zip_path] - del sys.modules[cls.data.__name__] - except KeyError: - pass - - try: - del cls.data - del cls._zip_path - except AttributeError: - pass + ZIP_MODULE = 'data01' def setUp(self): - modules = import_helper.modules_setup() - self.addCleanup(import_helper.modules_cleanup, *modules) + self.fixtures = contextlib.ExitStack() + self.addCleanup(self.fixtures.close) + + self.fixtures.enter_context(import_helper.isolated_modules()) + + temp_dir = self.fixtures.enter_context(os_helper.temp_dir()) + modules = pathlib.Path(temp_dir) / 'zipped modules.zip' + src_path = pathlib.Path(__file__).parent.joinpath(self.ZIP_MODULE) + self.fixtures.enter_context( + import_helper.DirsOnSysPath(str(zip_.make_zip_file(src_path, modules))) + ) + + self.data = importlib.import_module(self.ZIP_MODULE) class ZipSetup(ZipSetupBase): - ZIP_MODULE = zipdata01 # type: ignore + pass diff --git a/Lib/test/test_importlib/resources/zip.py b/Lib/test/test_importlib/resources/zip.py new file mode 100755 index 00000000000000..4dcf6facc770cb --- /dev/null +++ b/Lib/test/test_importlib/resources/zip.py @@ -0,0 +1,30 @@ +""" +Generate zip test data files. +""" + +import contextlib +import os +import pathlib +import zipfile + + +def make_zip_file(src, dst): + """ + Zip the files in src into a new zipfile at dst. 
+ """ + with zipfile.ZipFile(dst, 'w') as zf: + for src_path, rel in walk(src): + dst_name = src.name / pathlib.PurePosixPath(rel.as_posix()) + zf.write(src_path, dst_name) + zipfile._path.CompleteDirs.inject(zf) + return dst + + +def walk(datapath): + for dirpath, dirnames, filenames in os.walk(datapath): + with contextlib.suppress(ValueError): + dirnames.remove('__pycache__') + for filename in filenames: + res = pathlib.Path(dirpath) / filename + rel = res.relative_to(datapath) + yield res, rel diff --git a/Makefile.pre.in b/Makefile.pre.in index 9a2fc34f030662..22dba279faa935 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2438,6 +2438,7 @@ TESTSUBDIRS= idlelib/idle_test \ test/test_importlib/resources/data03/namespace/portion1 \ test/test_importlib/resources/data03/namespace/portion2 \ test/test_importlib/resources/namespacedata01 \ + test/test_importlib/resources/namespacedata01/subdirectory \ test/test_importlib/resources/zipdata01 \ test/test_importlib/resources/zipdata02 \ test/test_importlib/source \ diff --git a/Misc/NEWS.d/next/Library/2024-03-19-21-41-31.gh-issue-106531.Mgd--6.rst b/Misc/NEWS.d/next/Library/2024-03-19-21-41-31.gh-issue-106531.Mgd--6.rst new file mode 100644 index 00000000000000..6a5783c5ad9846 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-03-19-21-41-31.gh-issue-106531.Mgd--6.rst @@ -0,0 +1,6 @@ +In :mod:`importlib.resources`, sync with `importlib_resources 6.3.2 +`_, +including: ``MultiplexedPath`` now expects ``Traversable`` paths, +deprecating string arguments to ``MultiplexedPath``; Enabled support for +resources in namespace packages in zip files; Fixed ``NotADirectoryError`` +when calling files on a subdirectory of a namespace package. 
From a8f1152b70d707340b394689cd09aa0831da3601 Mon Sep 17 00:00:00 2001 From: "d.grigonis" Date: Tue, 4 Jun 2024 10:44:49 +0300 Subject: [PATCH 071/373] gh-119879: str.find(): Utilize last character gap for two-way periodic needles (#119880) --- ...-06-02-06-12-35.gh-issue-119879.Jit951.rst | 1 + Objects/stringlib/fastsearch.h | 63 ++++++++++--------- 2 files changed, 36 insertions(+), 28 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-02-06-12-35.gh-issue-119879.Jit951.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-02-06-12-35.gh-issue-119879.Jit951.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-02-06-12-35.gh-issue-119879.Jit951.rst new file mode 100644 index 00000000000000..89de6b0299a35a --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-02-06-12-35.gh-issue-119879.Jit951.rst @@ -0,0 +1 @@ +String search is now slightly faster for certain cases. It now utilizes last character gap (good suffix rule) for two-way periodic needles. diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h index 257b7bd6788ad2..309ed1554f4699 100644 --- a/Objects/stringlib/fastsearch.h +++ b/Objects/stringlib/fastsearch.h @@ -256,7 +256,7 @@ STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle, The local period of the cut is the minimal length of a string w such that (left endswith w or w endswith left) - and (right startswith w or w startswith left). + and (right startswith w or w startswith right). The Critical Factorization Theorem says that this maximal local period is the global period of the string. 
@@ -337,21 +337,20 @@ STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle, if (p->is_periodic) { assert(p->cut <= len_needle/2); assert(p->cut < p->period); - p->gap = 0; // unused } else { // A lower bound on the period p->period = Py_MAX(p->cut, len_needle - p->cut) + 1; - // The gap between the last character and the previous - // occurrence of an equivalent character (modulo TABLE_SIZE) - p->gap = len_needle; - STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK; - for (Py_ssize_t i = len_needle - 2; i >= 0; i--) { - STRINGLIB_CHAR x = needle[i] & TABLE_MASK; - if (x == last) { - p->gap = len_needle - 1 - i; - break; - } + } + // The gap between the last character and the previous + // occurrence of an equivalent character (modulo TABLE_SIZE) + p->gap = len_needle; + STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK; + for (Py_ssize_t i = len_needle - 2; i >= 0; i--) { + STRINGLIB_CHAR x = needle[i] & TABLE_MASK; + if (x == last) { + p->gap = len_needle - 1 - i; + break; } } // Fill up a compressed Boyer-Moore "Bad Character" table @@ -383,6 +382,8 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack, const STRINGLIB_CHAR *window; LOG("===== Two-way: \"%s\" in \"%s\". 
=====\n", needle, haystack); + Py_ssize_t gap = p->gap; + Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap); if (p->is_periodic) { LOG("Needle is periodic.\n"); Py_ssize_t memory = 0; @@ -408,8 +409,16 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack, Py_ssize_t i = Py_MAX(cut, memory); for (; i < len_needle; i++) { if (needle[i] != window[i]) { - LOG("Right half does not match.\n"); - window_last += i - cut + 1; + if (i < gap_jump_end) { + LOG("Early right half mismatch: jump by gap.\n"); + assert(gap >= i - cut + 1); + window_last += gap; + } + else { + LOG("Late right half mismatch: jump by n (>gap)\n"); + assert(i - cut + 1 > gap); + window_last += i - cut + 1; + } memory = 0; goto periodicwindowloop; } @@ -442,10 +451,8 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack, } } else { - Py_ssize_t gap = p->gap; period = Py_MAX(gap, period); LOG("Needle is not periodic.\n"); - Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap); windowloop: while (window_last < haystack_end) { for (;;) { @@ -463,19 +470,19 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack, window = window_last - len_needle + 1; assert((window[len_needle - 1] & TABLE_MASK) == (needle[len_needle - 1] & TABLE_MASK)); - for (Py_ssize_t i = cut; i < gap_jump_end; i++) { - if (needle[i] != window[i]) { - LOG("Early right half mismatch: jump by gap.\n"); - assert(gap >= i - cut + 1); - window_last += gap; - goto windowloop; - } - } - for (Py_ssize_t i = gap_jump_end; i < len_needle; i++) { + Py_ssize_t i = cut; + for (; i < len_needle; i++) { if (needle[i] != window[i]) { - LOG("Late right half mismatch.\n"); - assert(i - cut + 1 > gap); - window_last += i - cut + 1; + if (i < gap_jump_end) { + LOG("Early right half mismatch: jump by gap.\n"); + assert(gap >= i - cut + 1); + window_last += gap; + } + else { + LOG("Late right half mismatch: jump by n (>gap)\n"); + assert(i - cut + 1 > gap); + window_last += i 
- cut + 1; + } goto windowloop; } } From 5c48eb0cc6c3e84aafda0a734a05ecec14fc0ccf Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Tue, 4 Jun 2024 09:17:45 +0100 Subject: [PATCH 072/373] gh-119070: Update test_shebang_executable_extension to always use non-installed version (GH-119846) --- Lib/test/test_launcher.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_launcher.py b/Lib/test/test_launcher.py index 6d358ac6f16a27..58baae25df3df7 100644 --- a/Lib/test/test_launcher.py +++ b/Lib/test/test_launcher.py @@ -766,9 +766,9 @@ def test_shebang_command_in_venv(self): self.assertEqual(data["stdout"].strip(), f"{quote(exe)} arg1 {quote(script)}") def test_shebang_executable_extension(self): - with self.script('#! /usr/bin/env python3.12') as script: - data = self.run_py([script]) - expect = "# Search PATH for python3.12.exe" + with self.script('#! /usr/bin/env python3.99') as script: + data = self.run_py([script], expect_returncode=103) + expect = "# Search PATH for python3.99.exe" actual = [line.strip() for line in data["stderr"].splitlines() if line.startswith("# Search PATH")] self.assertEqual([expect], actual) From 26e5c6e8351adb1a77a88920ff33fc8ebee9a99e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 4 Jun 2024 11:23:55 +0200 Subject: [PATCH 073/373] gh-119613: Soft deprecate the Py_MEMCPY() macro (#120020) Use directly memcpy() instead. --- Include/pyport.h | 1 + .../next/C API/2024-06-04-10-58-20.gh-issue-119613.qOr9GF.rst | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/C API/2024-06-04-10-58-20.gh-issue-119613.qOr9GF.rst diff --git a/Include/pyport.h b/Include/pyport.h index 2ba81a4be42822..1f7a9b41e0ae2b 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -180,6 +180,7 @@ typedef Py_ssize_t Py_ssize_clean_t; # define Py_LOCAL_INLINE(type) static inline type #endif +// Soft deprecated since Python 3.14, use memcpy() instead. 
#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 # define Py_MEMCPY memcpy #endif diff --git a/Misc/NEWS.d/next/C API/2024-06-04-10-58-20.gh-issue-119613.qOr9GF.rst b/Misc/NEWS.d/next/C API/2024-06-04-10-58-20.gh-issue-119613.qOr9GF.rst new file mode 100644 index 00000000000000..11f075b79e6f67 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-06-04-10-58-20.gh-issue-119613.qOr9GF.rst @@ -0,0 +1,2 @@ +Soft deprecate the :c:macro:`!Py_MEMCPY` macro: use directly ``memcpy()`` +instead. Patch by Victor Stinner. From 5a1205b641df133932ed4c65b9a4ff5724e89963 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 4 Jun 2024 11:39:07 +0200 Subject: [PATCH 074/373] gh-111499: Fix PYTHONMALLOCSTATS at Python exit (#120021) Call _PyObject_DebugMallocStats() earlier in Py_FinalizeEx(), before the interpreter is deleted. --- Python/pylifecycle.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 67bbbd01ca0c48..cbdf5c1b771fff 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -2119,6 +2119,12 @@ Py_FinalizeEx(void) } #endif /* Py_TRACE_REFS */ +#ifdef WITH_PYMALLOC + if (malloc_stats) { + _PyObject_DebugMallocStats(stderr); + } +#endif + finalize_interp_delete(tstate->interp); #ifdef Py_REF_DEBUG @@ -2129,12 +2135,6 @@ Py_FinalizeEx(void) #endif _Py_FinalizeAllocatedBlocks(runtime); -#ifdef WITH_PYMALLOC - if (malloc_stats) { - _PyObject_DebugMallocStats(stderr); - } -#endif - call_ll_exitfuncs(runtime); _PyRuntime_Finalize(); From 9e052619a6d32051394444c24d3185db1735a893 Mon Sep 17 00:00:00 2001 From: Xie Yanbo Date: Tue, 4 Jun 2024 18:22:22 +0800 Subject: [PATCH 075/373] Fix typos in documentation and comments (#119763) --- Python/brc.c | 4 ++-- Python/ceval.c | 2 +- Python/flowgraph.c | 2 +- Python/gc.c | 4 ++-- Python/import.c | 2 +- Python/optimizer.c | 2 +- Python/vm-state.md | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Python/brc.c 
b/Python/brc.c index 8f87bc33007bcf..d27687052aec19 100644 --- a/Python/brc.c +++ b/Python/brc.c @@ -14,7 +14,7 @@ // thread states within each bucket. // // The queueing thread uses the eval breaker mechanism to notify the owning -// thread that it has objects to merge. Additionaly, all queued objects are +// thread that it has objects to merge. Additionally, all queued objects are // merged during GC. #include "Python.h" #include "pycore_object.h" // _Py_ExplicitMergeRefcount @@ -197,7 +197,7 @@ _Py_brc_after_fork(PyInterpreterState *interp) { // Unlock all bucket mutexes. Some of the buckets may be locked because // locks can be handed off to a parked thread (see lock.c). We don't have - // to worry about consistency here, becuase no thread can be actively + // to worry about consistency here, because no thread can be actively // modifying a bucket, but it might be paused (not yet woken up) on a // PyMutex_Lock while holding that lock. for (Py_ssize_t i = 0; i < _Py_BRC_NUM_BUCKETS; i++) { diff --git a/Python/ceval.c b/Python/ceval.c index 324d062fe9bb43..e3968b07486463 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1478,7 +1478,7 @@ initialize_locals(PyThreadState *tstate, PyFunctionObject *func, localsplus[total_args] = u; } else if (argcount > n) { - /* Too many postional args. Error is reported later */ + /* Too many positional args. Error is reported later */ for (j = n; j < argcount; j++) { Py_DECREF(args[j]); } diff --git a/Python/flowgraph.c b/Python/flowgraph.c index b0c8004130fb07..17617e119fdaa4 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2861,7 +2861,7 @@ _PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g, } /* This is used by _PyCompile_Assemble to fill in the jump and exception - * targets in a synthetic CFG (which is not the ouptut of the builtin compiler). + * targets in a synthetic CFG (which is not the output of the builtin compiler). 
*/ int _PyCfg_JumpLabelsToTargets(cfg_builder *g) diff --git a/Python/gc.c b/Python/gc.c index aa8b216124c36a..b87697e1e5ecfd 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1,5 +1,5 @@ // This implements the reference cycle garbage collector. -// The Python module inteface to the collector is in gcmodule.c. +// The Python module interface to the collector is in gcmodule.c. // See https://devguide.python.org/internals/garbage-collector/ #include "Python.h" @@ -1260,7 +1260,7 @@ gc_list_set_space(PyGC_Head *list, int space) * the incremental collector must progress through the old * space faster than objects are added to the old space. * - * Each young or incremental collection adds a numebr of + * Each young or incremental collection adds a number of * objects, S (for survivors) to the old space, and * incremental collectors scan I objects from the old space. * I > S must be true. We also want I > S * N to be where diff --git a/Python/import.c b/Python/import.c index 6fe6df4db4f55e..351d463dcab465 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1961,7 +1961,7 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, * * However, for single-phase init the module's init function will * create the module, create other objects (and allocate other - * memory), populate it and its module state, and initialze static + * memory), populate it and its module state, and initialize static * types. 
Some modules store other objects and data in global C * variables and register callbacks with the runtime/stdlib or * even external libraries (which is part of why we can't just diff --git a/Python/optimizer.c b/Python/optimizer.c index 5b4a6ff8cb3dad..4dc3438b6c23a4 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1455,7 +1455,7 @@ PyUnstable_Optimizer_NewCounter(void) /* We use a bloomfilter with k = 6, m = 256 * The choice of k and the following constants - * could do with a more rigourous analysis, + * could do with a more rigorous analysis, * but here is a simple analysis: * * We want to keep the false positive rate low. diff --git a/Python/vm-state.md b/Python/vm-state.md index 4c68ba3b575cc8..b3246557dbeea3 100644 --- a/Python/vm-state.md +++ b/Python/vm-state.md @@ -87,4 +87,4 @@ Tier 2 IR entries are all the same size; there is no equivalent to `EXTENDED_ARG - **opcode**: Sometimes the same as a Tier 1 opcode, sometimes a separate micro opcode. Tier 2 opcodes are 9 bits (as opposed to Tier 1 opcodes, which fit in 8 bits). By convention, Tier 2 opcode names start with `_`. - **oparg**: The argument. Usually the same as the Tier 1 oparg after expansion of `EXTENDED_ARG` prefixes. Up to 32 bits. -- **operand**: An aditional argument, Typically the value of *one* cache item from the Tier 1 inline cache, up to 64 bits. +- **operand**: An additional argument, Typically the value of *one* cache item from the Tier 1 inline cache, up to 64 bits. 
From dce14bb2dce7887df40ae5c13b0d13e0dafceff7 Mon Sep 17 00:00:00 2001 From: Kaundur Date: Tue, 4 Jun 2024 12:48:05 +0100 Subject: [PATCH 076/373] gh-118868: logging QueueHandler fix passing of kwargs (GH-118869) Co-authored-by: Nice Zombies Co-authored-by: Vinay Sajip --- Lib/logging/config.py | 16 +++++----- Lib/test/test_logging.py | 29 +++++++++++++++++++ ...-05-09-21-36-11.gh-issue-118868.uckxxP.rst | 2 ++ 3 files changed, 39 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-09-21-36-11.gh-issue-118868.uckxxP.rst diff --git a/Lib/logging/config.py b/Lib/logging/config.py index 860e4751207470..ac45d6809c805c 100644 --- a/Lib/logging/config.py +++ b/Lib/logging/config.py @@ -725,16 +725,16 @@ def add_filters(self, filterer, filters): def _configure_queue_handler(self, klass, **kwargs): if 'queue' in kwargs: - q = kwargs['queue'] + q = kwargs.pop('queue') else: q = queue.Queue() # unbounded - rhl = kwargs.get('respect_handler_level', False) - if 'listener' in kwargs: - lklass = kwargs['listener'] - else: - lklass = logging.handlers.QueueListener - listener = lklass(q, *kwargs.get('handlers', []), respect_handler_level=rhl) - handler = klass(q) + + rhl = kwargs.pop('respect_handler_level', False) + lklass = kwargs.pop('listener', logging.handlers.QueueListener) + handlers = kwargs.pop('handlers', []) + + listener = lklass(q, *handlers, respect_handler_level=rhl) + handler = klass(q, **kwargs) handler.listener = listener return handler diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index 97d7c9fb167ec1..9ebd3457a18d68 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -3976,6 +3976,35 @@ def test_111615(self): } logging.config.dictConfig(config) + # gh-118868: check if kwargs are passed to logging QueueHandler + def test_kwargs_passing(self): + class CustomQueueHandler(logging.handlers.QueueHandler): + def __init__(self, *args, **kwargs): + super().__init__(queue.Queue()) + self.custom_kwargs = 
kwargs + + custom_kwargs = {'foo': 'bar'} + + config = { + 'version': 1, + 'handlers': { + 'custom': { + 'class': CustomQueueHandler, + **custom_kwargs + }, + }, + 'root': { + 'level': 'DEBUG', + 'handlers': ['custom'] + } + } + + logging.config.dictConfig(config) + + handler = logging.getHandlerByName('custom') + self.assertEqual(handler.custom_kwargs, custom_kwargs) + + class ManagerTest(BaseTest): def test_manager_loggerclass(self): logged = [] diff --git a/Misc/NEWS.d/next/Library/2024-05-09-21-36-11.gh-issue-118868.uckxxP.rst b/Misc/NEWS.d/next/Library/2024-05-09-21-36-11.gh-issue-118868.uckxxP.rst new file mode 100644 index 00000000000000..372a809d9594b0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-09-21-36-11.gh-issue-118868.uckxxP.rst @@ -0,0 +1,2 @@ +Fixed issue where kwargs were no longer passed to the logging handler +QueueHandler From 99d945c0c006e3246ac00338e37c443c6e08fc5c Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Tue, 4 Jun 2024 13:20:50 +0100 Subject: [PATCH 077/373] =?UTF-8?q?gh-119819:=20Fix=20regression=20to=20al?= =?UTF-8?q?low=20logging=20configuration=20with=20multipr=E2=80=A6=20(GH-1?= =?UTF-8?q?20030)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Lib/logging/config.py | 4 ++- Lib/test/test_logging.py | 26 +++++++++++++++++++ ...-06-04-12-23-01.gh-issue-119819.WKKrYh.rst | 2 ++ 3 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-04-12-23-01.gh-issue-119819.WKKrYh.rst diff --git a/Lib/logging/config.py b/Lib/logging/config.py index ac45d6809c805c..0b10bf82b60a36 100644 --- a/Lib/logging/config.py +++ b/Lib/logging/config.py @@ -781,8 +781,10 @@ def configure_handler(self, config): # raise ValueError('No handlers specified for a QueueHandler') if 'queue' in config: from multiprocessing.queues import Queue as MPQueue + from multiprocessing import Manager as MM + proxy_queue = MM().Queue() qspec = config['queue'] - if not 
isinstance(qspec, (queue.Queue, MPQueue)): + if not isinstance(qspec, (queue.Queue, MPQueue, type(proxy_queue))): if isinstance(qspec, str): q = self.resolve(qspec) if not callable(q): diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index 9ebd3457a18d68..d3e5ac2be2e21e 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -3926,6 +3926,32 @@ def test_config_queue_handler(self): msg = str(ctx.exception) self.assertEqual(msg, "Unable to configure handler 'ah'") + @unittest.skipIf(support.is_wasi, "WASI does not have multiprocessing.") + def test_multiprocessing_queues(self): + # See gh-119819 + cd = copy.deepcopy(self.config_queue_handler) + from multiprocessing import Queue as MQ, Manager as MM + q1 = MQ() # this can't be pickled + q2 = MM().Queue() # a proxy queue for use when pickling is needed + for qspec in (q1, q2): + fn = make_temp_file('.log', 'test_logging-cmpqh-') + cd['handlers']['h1']['filename'] = fn + cd['handlers']['ah']['queue'] = qspec + qh = None + try: + self.apply_config(cd) + qh = logging.getHandlerByName('ah') + self.assertEqual(sorted(logging.getHandlerNames()), ['ah', 'h1']) + self.assertIsNotNone(qh.listener) + self.assertIs(qh.queue, qspec) + self.assertIs(qh.listener.queue, qspec) + finally: + h = logging.getHandlerByName('h1') + if h: + self.addCleanup(closeFileHandler, h, fn) + else: + self.addCleanup(os.remove, fn) + def test_90195(self): # See gh-90195 config = { diff --git a/Misc/NEWS.d/next/Library/2024-06-04-12-23-01.gh-issue-119819.WKKrYh.rst b/Misc/NEWS.d/next/Library/2024-06-04-12-23-01.gh-issue-119819.WKKrYh.rst new file mode 100644 index 00000000000000..f9e49c00f671f2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-04-12-23-01.gh-issue-119819.WKKrYh.rst @@ -0,0 +1,2 @@ +Fix regression to allow logging configuration with multiprocessing queue +types. 
From bd8c1f97e1709b5e8b07c31b1bc7b73acc76169d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mark=20Jason=20Dominus=20=28=E9=99=B6=E6=95=8F=E4=BF=AE=29?= Date: Tue, 4 Jun 2024 08:59:56 -0400 Subject: [PATCH 078/373] gh-94808: Reorganize _make_posargs and mark unused code (GH-119227) * Reorganize four-way if-elsif-elsif-elsif as nested if-elses * Mark unused branch in _make_posargs `names_with_default` is never `NULL`, even if there are no names with defaults. In that case it points to a structure with `size` zero. Rather than eliminating the branch, we leave it behind with an `assert(0)` in case a future change to the grammar exercises the branch. --- Parser/action_helpers.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index 3f6c282ffa7a68..91b7e2f1058423 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -543,22 +543,30 @@ _make_posargs(Parser *p, asdl_arg_seq *plain_names, asdl_seq *names_with_default, asdl_arg_seq **posargs) { - if (plain_names != NULL && names_with_default != NULL) { - asdl_arg_seq *names_with_default_names = _get_names(p, names_with_default); - if (!names_with_default_names) { - return -1; + + if (names_with_default != NULL) { + if (plain_names != NULL) { + asdl_arg_seq *names_with_default_names = _get_names(p, names_with_default); + if (!names_with_default_names) { + return -1; + } + *posargs = (asdl_arg_seq*)_PyPegen_join_sequences( + p,(asdl_seq*)plain_names, (asdl_seq*)names_with_default_names); + } + else { + *posargs = _get_names(p, names_with_default); } - *posargs = (asdl_arg_seq*)_PyPegen_join_sequences( - p,(asdl_seq*)plain_names, (asdl_seq*)names_with_default_names); - } - else if (plain_names == NULL && names_with_default != NULL) { - *posargs = _get_names(p, names_with_default); - } - else if (plain_names != NULL && names_with_default == NULL) { - *posargs = plain_names; } else { - *posargs = 
_Py_asdl_arg_seq_new(0, p->arena); + if (plain_names != NULL) { + // With the current grammar, we never get here. + // If that has changed, remove the assert, and test thoroughly. + assert(0); + *posargs = plain_names; + } + else { + *posargs = _Py_asdl_arg_seq_new(0, p->arena); + } } return *posargs == NULL ? -1 : 0; } From e69d068ad0bd6a25434ea476a647b635da4d82bb Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 4 Jun 2024 09:42:13 -0400 Subject: [PATCH 079/373] gh-117657: Fix race involving GC and heap initialization (#119923) The `_PyThreadState_Bind()` function is called before the first `PyEval_AcquireThread()` so it's not synchronized with the stop the world GC. We had a race where `gc_visit_heaps()` might visit a thread's heap while it's being initialized. Use a simple atomic int to avoid visiting heaps for threads that are not yet fully initialized (i.e., before `tstate_mimalloc_bind()` is called). The race was reproducible by running: `python Lib/test/test_importlib/partial/pool_in_threads.py`. 
--- Include/internal/pycore_mimalloc.h | 1 + Python/gc_free_threading.c | 4 ++++ Python/pystate.c | 2 ++ Tools/tsan/suppressions_free_threading.txt | 3 --- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_mimalloc.h b/Include/internal/pycore_mimalloc.h index 100f78d53021ee..d10b01d5b49b19 100644 --- a/Include/internal/pycore_mimalloc.h +++ b/Include/internal/pycore_mimalloc.h @@ -52,6 +52,7 @@ struct _mimalloc_thread_state { mi_heap_t *current_object_heap; mi_heap_t heaps[_Py_MIMALLOC_HEAP_COUNT]; mi_tld_t tld; + int initialized; struct llist_node page_list; }; #endif diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index d005b79ff40dbf..f19362c9573812 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -252,6 +252,10 @@ gc_visit_heaps_lock_held(PyInterpreterState *interp, mi_block_visit_fun *visitor // visit each thread's heaps for GC objects for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { struct _mimalloc_thread_state *m = &((_PyThreadStateImpl *)p)->mimalloc; + if (!_Py_atomic_load_int(&m->initialized)) { + // The thread may not have called tstate_mimalloc_bind() yet. + continue; + } arg->offset = offset_base; if (!mi_heap_visit_blocks(&m->heaps[_Py_MIMALLOC_HEAP_GC], true, diff --git a/Python/pystate.c b/Python/pystate.c index d0293915db7689..e1a95907b57d20 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -3074,6 +3074,8 @@ tstate_mimalloc_bind(PyThreadState *tstate) // _PyObject_GC_New() and similar functions temporarily override this to // use one of the GC heaps. 
mts->current_object_heap = &mts->heaps[_Py_MIMALLOC_HEAP_OBJECT]; + + _Py_atomic_store_int(&mts->initialized, 1); #endif } diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index d5fcac61f0db04..8b64d1ff321858 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -25,7 +25,6 @@ race:free_threadstate race_top:_add_to_weak_set race_top:_in_weak_set -race_top:_mi_heap_delayed_free_partial race_top:_PyEval_EvalFrameDefault race_top:_PyImport_AcquireLock race_top:_PyImport_ReleaseLock @@ -33,7 +32,6 @@ race_top:_PyType_HasFeature race_top:assign_version_tag race_top:insertdict race_top:lookup_tp_dict -race_top:mi_heap_visit_pages race_top:PyMember_GetOne race_top:PyMember_SetOne race_top:new_reference @@ -58,7 +56,6 @@ race_top:_Py_slot_tp_getattr_hook race_top:add_threadstate race_top:dump_traceback race_top:fatal_error -race_top:mi_page_decode_padding race_top:_multiprocessing_SemLock_release_impl race_top:_PyFrame_GetCode race_top:_PyFrame_Initialize From ff1857d6ed52fab8ef1507c289d89ee545ca6478 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 4 Jun 2024 16:24:22 +0200 Subject: [PATCH 080/373] gh-120029: export `DEF_TYPE_PARAM` compiler flag (#120028) --- Doc/library/symtable.rst | 4 ++++ Lib/symtable.py | 17 +++++++++++++---- Lib/test/test_symtable.py | 2 ++ ...24-06-04-14-54-46.gh-issue-120029._1YdTf.rst | 2 ++ Modules/symtablemodule.c | 4 +++- 5 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-04-14-54-46.gh-issue-120029._1YdTf.rst diff --git a/Doc/library/symtable.rst b/Doc/library/symtable.rst index 0480502158433a..e17a33f7feb1ab 100644 --- a/Doc/library/symtable.rst +++ b/Doc/library/symtable.rst @@ -151,6 +151,10 @@ Examining Symbol Tables Return ``True`` if the symbol is a parameter. + .. 
method:: is_type_parameter() + + Return ``True`` if the symbol is a type parameter. + .. method:: is_global() Return ``True`` if the symbol is global. diff --git a/Lib/symtable.py b/Lib/symtable.py index 17f820abd56660..ba2f0dafcd0063 100644 --- a/Lib/symtable.py +++ b/Lib/symtable.py @@ -1,9 +1,13 @@ """Interface to the compiler's internal symbol tables""" import _symtable -from _symtable import (USE, DEF_GLOBAL, DEF_NONLOCAL, DEF_LOCAL, DEF_PARAM, - DEF_IMPORT, DEF_BOUND, DEF_ANNOT, SCOPE_OFF, SCOPE_MASK, FREE, - LOCAL, GLOBAL_IMPLICIT, GLOBAL_EXPLICIT, CELL) +from _symtable import ( + USE, + DEF_GLOBAL, DEF_NONLOCAL, DEF_LOCAL, + DEF_PARAM, DEF_TYPE_PARAM, DEF_IMPORT, DEF_BOUND, DEF_ANNOT, + SCOPE_OFF, SCOPE_MASK, + FREE, LOCAL, GLOBAL_IMPLICIT, GLOBAL_EXPLICIT, CELL +) import weakref @@ -253,13 +257,18 @@ def is_referenced(self): """Return *True* if the symbol is used in its block. """ - return bool(self.__flags & _symtable.USE) + return bool(self.__flags & USE) def is_parameter(self): """Return *True* if the symbol is a parameter. """ return bool(self.__flags & DEF_PARAM) + def is_type_parameter(self): + """Return *True* if the symbol is a type parameter. + """ + return bool(self.__flags & DEF_TYPE_PARAM) + def is_global(self): """Return *True* if the symbol is global. 
""" diff --git a/Lib/test/test_symtable.py b/Lib/test/test_symtable.py index 92b78a8086a83d..ef2a228b15ed4e 100644 --- a/Lib/test/test_symtable.py +++ b/Lib/test/test_symtable.py @@ -299,6 +299,8 @@ def test_symbol_repr(self): "") self.assertEqual(repr(self.other_internal.lookup("some_var")), "") + self.assertEqual(repr(self.GenericMine.lookup("T")), + "") def test_symtable_entry_repr(self): expected = f"" diff --git a/Misc/NEWS.d/next/Library/2024-06-04-14-54-46.gh-issue-120029._1YdTf.rst b/Misc/NEWS.d/next/Library/2024-06-04-14-54-46.gh-issue-120029._1YdTf.rst new file mode 100644 index 00000000000000..e8ea1077139f71 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-04-14-54-46.gh-issue-120029._1YdTf.rst @@ -0,0 +1,2 @@ +Expose :meth:`symtable.Symbol.is_type_parameter` in the :mod:`symtable` +module. Patch by Bénédikt Tran. diff --git a/Modules/symtablemodule.c b/Modules/symtablemodule.c index b4dbb54c3b47b0..63c4dd4225298d 100644 --- a/Modules/symtablemodule.c +++ b/Modules/symtablemodule.c @@ -75,6 +75,7 @@ symtable_init_constants(PyObject *m) if (PyModule_AddIntMacro(m, DEF_NONLOCAL) < 0) return -1; if (PyModule_AddIntMacro(m, DEF_LOCAL) < 0) return -1; if (PyModule_AddIntMacro(m, DEF_PARAM) < 0) return -1; + if (PyModule_AddIntMacro(m, DEF_TYPE_PARAM) < 0) return -1; if (PyModule_AddIntMacro(m, DEF_FREE) < 0) return -1; if (PyModule_AddIntMacro(m, DEF_FREE_CLASS) < 0) return -1; if (PyModule_AddIntMacro(m, DEF_IMPORT) < 0) return -1; @@ -83,7 +84,8 @@ symtable_init_constants(PyObject *m) if (PyModule_AddIntConstant(m, "TYPE_FUNCTION", FunctionBlock) < 0) return -1; - if (PyModule_AddIntConstant(m, "TYPE_CLASS", ClassBlock) < 0) return -1; + if (PyModule_AddIntConstant(m, "TYPE_CLASS", ClassBlock) < 0) + return -1; if (PyModule_AddIntConstant(m, "TYPE_MODULE", ModuleBlock) < 0) return -1; if (PyModule_AddIntConstant(m, "TYPE_ANNOTATION", AnnotationBlock) < 0) From 4dcd91ceafce91ec37bb1a9d544e41fc65578994 Mon Sep 17 00:00:00 2001 From: "Jason R. 
Coombs" Date: Tue, 4 Jun 2024 11:20:01 -0400 Subject: [PATCH 081/373] gh-119588: Update docs to reflect decision to include the change with Python 3.13 and not 3.12. (#120043) --- Doc/library/zipfile.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index a4d9a1852f8f0d..5583c6b24be5c6 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -591,8 +591,8 @@ Path objects are traversable using the ``/`` operator or ``joinpath``. .. versionadded:: 3.12 - .. versionchanged:: 3.12.4 - Prior to 3.12.4, ``is_symlink`` would unconditionally return ``False``. + .. versionchanged:: 3.13 + Previously, ``is_symlink`` would unconditionally return ``False``. .. method:: Path.exists() From 8fc7653766b106bdbc4ff6154e0020aea4ab15e6 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Tue, 4 Jun 2024 18:09:31 +0200 Subject: [PATCH 082/373] gh-120041: Do not use append_to_screen when completions are visible (GH-120042) --- Lib/_pyrepl/commands.py | 7 +++++- Lib/_pyrepl/completing_reader.py | 20 +++++++++------- Lib/test/test_pyrepl/support.py | 2 +- Lib/test/test_pyrepl/test_reader.py | 37 ++++++++++++++++++++++++++++- 4 files changed, 55 insertions(+), 11 deletions(-) diff --git a/Lib/_pyrepl/commands.py b/Lib/_pyrepl/commands.py index 2ef5dada9d9e58..b967f5206614f8 100644 --- a/Lib/_pyrepl/commands.py +++ b/Lib/_pyrepl/commands.py @@ -365,7 +365,12 @@ def do(self) -> None: r = self.reader text = self.event * r.get_arg() r.insert(text) - if len(text) == 1 and r.pos == len(r.buffer): + if ( + len(text) == 1 and + r.pos == len(r.buffer) and + not r.cmpltn_menu_visible and # type: ignore[attr-defined] + not r.cmpltn_message_visible # type: ignore[attr-defined] + ): r.calc_screen = r.append_to_screen diff --git a/Lib/_pyrepl/completing_reader.py b/Lib/_pyrepl/completing_reader.py index c11d2dabdd2792..215ad8753c9f8b 100644 --- a/Lib/_pyrepl/completing_reader.py +++ 
b/Lib/_pyrepl/completing_reader.py @@ -187,18 +187,20 @@ def do(self) -> None: if p: r.insert(p) if last_is_completer: - if not r.cmpltn_menu_visible: - r.cmpltn_menu_visible = True + r.cmpltn_menu_visible = True + r.cmpltn_message_visible = False r.cmpltn_menu, r.cmpltn_menu_end = build_menu( r.console, completions, r.cmpltn_menu_end, r.use_brackets, r.sort_in_column) r.dirty = True - elif stem + p in completions: - r.msg = "[ complete but not unique ]" - r.dirty = True - else: - r.msg = "[ not unique ]" - r.dirty = True + elif not r.cmpltn_menu_visible: + r.cmpltn_message_visible = True + if stem + p in completions: + r.msg = "[ complete but not unique ]" + r.dirty = True + else: + r.msg = "[ not unique ]" + r.dirty = True class self_insert(commands.self_insert): @@ -236,6 +238,7 @@ class CompletingReader(Reader): ### Instance variables cmpltn_menu: list[str] = field(init=False) cmpltn_menu_visible: bool = field(init=False) + cmpltn_message_visible: bool = field(init=False) cmpltn_menu_end: int = field(init=False) cmpltn_menu_choices: list[str] = field(init=False) @@ -271,6 +274,7 @@ def finish(self) -> None: def cmpltn_reset(self) -> None: self.cmpltn_menu = [] self.cmpltn_menu_visible = False + self.cmpltn_message_visible = False self.cmpltn_menu_end = 0 self.cmpltn_menu_choices = [] diff --git a/Lib/test/test_pyrepl/support.py b/Lib/test/test_pyrepl/support.py index d2f5429aea7a11..e807b5f3404550 100644 --- a/Lib/test/test_pyrepl/support.py +++ b/Lib/test/test_pyrepl/support.py @@ -39,7 +39,7 @@ def code_to_events(code: str): def prepare_reader(console: Console, **kwargs): - config = ReadlineConfig(readline_completer=None) + config = ReadlineConfig(readline_completer=kwargs.pop("readline_completer", None)) reader = ReadlineAlikeReader(console=console, config=config) reader.more_lines = partial(more_lines, namespace=None) reader.paste_mode = True # Avoid extra indents diff --git a/Lib/test/test_pyrepl/test_reader.py b/Lib/test/test_pyrepl/test_reader.py index 
9fb956b655594f..d02815bfa11d74 100644 --- a/Lib/test/test_pyrepl/test_reader.py +++ b/Lib/test/test_pyrepl/test_reader.py @@ -1,5 +1,6 @@ import itertools import functools +import rlcompleter from unittest import TestCase from .support import handle_all_events, handle_events_narrow_console, code_to_events, prepare_reader @@ -9,7 +10,7 @@ class TestReader(TestCase): def assert_screen_equals(self, reader, expected): - actual = reader.calc_screen() + actual = reader.screen expected = expected.split("\n") self.assertListEqual(actual, expected) @@ -208,3 +209,37 @@ def test_prompt_length(self): prompt, l = Reader.process_prompt(ps1) self.assertEqual(prompt, "\033[0;32m樂>\033[0m> ") self.assertEqual(l, 5) + + def test_completions_updated_on_key_press(self): + namespace = {"itertools": itertools} + code = "itertools." + events = itertools.chain(code_to_events(code), [ + Event(evt='key', data='\t', raw=bytearray(b'\t')), # Two tabs for completion + Event(evt='key', data='\t', raw=bytearray(b'\t')), + ], code_to_events("a")) + + completing_reader = functools.partial( + prepare_reader, + readline_completer=rlcompleter.Completer(namespace).complete + ) + reader, _ = handle_all_events(events, prepare_reader=completing_reader) + + actual = reader.screen + self.assertEqual(len(actual), 2) + self.assertEqual(actual[0].rstrip(), "itertools.accumulate(") + self.assertEqual(actual[1], f"{code}a") + + def test_key_press_on_tab_press_once(self): + namespace = {"itertools": itertools} + code = "itertools." 
+ events = itertools.chain(code_to_events(code), [ + Event(evt='key', data='\t', raw=bytearray(b'\t')), + ], code_to_events("a")) + + completing_reader = functools.partial( + prepare_reader, + readline_completer=rlcompleter.Completer(namespace).complete + ) + reader, _ = handle_all_events(events, prepare_reader=completing_reader) + + self.assert_screen_equals(reader, f"{code}a") From 5f03f0913413ecc4942367cf62ce3a5a5b5d84a5 Mon Sep 17 00:00:00 2001 From: Trey Hunner Date: Tue, 4 Jun 2024 09:28:08 -0700 Subject: [PATCH 083/373] Fix incorrect pull GitHub link in What's New (#120045) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/whatsnew/3.13.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 903de3c04b4a07..dfbeadce0eea27 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1006,7 +1006,7 @@ random ------ * Add a :ref:`command-line interface `. - (Contributed by Hugo van Kemenade in :gh:`54321`.) + (Contributed by Hugo van Kemenade in :gh:`118131`.) re -- From 7111d9605f9db7aa0b095bb8ece7ccc0b8115c3f Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 4 Jun 2024 19:36:37 +0300 Subject: [PATCH 084/373] gh-89928: Fix integer conversion of device numbers (GH-31794) Fix os.major(), os.minor() and os.makedev(). Support device numbers larger than 2**63-1. Support non-existent device number (NODEV). 
--- Lib/test/test_posix.py | 15 +++- .../2022-03-10-16-47-57.bpo-45767.ywmyo1.rst | 3 + Modules/clinic/posixmodule.c.h | 32 +++---- Modules/posixmodule.c | 88 +++++++++++++++---- 4 files changed, 99 insertions(+), 39 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-03-10-16-47-57.bpo-45767.ywmyo1.rst diff --git a/Lib/test/test_posix.py b/Lib/test/test_posix.py index 7e5f04c22bd6d3..908354cb8574d1 100644 --- a/Lib/test/test_posix.py +++ b/Lib/test/test_posix.py @@ -704,7 +704,8 @@ def test_makedev(self): self.assertEqual(posix.major(dev), major) self.assertRaises(TypeError, posix.major, float(dev)) self.assertRaises(TypeError, posix.major) - self.assertRaises((ValueError, OverflowError), posix.major, -1) + for x in -2, 2**64, -2**63-1: + self.assertRaises((ValueError, OverflowError), posix.major, x) minor = posix.minor(dev) self.assertIsInstance(minor, int) @@ -712,13 +713,23 @@ def test_makedev(self): self.assertEqual(posix.minor(dev), minor) self.assertRaises(TypeError, posix.minor, float(dev)) self.assertRaises(TypeError, posix.minor) - self.assertRaises((ValueError, OverflowError), posix.minor, -1) + for x in -2, 2**64, -2**63-1: + self.assertRaises((ValueError, OverflowError), posix.minor, x) self.assertEqual(posix.makedev(major, minor), dev) self.assertRaises(TypeError, posix.makedev, float(major), minor) self.assertRaises(TypeError, posix.makedev, major, float(minor)) self.assertRaises(TypeError, posix.makedev, major) self.assertRaises(TypeError, posix.makedev) + for x in -2, 2**32, 2**64, -2**63-1: + self.assertRaises((ValueError, OverflowError), posix.makedev, x, minor) + self.assertRaises((ValueError, OverflowError), posix.makedev, major, x) + + if sys.platform == 'linux': + NODEV = -1 + self.assertEqual(posix.major(NODEV), NODEV) + self.assertEqual(posix.minor(NODEV), NODEV) + self.assertEqual(posix.makedev(NODEV, NODEV), NODEV) def _test_all_chown_common(self, chown_func, first_param, stat_func): """Common code for chown, fchown and lchown 
tests.""" diff --git a/Misc/NEWS.d/next/Library/2022-03-10-16-47-57.bpo-45767.ywmyo1.rst b/Misc/NEWS.d/next/Library/2022-03-10-16-47-57.bpo-45767.ywmyo1.rst new file mode 100644 index 00000000000000..0cdf1e84157777 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-03-10-16-47-57.bpo-45767.ywmyo1.rst @@ -0,0 +1,3 @@ +Fix integer conversion in :func:`os.major`, :func:`os.minor`, and +:func:`os.makedev`. Support device numbers larger than ``2**63-1``. Support +non-existent device number (``NODEV``). diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index c7a447b455c594..83dcc7a60c2110 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -8685,7 +8685,7 @@ PyDoc_STRVAR(os_major__doc__, #define OS_MAJOR_METHODDEF \ {"major", (PyCFunction)os_major, METH_O, os_major__doc__}, -static unsigned int +static PyObject * os_major_impl(PyObject *module, dev_t device); static PyObject * @@ -8693,16 +8693,11 @@ os_major(PyObject *module, PyObject *arg) { PyObject *return_value = NULL; dev_t device; - unsigned int _return_value; if (!_Py_Dev_Converter(arg, &device)) { goto exit; } - _return_value = os_major_impl(module, device); - if ((_return_value == (unsigned int)-1) && PyErr_Occurred()) { - goto exit; - } - return_value = PyLong_FromUnsignedLong((unsigned long)_return_value); + return_value = os_major_impl(module, device); exit: return return_value; @@ -8721,7 +8716,7 @@ PyDoc_STRVAR(os_minor__doc__, #define OS_MINOR_METHODDEF \ {"minor", (PyCFunction)os_minor, METH_O, os_minor__doc__}, -static unsigned int +static PyObject * os_minor_impl(PyObject *module, dev_t device); static PyObject * @@ -8729,16 +8724,11 @@ os_minor(PyObject *module, PyObject *arg) { PyObject *return_value = NULL; dev_t device; - unsigned int _return_value; if (!_Py_Dev_Converter(arg, &device)) { goto exit; } - _return_value = os_minor_impl(module, device); - if ((_return_value == (unsigned int)-1) && PyErr_Occurred()) { - goto exit; - } - 
return_value = PyLong_FromUnsignedLong((unsigned long)_return_value); + return_value = os_minor_impl(module, device); exit: return return_value; @@ -8758,25 +8748,23 @@ PyDoc_STRVAR(os_makedev__doc__, {"makedev", _PyCFunction_CAST(os_makedev), METH_FASTCALL, os_makedev__doc__}, static dev_t -os_makedev_impl(PyObject *module, int major, int minor); +os_makedev_impl(PyObject *module, dev_t major, dev_t minor); static PyObject * os_makedev(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; - int major; - int minor; + dev_t major; + dev_t minor; dev_t _return_value; if (!_PyArg_CheckPositional("makedev", nargs, 2, 2)) { goto exit; } - major = PyLong_AsInt(args[0]); - if (major == -1 && PyErr_Occurred()) { + if (!_Py_Dev_Converter(args[0], &major)) { goto exit; } - minor = PyLong_AsInt(args[1]); - if (minor == -1 && PyErr_Occurred()) { + if (!_Py_Dev_Converter(args[1], &minor)) { goto exit; } _return_value = os_makedev_impl(module, major, minor); @@ -12795,4 +12783,4 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=300bd1c54dc43765 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=49c2d7a65f7a9f3b input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index bb35cfd9cdb138..1251ea63348946 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -18,6 +18,7 @@ #include "pycore_fileutils.h" // _Py_closerange() #include "pycore_import.h" // _PyImport_ReInitLock() #include "pycore_initconfig.h" // _PyStatus_EXCEPTION() +#include "pycore_long.h" // _PyLong_IsNegative() #include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_object.h" // _PyObject_LookupSpecial() #include "pycore_pylifecycle.h" // _PyOS_URandom() @@ -967,16 +968,46 @@ 
_Py_Gid_Converter(PyObject *obj, gid_t *p) #endif /* MS_WINDOWS */ -#define _PyLong_FromDev PyLong_FromLongLong +static PyObject * +_PyLong_FromDev(dev_t dev) +{ +#ifdef NODEV + if (dev == NODEV) { + return PyLong_FromLongLong((long long)dev); + } +#endif + return PyLong_FromUnsignedLongLong((unsigned long long)dev); +} #if (defined(HAVE_MKNOD) && defined(HAVE_MAKEDEV)) || defined(HAVE_DEVICE_MACROS) static int _Py_Dev_Converter(PyObject *obj, void *p) { - *((dev_t *)p) = PyLong_AsUnsignedLongLong(obj); - if (PyErr_Occurred()) +#ifdef NODEV + if (PyLong_Check(obj) && _PyLong_IsNegative((PyLongObject *)obj)) { + int overflow; + long long result = PyLong_AsLongLongAndOverflow(obj, &overflow); + if (result == -1 && PyErr_Occurred()) { + return 0; + } + if (!overflow && result == (long long)NODEV) { + *((dev_t *)p) = NODEV; + return 1; + } + } +#endif + + unsigned long long result = PyLong_AsUnsignedLongLong(obj); + if (result == (unsigned long long)-1 && PyErr_Occurred()) { + return 0; + } + if ((unsigned long long)(dev_t)result != result) { + PyErr_SetString(PyExc_OverflowError, + "Python int too large to convert to C dev_t"); return 0; + } + *((dev_t *)p) = (dev_t)result; return 1; } #endif /* (HAVE_MKNOD && HAVE_MAKEDEV) || HAVE_DEVICE_MACROS */ @@ -12517,9 +12548,31 @@ os_mknod_impl(PyObject *module, path_t *path, int mode, dev_t device, #endif /* defined(HAVE_MKNOD) && defined(HAVE_MAKEDEV) */ +static PyObject * +major_minor_conv(unsigned int value) +{ +#ifdef NODEV + if (value == (unsigned int)NODEV) { + return PyLong_FromLong((int)NODEV); + } +#endif + return PyLong_FromUnsignedLong(value); +} + +static int +major_minor_check(dev_t value) +{ +#ifdef NODEV + if (value == NODEV) { + return 1; + } +#endif + return (dev_t)(unsigned int)value == value; +} + #ifdef HAVE_DEVICE_MACROS /*[clinic input] -os.major -> unsigned_int +os.major device: dev_t / @@ -12527,16 +12580,16 @@ os.major -> unsigned_int Extracts a device major number from a raw device number. 
[clinic start generated code]*/ -static unsigned int +static PyObject * os_major_impl(PyObject *module, dev_t device) -/*[clinic end generated code: output=5b3b2589bafb498e input=1e16a4d30c4d4462]*/ +/*[clinic end generated code: output=4071ffee17647891 input=b1a0a14ec9448229]*/ { - return major(device); + return major_minor_conv(major(device)); } /*[clinic input] -os.minor -> unsigned_int +os.minor device: dev_t / @@ -12544,28 +12597,33 @@ os.minor -> unsigned_int Extracts a device minor number from a raw device number. [clinic start generated code]*/ -static unsigned int +static PyObject * os_minor_impl(PyObject *module, dev_t device) -/*[clinic end generated code: output=5e1a25e630b0157d input=0842c6d23f24c65e]*/ +/*[clinic end generated code: output=306cb78e3bc5004f input=2f686e463682a9da]*/ { - return minor(device); + return major_minor_conv(minor(device)); } /*[clinic input] os.makedev -> dev_t - major: int - minor: int + major: dev_t + minor: dev_t / Composes a raw device number from the major and minor device numbers. [clinic start generated code]*/ static dev_t -os_makedev_impl(PyObject *module, int major, int minor) -/*[clinic end generated code: output=881aaa4aba6f6a52 input=4b9fd8fc73cbe48f]*/ +os_makedev_impl(PyObject *module, dev_t major, dev_t minor) +/*[clinic end generated code: output=cad6125c51f5af80 input=2146126ec02e55c1]*/ { + if (!major_minor_check(major) || !major_minor_check(minor)) { + PyErr_SetString(PyExc_OverflowError, + "Python int too large to convert to C unsigned int"); + return (dev_t)-1; + } return makedev(major, minor); } #endif /* HAVE_DEVICE_MACROS */ From e0799352823289fafb8131341abd751923ee9c08 Mon Sep 17 00:00:00 2001 From: Christopher Chavez Date: Tue, 4 Jun 2024 11:47:15 -0500 Subject: [PATCH 085/373] gh-112672: Fix builtin Tkinter with Tcl 9.0 (GH-112681) * Add declaration of Tcl_AppInit(), missing in Tcl 9.0. * Use Tcl_Size instead of int where needed. 
Co-authored-by: Serhiy Storchaka --- ...-06-04-19-03-25.gh-issue-112672.K2XfZH.rst | 1 + Modules/_tkinter.c | 38 ++++++++++++------- 2 files changed, 25 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-04-19-03-25.gh-issue-112672.K2XfZH.rst diff --git a/Misc/NEWS.d/next/Library/2024-06-04-19-03-25.gh-issue-112672.K2XfZH.rst b/Misc/NEWS.d/next/Library/2024-06-04-19-03-25.gh-issue-112672.K2XfZH.rst new file mode 100644 index 00000000000000..46345bff117b19 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-04-19-03-25.gh-issue-112672.K2XfZH.rst @@ -0,0 +1 @@ +Support building :mod:`tkinter` with Tcl 9.0. diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c index 0cff36dd307c39..24f87c8d34c6b2 100644 --- a/Modules/_tkinter.c +++ b/Modules/_tkinter.c @@ -69,6 +69,12 @@ Copyright (C) 1994 Steen Lumholt. #define USE_DEPRECATED_TOMMATH_API 1 #endif +// As suggested by https://core.tcl-lang.org/tcl/wiki?name=Migrating+C+extensions+to+Tcl+9 +#ifndef TCL_SIZE_MAX +typedef int Tcl_Size; +#define TCL_SIZE_MAX INT_MAX +#endif + #if !(defined(MS_WINDOWS) || defined(__CYGWIN__)) #define HAVE_CREATEFILEHANDLER #endif @@ -489,7 +495,7 @@ unicodeFromTclString(const char *s) static PyObject * unicodeFromTclObj(Tcl_Obj *value) { - int len; + Tcl_Size len; #if USE_TCL_UNICODE int byteorder = NATIVE_BYTEORDER; const Tcl_UniChar *u = Tcl_GetUnicodeFromObj(value, &len); @@ -517,6 +523,10 @@ class _tkinter.tktimertoken "TkttObject *" "&Tktt_Type_spec" /**** Tkapp Object ****/ +#if TK_MAJOR_VERSION >= 9 +int Tcl_AppInit(Tcl_Interp *); +#endif + #ifndef WITH_APPINIT int Tcl_AppInit(Tcl_Interp *interp) @@ -1142,7 +1152,7 @@ FromObj(TkappObject *tkapp, Tcl_Obj *value) } if (value->typePtr == tkapp->ByteArrayType) { - int size; + Tcl_Size size; char *data = (char*)Tcl_GetByteArrayFromObj(value, &size); return PyBytes_FromStringAndSize(data, size); } @@ -1168,8 +1178,8 @@ FromObj(TkappObject *tkapp, Tcl_Obj *value) } if (value->typePtr == tkapp->ListType) { - 
int size; - int i, status; + Tcl_Size i, size; + int status; PyObject *elem; Tcl_Obj *tcl_elem; @@ -1225,9 +1235,9 @@ typedef struct Tkapp_CallEvent { } Tkapp_CallEvent; static void -Tkapp_CallDeallocArgs(Tcl_Obj** objv, Tcl_Obj** objStore, int objc) +Tkapp_CallDeallocArgs(Tcl_Obj** objv, Tcl_Obj** objStore, Tcl_Size objc) { - int i; + Tcl_Size i; for (i = 0; i < objc; i++) Tcl_DecrRefCount(objv[i]); if (objv != objStore) @@ -1238,7 +1248,7 @@ Tkapp_CallDeallocArgs(Tcl_Obj** objv, Tcl_Obj** objStore, int objc) interpreter thread, which may or may not be the calling thread. */ static Tcl_Obj** -Tkapp_CallArgs(PyObject *args, Tcl_Obj** objStore, int *pobjc) +Tkapp_CallArgs(PyObject *args, Tcl_Obj** objStore, Tcl_Size *pobjc) { Tcl_Obj **objv = objStore; Py_ssize_t objc = 0, i; @@ -1286,10 +1296,10 @@ Tkapp_CallArgs(PyObject *args, Tcl_Obj** objStore, int *pobjc) Tcl_IncrRefCount(objv[i]); } } - *pobjc = (int)objc; + *pobjc = (Tcl_Size)objc; return objv; finally: - Tkapp_CallDeallocArgs(objv, objStore, (int)objc); + Tkapp_CallDeallocArgs(objv, objStore, (Tcl_Size)objc); return NULL; } @@ -1356,7 +1366,7 @@ Tkapp_CallProc(Tcl_Event *evPtr, int flags) Tkapp_CallEvent *e = (Tkapp_CallEvent *)evPtr; Tcl_Obj *objStore[ARGSZ]; Tcl_Obj **objv; - int objc; + Tcl_Size objc; int i; ENTER_PYTHON if (e->self->trace && !Tkapp_Trace(e->self, PyTuple_Pack(1, e->args))) { @@ -1412,7 +1422,7 @@ Tkapp_Call(PyObject *selfptr, PyObject *args) { Tcl_Obj *objStore[ARGSZ]; Tcl_Obj **objv = NULL; - int objc, i; + Tcl_Size objc; PyObject *res = NULL; TkappObject *self = (TkappObject*)selfptr; int flags = TCL_EVAL_DIRECT | TCL_EVAL_GLOBAL; @@ -1459,6 +1469,7 @@ Tkapp_Call(PyObject *selfptr, PyObject *args) { TRACE(self, ("(O)", args)); + int i; objv = Tkapp_CallArgs(args, objStore, &objc); if (!objv) return NULL; @@ -2193,13 +2204,12 @@ _tkinter_tkapp_splitlist(TkappObject *self, PyObject *arg) /*[clinic end generated code: output=13b51d34386d36fb input=2b2e13351e3c0b53]*/ { char *list; - int 
argc; + Tcl_Size argc, i; const char **argv; PyObject *v; - int i; if (PyTclObject_Check(arg)) { - int objc; + Tcl_Size objc; Tcl_Obj **objv; if (Tcl_ListObjGetElements(Tkapp_Interp(self), ((PyTclObject*)arg)->value, From bf8e5e53d0c359a1f9c285d855e7a5e9b6d91375 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Tue, 4 Jun 2024 19:26:44 +0200 Subject: [PATCH 086/373] gh-120041: Refactor check for visible completion menu in completing_reader (#120055) --- Lib/_pyrepl/commands.py | 7 +------ Lib/_pyrepl/completing_reader.py | 3 +++ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Lib/_pyrepl/commands.py b/Lib/_pyrepl/commands.py index b967f5206614f8..2ef5dada9d9e58 100644 --- a/Lib/_pyrepl/commands.py +++ b/Lib/_pyrepl/commands.py @@ -365,12 +365,7 @@ def do(self) -> None: r = self.reader text = self.event * r.get_arg() r.insert(text) - if ( - len(text) == 1 and - r.pos == len(r.buffer) and - not r.cmpltn_menu_visible and # type: ignore[attr-defined] - not r.cmpltn_message_visible # type: ignore[attr-defined] - ): + if len(text) == 1 and r.pos == len(r.buffer): r.calc_screen = r.append_to_screen diff --git a/Lib/_pyrepl/completing_reader.py b/Lib/_pyrepl/completing_reader.py index 215ad8753c9f8b..8df35ccb9117b1 100644 --- a/Lib/_pyrepl/completing_reader.py +++ b/Lib/_pyrepl/completing_reader.py @@ -210,6 +210,9 @@ def do(self) -> None: commands.self_insert.do(self) + if r.cmpltn_menu_visible or r.cmpltn_message_visible: + r.calc_screen = r.calc_complete_screen + if r.cmpltn_menu_visible: stem = r.get_stem() if len(stem) < 1: From d419d468ff4aaf6bc673354d0ee41b273d09dd3f Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 4 Jun 2024 13:38:29 -0400 Subject: [PATCH 087/373] gh-120039: Reduce expected timeout in test_siginterrupt_off (#120047) The process is expected to time out. In the refleak builds, `support.SHORT_TIMEOUT` is often five minutes and we run the tests six times, so test_signal was taking >30 minutes. 
--- Lib/test/test_signal.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_signal.py b/Lib/test/test_signal.py index 61fb047caf6dab..591cd4177d9f41 100644 --- a/Lib/test/test_signal.py +++ b/Lib/test/test_signal.py @@ -698,7 +698,7 @@ def handler(signum, frame): @unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()") class SiginterruptTest(unittest.TestCase): - def readpipe_interrupted(self, interrupt): + def readpipe_interrupted(self, interrupt, timeout=support.SHORT_TIMEOUT): """Perform a read during which a signal will arrive. Return True if the read is interrupted by the signal and raises an exception. Return False if it returns normally. @@ -746,7 +746,7 @@ def handler(signum, frame): # wait until the child process is loaded and has started first_line = process.stdout.readline() - stdout, stderr = process.communicate(timeout=support.SHORT_TIMEOUT) + stdout, stderr = process.communicate(timeout=timeout) except subprocess.TimeoutExpired: process.kill() return False @@ -777,7 +777,7 @@ def test_siginterrupt_off(self): # If a signal handler is installed and siginterrupt is called with # a false value for the second argument, when that signal arrives, it # does not interrupt a syscall that's in progress. 
- interrupted = self.readpipe_interrupted(False) + interrupted = self.readpipe_interrupted(False, timeout=2) self.assertFalse(interrupted) From 010ea93b2b888149561becefeee90826bf8a2934 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Tue, 4 Jun 2024 19:46:33 +0200 Subject: [PATCH 088/373] gh-119553: Clear reader on Ctrl-C command (GH-119801) --- Lib/_pyrepl/commands.py | 1 + Lib/test/test_pyrepl/support.py | 2 ++ Lib/test/test_pyrepl/test_reader.py | 16 ++++++++++++++++ 3 files changed, 19 insertions(+) diff --git a/Lib/_pyrepl/commands.py b/Lib/_pyrepl/commands.py index 2ef5dada9d9e58..e94e8c25d379c1 100644 --- a/Lib/_pyrepl/commands.py +++ b/Lib/_pyrepl/commands.py @@ -221,6 +221,7 @@ def do(self) -> None: class ctrl_c(Command): def do(self) -> None: + self.reader.finish() raise KeyboardInterrupt diff --git a/Lib/test/test_pyrepl/support.py b/Lib/test/test_pyrepl/support.py index e807b5f3404550..70e12286f7d781 100644 --- a/Lib/test/test_pyrepl/support.py +++ b/Lib/test/test_pyrepl/support.py @@ -75,6 +75,8 @@ def handle_all_events( reader.handle1() except StopIteration: pass + except KeyboardInterrupt: + pass return reader, console diff --git a/Lib/test/test_pyrepl/test_reader.py b/Lib/test/test_pyrepl/test_reader.py index d02815bfa11d74..079c963d19aad5 100644 --- a/Lib/test/test_pyrepl/test_reader.py +++ b/Lib/test/test_pyrepl/test_reader.py @@ -179,6 +179,22 @@ def test_newline_within_block_trailing_whitespace(self): self.assert_screen_equals(reader, expected) self.assertTrue(reader.finished) + def test_keyboard_interrupt_clears_screen(self): + namespace = {"itertools": itertools} + code = "import itertools\nitertools." 
+ events = itertools.chain(code_to_events(code), [ + Event(evt='key', data='\t', raw=bytearray(b'\t')), # Two tabs for completion + Event(evt='key', data='\t', raw=bytearray(b'\t')), + Event(evt='key', data='\x03', raw=bytearray(b'\x03')), # Ctrl-C + ]) + + completing_reader = functools.partial( + prepare_reader, + readline_completer=rlcompleter.Completer(namespace).complete + ) + reader, _ = handle_all_events(events, prepare_reader=completing_reader) + self.assertEqual(reader.calc_screen(), code.split("\n")) + def test_prompt_length(self): # Handles simple ASCII prompt ps1 = ">>> " From bf5e1065f4ec2077c6ca352fc1ad940a76d1f6c9 Mon Sep 17 00:00:00 2001 From: Paulo Freitas Date: Tue, 4 Jun 2024 14:55:11 -0300 Subject: [PATCH 089/373] doc: Mention the missing reflected special methods for all binary operations (GH-119931) --- Doc/reference/expressions.rst | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index 00b57effd3e1c0..872773f4d28235 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -1211,7 +1211,8 @@ Raising ``0.0`` to a negative power results in a :exc:`ZeroDivisionError`. Raising a negative number to a fractional power results in a :class:`complex` number. (In earlier versions it raised a :exc:`ValueError`.) -This operation can be customized using the special :meth:`~object.__pow__` method. +This operation can be customized using the special :meth:`~object.__pow__` and +:meth:`~object.__rpow__` methods. .. _unary: @@ -1299,6 +1300,9 @@ This operation can be customized using the special :meth:`~object.__mul__` and The ``@`` (at) operator is intended to be used for matrix multiplication. No builtin Python types implement this operator. +This operation can be customized using the special :meth:`~object.__matmul__` and +:meth:`~object.__rmatmul__` methods. + .. versionadded:: 3.5 .. 
index:: @@ -1314,8 +1318,10 @@ integer; the result is that of mathematical division with the 'floor' function applied to the result. Division by zero raises the :exc:`ZeroDivisionError` exception. -This operation can be customized using the special :meth:`~object.__truediv__` and -:meth:`~object.__floordiv__` methods. +The division operation can be customized using the special :meth:`~object.__truediv__` +and :meth:`~object.__rtruediv__` methods. +The floor division operation can be customized using the special +:meth:`~object.__floordiv__` and :meth:`~object.__rfloordiv__` methods. .. index:: single: modulo @@ -1340,7 +1346,8 @@ also overloaded by string objects to perform old-style string formatting (also known as interpolation). The syntax for string formatting is described in the Python Library Reference, section :ref:`old-string-formatting`. -The *modulo* operation can be customized using the special :meth:`~object.__mod__` method. +The *modulo* operation can be customized using the special :meth:`~object.__mod__` +and :meth:`~object.__rmod__` methods. The floor division operator, the modulo operator, and the :func:`divmod` function are not defined for complex numbers. Instead, convert to a floating @@ -1367,7 +1374,8 @@ This operation can be customized using the special :meth:`~object.__add__` and The ``-`` (subtraction) operator yields the difference of its arguments. The numeric arguments are first converted to a common type. -This operation can be customized using the special :meth:`~object.__sub__` method. +This operation can be customized using the special :meth:`~object.__sub__` and +:meth:`~object.__rsub__` methods. .. _shifting: @@ -1388,8 +1396,10 @@ The shifting operations have lower priority than the arithmetic operations: These operators accept integers as arguments. They shift the first argument to the left or right by the number of bits given by the second argument. 
-This operation can be customized using the special :meth:`~object.__lshift__` and -:meth:`~object.__rshift__` methods. +The left shift operation can be customized using the special :meth:`~object.__lshift__` +and :meth:`~object.__rlshift__` methods. +The right shift operation can be customized using the special :meth:`~object.__rshift__` +and :meth:`~object.__rrshift__` methods. .. index:: pair: exception; ValueError From d9095194dde27eaabfc0b86a11989cdb9a2acfe1 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 4 Jun 2024 19:32:43 +0100 Subject: [PATCH 090/373] gh-119842: Honor PyOS_InputHook in the new REPL (GH-119843) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pablo Galindo Co-authored-by: Łukasz Langa Co-authored-by: Michael Droettboom --- Lib/_pyrepl/console.py | 12 +++++- Lib/_pyrepl/reader.py | 10 ++++- Lib/_pyrepl/unix_console.py | 22 ++++++++--- Lib/_pyrepl/windows_console.py | 22 ++++++++++- Lib/test/test_pyrepl/test_reader.py | 17 +++++++++ ...-05-31-12-06-11.gh-issue-119842.tCGVsv.rst | 1 + Modules/clinic/posixmodule.c.h | 38 ++++++++++++++++++- Modules/posixmodule.c | 33 ++++++++++++++++ 8 files changed, 144 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-31-12-06-11.gh-issue-119842.tCGVsv.rst diff --git a/Lib/_pyrepl/console.py b/Lib/_pyrepl/console.py index aa0bde865825c9..a8d3f520340dcf 100644 --- a/Lib/_pyrepl/console.py +++ b/Lib/_pyrepl/console.py @@ -33,6 +33,7 @@ if TYPE_CHECKING: from typing import IO + from typing import Callable @dataclass @@ -134,8 +135,15 @@ def getpending(self) -> Event: ... @abstractmethod - def wait(self) -> None: - """Wait for an event.""" + def wait(self, timeout: float | None) -> bool: + """Wait for an event. The return value is True if an event is + available, False if the timeout has been reached. If timeout is + None, wait forever. The timeout is in milliseconds.""" + ... 
+ + @property + def input_hook(self) -> Callable[[], int] | None: + """Returns the current input hook.""" ... @abstractmethod diff --git a/Lib/_pyrepl/reader.py b/Lib/_pyrepl/reader.py index f2e68ef6f3ee66..beee7764e0eb84 100644 --- a/Lib/_pyrepl/reader.py +++ b/Lib/_pyrepl/reader.py @@ -650,7 +650,15 @@ def handle1(self, block: bool = True) -> bool: self.dirty = True while True: - event = self.console.get_event(block) + input_hook = self.console.input_hook + if input_hook: + input_hook() + # We use the same timeout as in readline.c: 100ms + while not self.console.wait(100): + input_hook() + event = self.console.get_event(block=False) + else: + event = self.console.get_event(block) if not event: # can only happen if we're not blocking return False diff --git a/Lib/_pyrepl/unix_console.py b/Lib/_pyrepl/unix_console.py index 4bdb02261982c3..2f73a59dd1fced 100644 --- a/Lib/_pyrepl/unix_console.py +++ b/Lib/_pyrepl/unix_console.py @@ -118,9 +118,12 @@ def __init__(self): def register(self, fd, flag): self.fd = fd - - def poll(self): # note: a 'timeout' argument would be *milliseconds* - r, w, e = select.select([self.fd], [], []) + # note: The 'timeout' argument is received as *milliseconds* + def poll(self, timeout: float | None = None) -> list[int]: + if timeout is None: + r, w, e = select.select([self.fd], [], []) + else: + r, w, e = select.select([self.fd], [], [], timeout/1000) return r poll = MinimalPoll # type: ignore[assignment] @@ -385,11 +388,11 @@ def get_event(self, block: bool = True) -> Event | None: break return self.event_queue.get() - def wait(self): + def wait(self, timeout: float | None = None) -> bool: """ Wait for events on the console. 
""" - self.pollob.poll() + return bool(self.pollob.poll(timeout)) def set_cursor_vis(self, visible): """ @@ -527,6 +530,15 @@ def clear(self): self.__posxy = 0, 0 self.screen = [] + @property + def input_hook(self): + try: + import posix + except ImportError: + return None + if posix._is_inputhook_installed(): + return posix._inputhook + def __enable_bracketed_paste(self) -> None: os.write(self.output_fd, b"\x1b[?2004h") diff --git a/Lib/_pyrepl/windows_console.py b/Lib/_pyrepl/windows_console.py index 2277865e3262fc..f691ca3fbb07b8 100644 --- a/Lib/_pyrepl/windows_console.py +++ b/Lib/_pyrepl/windows_console.py @@ -23,6 +23,8 @@ from multiprocessing import Value import os import sys +import time +import msvcrt from abc import ABC, abstractmethod from collections import deque @@ -202,6 +204,15 @@ def refresh(self, screen: list[str], c_xy: tuple[int, int]) -> None: self.screen = screen self.move_cursor(cx, cy) + @property + def input_hook(self): + try: + import nt + except ImportError: + return None + if nt._is_inputhook_installed(): + return nt._inputhook + def __write_changed_line( self, y: int, oldline: str, newline: str, px_coord: int ) -> None: @@ -460,9 +471,16 @@ def getpending(self) -> Event: processed.""" return Event("key", "", b"") - def wait(self) -> None: + def wait(self, timeout: float | None) -> bool: """Wait for an event.""" - raise NotImplementedError("No wait support") + # Poor man's Windows select loop + start_time = time.time() + while True: + if msvcrt.kbhit(): # type: ignore[attr-defined] + return True + if timeout and time.time() - start_time > timeout: + return False + time.sleep(0.01) def repaint(self) -> None: raise NotImplementedError("No repaint support") diff --git a/Lib/test/test_pyrepl/test_reader.py b/Lib/test/test_pyrepl/test_reader.py index 079c963d19aad5..78b11323d60a85 100644 --- a/Lib/test/test_pyrepl/test_reader.py +++ b/Lib/test/test_pyrepl/test_reader.py @@ -2,8 +2,10 @@ import functools import rlcompleter from unittest import 
TestCase +from unittest.mock import MagicMock, patch from .support import handle_all_events, handle_events_narrow_console, code_to_events, prepare_reader +from test.support import import_helper from _pyrepl.console import Event from _pyrepl.reader import Reader @@ -179,6 +181,21 @@ def test_newline_within_block_trailing_whitespace(self): self.assert_screen_equals(reader, expected) self.assertTrue(reader.finished) + def test_input_hook_is_called_if_set(self): + input_hook = MagicMock() + def _prepare_console(events): + console = MagicMock() + console.get_event.side_effect = events + console.height = 100 + console.width = 80 + console.input_hook = input_hook + return console + + events = code_to_events("a") + reader, _ = handle_all_events(events, prepare_console=_prepare_console) + + self.assertEqual(len(input_hook.mock_calls), 4) + def test_keyboard_interrupt_clears_screen(self): namespace = {"itertools": itertools} code = "import itertools\nitertools." diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-31-12-06-11.gh-issue-119842.tCGVsv.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-31-12-06-11.gh-issue-119842.tCGVsv.rst new file mode 100644 index 00000000000000..2fcb170f6226e5 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-31-12-06-11.gh-issue-119842.tCGVsv.rst @@ -0,0 +1 @@ +Honor :c:func:`PyOS_InputHook` in the new REPL. 
Patch by Pablo Galindo diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 83dcc7a60c2110..69fc178331c09c 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -12116,6 +12116,42 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #endif /* defined(MS_WINDOWS) */ +PyDoc_STRVAR(os__inputhook__doc__, +"_inputhook($module, /)\n" +"--\n" +"\n" +"Calls PyOS_CallInputHook droppong the GIL first"); + +#define OS__INPUTHOOK_METHODDEF \ + {"_inputhook", (PyCFunction)os__inputhook, METH_NOARGS, os__inputhook__doc__}, + +static PyObject * +os__inputhook_impl(PyObject *module); + +static PyObject * +os__inputhook(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return os__inputhook_impl(module); +} + +PyDoc_STRVAR(os__is_inputhook_installed__doc__, +"_is_inputhook_installed($module, /)\n" +"--\n" +"\n" +"Checks if PyOS_CallInputHook is set"); + +#define OS__IS_INPUTHOOK_INSTALLED_METHODDEF \ + {"_is_inputhook_installed", (PyCFunction)os__is_inputhook_installed, METH_NOARGS, os__is_inputhook_installed__doc__}, + +static PyObject * +os__is_inputhook_installed_impl(PyObject *module); + +static PyObject * +os__is_inputhook_installed(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return os__is_inputhook_installed_impl(module); +} + #ifndef OS_TTYNAME_METHODDEF #define OS_TTYNAME_METHODDEF #endif /* !defined(OS_TTYNAME_METHODDEF) */ @@ -12783,4 +12819,4 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=49c2d7a65f7a9f3b input=a9049054013a1b77]*/ +/*[clinic end generated code: output=faaa5e5ffb7b165d input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 1251ea63348946..386e942d53f539 100644 --- a/Modules/posixmodule.c +++ 
b/Modules/posixmodule.c @@ -16784,6 +16784,37 @@ os__supports_virtual_terminal_impl(PyObject *module) } #endif +/*[clinic input] +os._inputhook + +Calls PyOS_CallInputHook droppong the GIL first +[clinic start generated code]*/ + +static PyObject * +os__inputhook_impl(PyObject *module) +/*[clinic end generated code: output=525aca4ef3c6149f input=fc531701930d064f]*/ +{ + int result = 0; + if (PyOS_InputHook) { + Py_BEGIN_ALLOW_THREADS; + result = PyOS_InputHook(); + Py_END_ALLOW_THREADS; + } + return PyLong_FromLong(result); +} + +/*[clinic input] +os._is_inputhook_installed + +Checks if PyOS_CallInputHook is set +[clinic start generated code]*/ + +static PyObject * +os__is_inputhook_installed_impl(PyObject *module) +/*[clinic end generated code: output=3b3eab4f672c689a input=ff177c9938dd76d8]*/ +{ + return PyBool_FromLong(PyOS_InputHook != NULL); +} static PyMethodDef posix_methods[] = { @@ -16997,6 +17028,8 @@ static PyMethodDef posix_methods[] = { OS__PATH_LEXISTS_METHODDEF OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF + OS__INPUTHOOK_METHODDEF + OS__IS_INPUTHOOK_INSTALLED_METHODDEF {NULL, NULL} /* Sentinel */ }; From 710cbea6604d27c7d59ae4953bf522b997a82cc7 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 4 Jun 2024 14:59:23 -0400 Subject: [PATCH 091/373] gh-120048: Make `test_imaplib` faster (#120050) The `test_imaplib` was taking 40+ minutes in the refleak build bots because the tests waiting on a client `self._setup()` was creating a client that prevented progress until its connection timed out, which scaled with the global timeout. We should set `connect=False` for the tests that don't want `_setup()` to create a client. 
Co-authored-by: Serhiy Storchaka --- Lib/test/test_imaplib.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/Lib/test/test_imaplib.py b/Lib/test/test_imaplib.py index 79bf7dbdbb81a0..b5384b59463742 100644 --- a/Lib/test/test_imaplib.py +++ b/Lib/test/test_imaplib.py @@ -458,18 +458,14 @@ def test_simple_with_statement(self): with self.imap_class(*server.server_address): pass - @requires_resource('walltime') def test_imaplib_timeout_test(self): - _, server = self._setup(SimpleIMAPHandler) - addr = server.server_address[1] - client = self.imap_class("localhost", addr, timeout=None) - self.assertEqual(client.sock.timeout, None) - client.shutdown() - client = self.imap_class("localhost", addr, timeout=support.LOOPBACK_TIMEOUT) - self.assertEqual(client.sock.timeout, support.LOOPBACK_TIMEOUT) - client.shutdown() + _, server = self._setup(SimpleIMAPHandler, connect=False) + with self.imap_class(*server.server_address, timeout=None) as client: + self.assertEqual(client.sock.timeout, None) + with self.imap_class(*server.server_address, timeout=support.LOOPBACK_TIMEOUT) as client: + self.assertEqual(client.sock.timeout, support.LOOPBACK_TIMEOUT) with self.assertRaises(ValueError): - client = self.imap_class("localhost", addr, timeout=0) + self.imap_class(*server.server_address, timeout=0) def test_imaplib_timeout_functionality_test(self): class TimeoutHandler(SimpleIMAPHandler): @@ -552,7 +548,6 @@ class NewIMAPSSLTests(NewIMAPTestsMixin, unittest.TestCase): imap_class = IMAP4_SSL server_class = SecureTCPServer - @requires_resource('walltime') def test_ssl_raises(self): ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) self.assertEqual(ssl_context.verify_mode, ssl.CERT_REQUIRED) @@ -566,17 +561,16 @@ def test_ssl_raises(self): CERTIFICATE_VERIFY_FAILED # AWS-LC )""", re.X) with self.assertRaisesRegex(ssl.CertificateError, regex): - _, server = self._setup(SimpleIMAPHandler) + _, server = self._setup(SimpleIMAPHandler, 
connect=False) client = self.imap_class(*server.server_address, ssl_context=ssl_context) client.shutdown() - @requires_resource('walltime') def test_ssl_verified(self): ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) ssl_context.load_verify_locations(CAFILE) - _, server = self._setup(SimpleIMAPHandler) + _, server = self._setup(SimpleIMAPHandler, connect=False) client = self.imap_class("localhost", server.server_address[1], ssl_context=ssl_context) client.shutdown() From 109e1082ea92f89d42cd70f2cc7ca6fba6be9bab Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Tue, 4 Jun 2024 20:16:43 +0100 Subject: [PATCH 092/373] gh-119819: Update test to skip if _multiprocessing is unavailable. (GH-120067) --- Lib/test/test_logging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index d3e5ac2be2e21e..0c9a24e58dfd8c 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -3926,9 +3926,9 @@ def test_config_queue_handler(self): msg = str(ctx.exception) self.assertEqual(msg, "Unable to configure handler 'ah'") - @unittest.skipIf(support.is_wasi, "WASI does not have multiprocessing.") def test_multiprocessing_queues(self): # See gh-119819 + import_helper.import_module('_multiprocessing') # will skip test if it's not available cd = copy.deepcopy(self.config_queue_handler) from multiprocessing import Queue as MQ, Manager as MM q1 = MQ() # this can't be pickled From 4055577221f5f52af329e87f31d81bb8fb02c504 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 4 Jun 2024 15:26:26 -0400 Subject: [PATCH 093/373] gh-119999: Fix potential race condition in `_Py_ExplicitMergeRefcount` (#120000) We need to write to `ob_ref_local` and `ob_tid` before `ob_ref_shared`. Once we mark `ob_ref_shared` as merged, some other thread may free the object because the caller also passes in `-1` as `extra` to give up its only reference. 
--- Objects/object.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/Objects/object.c b/Objects/object.c index 2e9962f4651e1c..b7730475ac3768 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -401,24 +401,27 @@ Py_ssize_t _Py_ExplicitMergeRefcount(PyObject *op, Py_ssize_t extra) { assert(!_Py_IsImmortal(op)); + +#ifdef Py_REF_DEBUG + _Py_AddRefTotal(_PyThreadState_GET(), extra); +#endif + + // gh-119999: Write to ob_ref_local and ob_tid before merging the refcount. + Py_ssize_t local = (Py_ssize_t)op->ob_ref_local; + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, 0); + _Py_atomic_store_uintptr_relaxed(&op->ob_tid, 0); + Py_ssize_t refcnt; Py_ssize_t new_shared; Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared); do { refcnt = Py_ARITHMETIC_RIGHT_SHIFT(Py_ssize_t, shared, _Py_REF_SHARED_SHIFT); - refcnt += (Py_ssize_t)op->ob_ref_local; + refcnt += local; refcnt += extra; new_shared = _Py_REF_SHARED(refcnt, _Py_REF_MERGED); } while (!_Py_atomic_compare_exchange_ssize(&op->ob_ref_shared, &shared, new_shared)); - -#ifdef Py_REF_DEBUG - _Py_AddRefTotal(_PyThreadState_GET(), extra); -#endif - - _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, 0); - _Py_atomic_store_uintptr_relaxed(&op->ob_tid, 0); return refcnt; } #endif /* Py_GIL_DISABLED */ From 69b3e8ea569faabccd74036e3d0e5ec7c0c62a20 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Tue, 4 Jun 2024 23:22:28 +0200 Subject: [PATCH 094/373] gh-119553: Fix console when pressing Ctrl-C within a multiline block (#120075) --- Lib/_pyrepl/commands.py | 2 ++ Lib/_pyrepl/simple_interact.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/_pyrepl/commands.py b/Lib/_pyrepl/commands.py index e94e8c25d379c1..6bffed1bfe9327 100644 --- a/Lib/_pyrepl/commands.py +++ b/Lib/_pyrepl/commands.py @@ -216,11 +216,13 @@ def do(self) -> None: import signal self.reader.console.finish() + self.reader.finish() os.kill(os.getpid(), signal.SIGINT) 
class ctrl_c(Command): def do(self) -> None: + self.reader.console.finish() self.reader.finish() raise KeyboardInterrupt diff --git a/Lib/_pyrepl/simple_interact.py b/Lib/_pyrepl/simple_interact.py index 256bbc7c6d7626..2e5698eb131684 100644 --- a/Lib/_pyrepl/simple_interact.py +++ b/Lib/_pyrepl/simple_interact.py @@ -149,7 +149,7 @@ def more_lines(unicodetext: str) -> bool: assert not more input_n += 1 except KeyboardInterrupt: - console.write("\nKeyboardInterrupt\n") + console.write("KeyboardInterrupt\n") console.resetbuffer() except MemoryError: console.write("\nMemoryError\n") From 770f3c1eadd3392c72fd55be47770234dd143a14 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Wed, 5 Jun 2024 00:00:02 +0100 Subject: [PATCH 095/373] gh-114616: Improve docs regarding changes to caches representation in dis (#120033) --- Doc/library/dis.rst | 7 ++++--- Doc/whatsnew/3.13.rst | 7 +++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index fda46d260bcb46..87d1bcdfaf3f1d 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -336,9 +336,10 @@ operation is being performed, so the intermediate analysis object isn't useful: Added the *show_caches* and *adaptive* parameters. .. versionchanged:: 3.13 - The *show_caches* parameter is deprecated and has no effect. The *cache_info* - field of each instruction is populated regardless of its value. - + The *show_caches* parameter is deprecated and has no effect. The iterator + generates the :class:`Instruction` instances with the *cache_info* + field populated (regardless of the value of *show_caches*) and it no longer + generates separate items for the cache entries. .. function:: findlinestarts(code) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index dfbeadce0eea27..a1d2a0d84e7581 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -706,6 +706,13 @@ dis the ``show_offsets`` parameter. 
(Contributed by Irit Katriel in :gh:`112137`.) +* :meth:`~dis.get_instructions` no longer represents cache entries as + separate instructions. Instead, it returns them as part of the + :class:`~dis.Instruction`, in the new *cache_info* field. The + *show_caches* argument to :meth:`~dis.get_instructions` is + deprecated and no longer has any effect. + (Contributed by Irit Katriel in :gh:`112962`.) + .. _whatsnew313-doctest: doctest From b6b0dcbfc054f581b6f78602e4c2e9474e3efe21 Mon Sep 17 00:00:00 2001 From: shurj0 <60540027+shurj0@users.noreply.github.com> Date: Wed, 5 Jun 2024 06:23:12 +0600 Subject: [PATCH 096/373] gh-120078: Fix struct_time attr typo tm_day -> tm_mday in Doc/library/time.rst (GH-120081) --- Doc/library/time.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/time.rst b/Doc/library/time.rst index ef033d59d56185..4d7661715aa0af 100644 --- a/Doc/library/time.rst +++ b/Doc/library/time.rst @@ -617,7 +617,7 @@ Functions - range [1, 12] * - 2 - - .. attribute:: tm_day + - .. attribute:: tm_mday - range [1, 31] * - 3 From 983efcf15b2503fe0c05d5e03762385967962b33 Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Wed, 5 Jun 2024 07:25:47 +0100 Subject: [PATCH 097/373] =?UTF-8?q?gh-119819:=20Update=20logging=20configu?= =?UTF-8?q?ration=20to=20support=20joinable=20multiproc=E2=80=A6=20(GH-120?= =?UTF-8?q?090)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gh-119819: Update logging configuration to support joinable multiprocessing manager queues. 
--- Lib/logging/config.py | 4 +++- Lib/test/test_logging.py | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/Lib/logging/config.py b/Lib/logging/config.py index 0b10bf82b60a36..9de84e527b18ac 100644 --- a/Lib/logging/config.py +++ b/Lib/logging/config.py @@ -783,8 +783,10 @@ def configure_handler(self, config): from multiprocessing.queues import Queue as MPQueue from multiprocessing import Manager as MM proxy_queue = MM().Queue() + proxy_joinable_queue = MM().JoinableQueue() qspec = config['queue'] - if not isinstance(qspec, (queue.Queue, MPQueue, type(proxy_queue))): + if not isinstance(qspec, (queue.Queue, MPQueue, + type(proxy_queue), type(proxy_joinable_queue))): if isinstance(qspec, str): q = self.resolve(qspec) if not callable(q): diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index 0c9a24e58dfd8c..ef2d4a621be962 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -3928,12 +3928,16 @@ def test_config_queue_handler(self): def test_multiprocessing_queues(self): # See gh-119819 - import_helper.import_module('_multiprocessing') # will skip test if it's not available + + # will skip test if it's not available + import_helper.import_module('_multiprocessing') + cd = copy.deepcopy(self.config_queue_handler) from multiprocessing import Queue as MQ, Manager as MM q1 = MQ() # this can't be pickled q2 = MM().Queue() # a proxy queue for use when pickling is needed - for qspec in (q1, q2): + q3 = MM().JoinableQueue() # a joinable proxy queue + for qspec in (q1, q2, q3): fn = make_temp_file('.log', 'test_logging-cmpqh-') cd['handlers']['h1']['filename'] = fn cd['handlers']['ah']['queue'] = qspec From 5c02ea8bae2287a828840f5734966da23dc573dc Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Wed, 5 Jun 2024 12:56:01 +0100 Subject: [PATCH 098/373] gh-119287: clarify doc on BaseExceptionGroup.derive and link to it from contextlib.suppress (#119657) --- 
Doc/library/contextlib.rst | 4 +++- Doc/library/exceptions.rst | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Doc/library/contextlib.rst b/Doc/library/contextlib.rst index bad9da52d6a6ca..27cf99446e5980 100644 --- a/Doc/library/contextlib.rst +++ b/Doc/library/contextlib.rst @@ -314,7 +314,9 @@ Functions and classes provided: If the code within the :keyword:`!with` block raises a :exc:`BaseExceptionGroup`, suppressed exceptions are removed from the - group. If any exceptions in the group are not suppressed, a group containing them is re-raised. + group. Any exceptions of the group which are not suppressed are re-raised in + a new group which is created using the original group's :meth:`~BaseExceptionGroup.derive` + method. .. versionadded:: 3.4 diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index 7879fb015bddfa..7910b306f143d7 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -989,7 +989,8 @@ their subgroups based on the types of the contained exceptions. Returns an exception group with the same :attr:`message`, but which wraps the exceptions in ``excs``. - This method is used by :meth:`subgroup` and :meth:`split`. A + This method is used by :meth:`subgroup` and :meth:`split`, which + are used in various contexts to break up an exception group. A subclass needs to override it in order to make :meth:`subgroup` and :meth:`split` return instances of the subclass rather than :exc:`ExceptionGroup`. From 4bba1c9e6cfeaf69302b501a4306668613db4b28 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Wed, 5 Jun 2024 09:23:29 -0400 Subject: [PATCH 099/373] gh-120065: Increase `collect_in_thread` period to 5 ms. (#120068) This matches the default GIL switch interval. It greatly speeds up the free-threaded build: previously, it spent nearly all its time in `gc.collect()`. 
--- Lib/test/test_weakref.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_weakref.py b/Lib/test/test_weakref.py index 16da24d7805b56..ef2fe92cc219b6 100644 --- a/Lib/test/test_weakref.py +++ b/Lib/test/test_weakref.py @@ -82,7 +82,7 @@ def callback(self, ref): @contextlib.contextmanager -def collect_in_thread(period=0.0001): +def collect_in_thread(period=0.005): """ Ensure GC collections happen in a different thread, at a high frequency. """ From 10eac0269bce4e2ba575e5b549d3dd9a6da9349a Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Wed, 5 Jun 2024 16:28:47 +0100 Subject: [PATCH 100/373] gh-119786: add links to code in exception handling doc (#120077) --- InternalDocs/exception_handling.md | 49 ++++++++++-------------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/InternalDocs/exception_handling.md b/InternalDocs/exception_handling.md index 22d9c3bf7933f1..ec09e0769929fa 100644 --- a/InternalDocs/exception_handling.md +++ b/InternalDocs/exception_handling.md @@ -67,14 +67,18 @@ handler located at label `L1`. Handling Exceptions ------------------- -At runtime, when an exception occurs, the interpreter looks up -the offset of the current instruction in the exception table. If -it finds a handler, control flow transfers to it. Otherwise, the +At runtime, when an exception occurs, the interpreter calls +``get_exception_handler()`` in +[Python/ceval.c](https://github.com/python/cpython/blob/main/Python/ceval.c) +to look up the offset of the current instruction in the exception +table. If it finds a handler, control flow transfers to it. Otherwise, the exception bubbles up to the caller, and the caller's frame is checked for a handler covering the `CALL` instruction. This repeats until a handler is found or the topmost frame is reached. If no handler is found, the program terminates. During unwinding, -the traceback is constructed as each frame is added to it. 
+the traceback is constructed as each frame is added to it by +``PyTraceBack_Here()``, which is in +[Python/traceback.c](https://github.com/python/cpython/blob/main/Python/traceback.c). Along with the location of an exception handler, each entry of the exception table also contains the stack depth of the `try` instruction @@ -169,33 +173,12 @@ which is then encoded as: for a total of five bytes. +The code to construct the exception table is in ``assemble_exception_table()`` +in [Python/assemble.c](https://github.com/python/cpython/blob/main/Python/assemble.c). -Script to parse the exception table ------------------------------------ - -``` -def parse_varint(iterator): - b = next(iterator) - val = b & 63 - while b&64: - val <<= 6 - b = next(iterator) - val |= b&63 - return val -``` -``` -def parse_exception_table(code): - iterator = iter(code.co_exceptiontable) - try: - while True: - start = parse_varint(iterator)*2 - length = parse_varint(iterator)*2 - end = start + length - 2 # Present as inclusive, not exclusive - target = parse_varint(iterator)*2 - dl = parse_varint(iterator) - depth = dl >> 1 - lasti = bool(dl&1) - yield start, end, target, depth, lasti - except StopIteration: - return -``` +The interpreter's function to look up the table by instruction offset is +``get_exception_handler()`` in +[Python/ceval.c](https://github.com/python/cpython/blob/main/Python/ceval.c). +The Python function ``_parse_exception_table()`` in +[Lib/dis.py](https://github.com/python/cpython/blob/main/Lib/dis.py) +returns the exception table content as a list of namedtuple instances. From 14e3c7071bd1add30d4b69b62e011c7d38aebd9b Mon Sep 17 00:00:00 2001 From: benchatt Date: Wed, 5 Jun 2024 10:35:40 -0700 Subject: [PATCH 101/373] gh-115225: Raise error on unsupported ISO 8601 time strings (#119339) Some time strings that contain fractional hours or minutes are permitted by ISO 8601, but such strings are very unlikely to be intentional.
The current parser does not parse such strings correctly or raise an error. This change raises a ValueError when hours or minutes contain a decimal mark. Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> --- Lib/test/datetimetester.py | 2 ++ Misc/ACKS | 1 + .../Library/2024-05-21-19-10-30.gh-issue-115225.eRmfJH.rst | 1 + Modules/_datetimemodule.c | 3 +++ 4 files changed, 7 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-05-21-19-10-30.gh-issue-115225.eRmfJH.rst diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 535b17d0727611..3759504b02e550 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -4412,6 +4412,8 @@ def test_fromisoformat_fails(self): '12:30:45.123456-', # Extra at end of microsecond time '12:30:45.123456+', # Extra at end of microsecond time '12:30:45.123456+12:00:30a', # Extra at end of full time + '12.5', # Decimal mark at end of hour + '12:30,5', # Decimal mark at end of minute ] for bad_str in bad_strs: diff --git a/Misc/ACKS b/Misc/ACKS index 2e7e12481bacd7..af92d81ff3141a 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -315,6 +315,7 @@ Greg Chapman Mitch Chapman Matt Chaput William Chargin +Ben Chatterton Yogesh Chaudhari Gautam Chaudhuri David Chaum diff --git a/Misc/NEWS.d/next/Library/2024-05-21-19-10-30.gh-issue-115225.eRmfJH.rst b/Misc/NEWS.d/next/Library/2024-05-21-19-10-30.gh-issue-115225.eRmfJH.rst new file mode 100644 index 00000000000000..2b65eaa6dd70ad --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-21-19-10-30.gh-issue-115225.eRmfJH.rst @@ -0,0 +1 @@ +Raise error on certain technically valid but pathological ISO 8601 strings passed to :meth:`datetime.time.fromisoformat` that were previously parsed incorrectly. 
diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index d6fa273c75e15e..bea6e9411a75ed 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -1020,6 +1020,9 @@ parse_hh_mm_ss_ff(const char *tstr, const char *tstr_end, int *hour, continue; } else if (c == '.' || c == ',') { + if (i < 2) { + return -3; // Decimal mark on hour or minute + } break; } else if (!has_separator) { --p; From e83ce850f433fd8bbf8ff4e8d7649b942639db31 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Wed, 5 Jun 2024 18:54:50 +0100 Subject: [PATCH 102/373] pathlib ABCs: remove duplicate `realpath()` implementation. (#119178) Add private `posixpath._realpath()` function, which is a generic version of `realpath()` that can be parameterised with string tokens (`sep`, `curdir`, `pardir`) and query functions (`getcwd`, `lstat`, `readlink`). Also add support for limiting the number of symlink traversals. In the private `pathlib._abc.PathBase` class, call `posixpath._realpath()` and remove our re-implementation of the same algorithm. No change to any public APIs, either in `posixpath` or `pathlib`. 
Co-authored-by: Nice Zombies --- Lib/pathlib/_abc.py | 87 +++++++++++++++------------------------------ Lib/posixpath.py | 40 +++++++++++++++------ 2 files changed, 57 insertions(+), 70 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index d7471b6927331d..1a74f457c3f5a7 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -12,8 +12,8 @@ """ import functools +import posixpath from glob import _Globber, _no_recurse_symlinks -from errno import ENOTDIR, ELOOP from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO @@ -696,65 +696,34 @@ def resolve(self, strict=False): """ if self._resolving: return self - path_root, parts = self._stack - path = self.with_segments(path_root) - try: - path = path.absolute() - except UnsupportedOperation: - path_tail = [] - else: - path_root, path_tail = path._stack - path_tail.reverse() - - # If the user has *not* overridden the `readlink()` method, then symlinks are unsupported - # and (in non-strict mode) we can improve performance by not calling `stat()`. - querying = strict or getattr(self.readlink, '_supported', True) - link_count = 0 - while parts: - part = parts.pop() - if not part or part == '.': - continue - if part == '..': - if not path_tail: - if path_root: - # Delete '..' segment immediately following root - continue - elif path_tail[-1] != '..': - # Delete '..' segment and its predecessor - path_tail.pop() - continue - path_tail.append(part) - if querying and part != '..': - path = self.with_segments(path_root + self.parser.sep.join(path_tail)) + + def getcwd(): + return str(self.with_segments().absolute()) + + if strict or getattr(self.readlink, '_supported', True): + def lstat(path_str): + path = self.with_segments(path_str) path._resolving = True - try: - st = path.stat(follow_symlinks=False) - if S_ISLNK(st.st_mode): - # Like Linux and macOS, raise OSError(errno.ELOOP) if too many symlinks are - # encountered during resolution. 
- link_count += 1 - if link_count >= self._max_symlinks: - raise OSError(ELOOP, "Too many symbolic links in path", self._raw_path) - target_root, target_parts = path.readlink()._stack - # If the symlink target is absolute (like '/etc/hosts'), set the current - # path to its uppermost parent (like '/'). - if target_root: - path_root = target_root - path_tail.clear() - else: - path_tail.pop() - # Add the symlink target's reversed tail parts (like ['hosts', 'etc']) to - # the stack of unresolved path parts. - parts.extend(target_parts) - continue - elif parts and not S_ISDIR(st.st_mode): - raise NotADirectoryError(ENOTDIR, "Not a directory", self._raw_path) - except OSError: - if strict: - raise - else: - querying = False - return self.with_segments(path_root + self.parser.sep.join(path_tail)) + return path.lstat() + + def readlink(path_str): + path = self.with_segments(path_str) + path._resolving = True + return str(path.readlink()) + else: + # If the user has *not* overridden the `readlink()` method, then + # symlinks are unsupported and (in non-strict mode) we can improve + # performance by not calling `path.lstat()`. + def skip(path_str): + # This exception will be internally consumed by `_realpath()`. + raise OSError("Operation skipped.") + + lstat = readlink = skip + + return self.with_segments(posixpath._realpath( + str(self), strict, self.parser.sep, + getcwd=getcwd, lstat=lstat, readlink=readlink, + maxlinks=self._max_symlinks)) def symlink_to(self, target, target_is_directory=False): """ diff --git a/Lib/posixpath.py b/Lib/posixpath.py index 47b2aa572e5c65..fccca4e066b76f 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -22,6 +22,7 @@ altsep = None devnull = '/dev/null' +import errno import os import sys import stat @@ -401,7 +402,10 @@ def realpath(filename, *, strict=False): curdir = '.' pardir = '..' 
getcwd = os.getcwd + return _realpath(filename, strict, sep, curdir, pardir, getcwd) +def _realpath(filename, strict=False, sep=sep, curdir=curdir, pardir=pardir, + getcwd=os.getcwd, lstat=os.lstat, readlink=os.readlink, maxlinks=None): # The stack of unresolved path parts. When popped, a special value of None # indicates that a symlink target has been resolved, and that the original # symlink path can be retrieved by popping again. The [::-1] slice is a @@ -418,6 +422,10 @@ def realpath(filename, *, strict=False): # the same links. seen = {} + # Number of symlinks traversed. When the number of traversals is limited + # by *maxlinks*, this is used instead of *seen* to detect symlink loops. + link_count = 0 + while rest: name = rest.pop() if name is None: @@ -436,11 +444,19 @@ def realpath(filename, *, strict=False): else: newpath = path + sep + name try: - st = os.lstat(newpath) + st = lstat(newpath) if not stat.S_ISLNK(st.st_mode): path = newpath continue - if newpath in seen: + elif maxlinks is not None: + link_count += 1 + if link_count > maxlinks: + if strict: + raise OSError(errno.ELOOP, os.strerror(errno.ELOOP), + newpath) + path = newpath + continue + elif newpath in seen: # Already seen this path path = seen[newpath] if path is not None: @@ -448,26 +464,28 @@ def realpath(filename, *, strict=False): continue # The symlink is not resolved, so we must have a symlink loop. if strict: - # Raise OSError(errno.ELOOP) - os.stat(newpath) + raise OSError(errno.ELOOP, os.strerror(errno.ELOOP), + newpath) path = newpath continue - target = os.readlink(newpath) + target = readlink(newpath) except OSError: if strict: raise path = newpath continue # Resolve the symbolic link - seen[newpath] = None # not resolved symlink if target.startswith(sep): # Symlink target is absolute; reset resolved path. path = sep - # Push the symlink path onto the stack, and signal its specialness by - # also pushing None. 
When these entries are popped, we'll record the - # fully-resolved symlink target in the 'seen' mapping. - rest.append(newpath) - rest.append(None) + if maxlinks is None: + # Mark this symlink as seen but not fully resolved. + seen[newpath] = None + # Push the symlink path onto the stack, and signal its specialness + # by also pushing None. When these entries are popped, we'll + # record the fully-resolved symlink target in the 'seen' mapping. + rest.append(newpath) + rest.append(None) # Push the unresolved symlink target parts onto the stack. rest.extend(target.split(sep)[::-1]) From f878d46e5614f08a9302fcb6fc611ef49e9acf2f Mon Sep 17 00:00:00 2001 From: Jan Kaliszewski Date: Wed, 5 Jun 2024 23:52:40 +0200 Subject: [PATCH 103/373] gh-120128: fix description of argument to ipaddress.collapse_addresses() (#120131) The argument to collapse_addresses() is now described as an *iterable* (rather than *iterator*). --- Doc/library/ipaddress.rst | 2 +- Lib/ipaddress.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/ipaddress.rst b/Doc/library/ipaddress.rst index ead841b0581e21..f58c0ea75a4753 100644 --- a/Doc/library/ipaddress.rst +++ b/Doc/library/ipaddress.rst @@ -990,7 +990,7 @@ The module also provides the following module level functions: .. function:: collapse_addresses(addresses) Return an iterator of the collapsed :class:`IPv4Network` or - :class:`IPv6Network` objects. *addresses* is an iterator of + :class:`IPv6Network` objects. *addresses* is an :term:`iterable` of :class:`IPv4Network` or :class:`IPv6Network` objects. A :exc:`TypeError` is raised if *addresses* contains mixed version objects. diff --git a/Lib/ipaddress.py b/Lib/ipaddress.py index 8e4d49c859534d..9cef275f7ae2fc 100644 --- a/Lib/ipaddress.py +++ b/Lib/ipaddress.py @@ -310,7 +310,7 @@ def collapse_addresses(addresses): [IPv4Network('192.0.2.0/24')] Args: - addresses: An iterator of IPv4Network or IPv6Network objects. 
+ addresses: An iterable of IPv4Network or IPv6Network objects. Returns: An iterator of the collapsed IPv(4|6)Network objects. From eeb8f67f837facb37f092a8b743f4d249515e82f Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Thu, 6 Jun 2024 11:56:58 +0100 Subject: [PATCH 104/373] gh-119786: move adaptive interpreter doc from Python to InternalsDoc (#120137) --- InternalDocs/README.md | 1 + {Python => InternalDocs}/adaptive.md | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) rename {Python => InternalDocs}/adaptive.md (93%) diff --git a/InternalDocs/README.md b/InternalDocs/README.md index e69e27d1542990..a2502fbf198735 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -14,3 +14,4 @@ it is not, please report that through the [Exception Handling](exception_handling.md) +[Adaptive Instruction Families](adaptive.md) diff --git a/Python/adaptive.md b/InternalDocs/adaptive.md similarity index 93% rename from Python/adaptive.md rename to InternalDocs/adaptive.md index d978c089b237e0..09245730b271fa 100644 --- a/Python/adaptive.md +++ b/InternalDocs/adaptive.md @@ -2,8 +2,9 @@ ## Families of instructions -The core part of PEP 659 (specializing adaptive interpreter) is the families -of instructions that perform the adaptive specialization. +The core part of [PEP 659](https://peps.python.org/pep-0659/) +(specializing adaptive interpreter) is the families of +instructions that perform the adaptive specialization. A family of instructions has the following fundamental properties: @@ -30,8 +31,9 @@ although these are not fundamental and may change: ## Example family -The `LOAD_GLOBAL` instruction (in Python/bytecodes.c) already has an adaptive -family that serves as a relatively simple example. +The `LOAD_GLOBAL` instruction (in +[Python/bytecodes.c](https://github.com/python/cpython/blob/main/Python/bytecodes.c)) +already has an adaptive family that serves as a relatively simple example. 
The `LOAD_GLOBAL` instruction performs adaptive specialization, calling `_Py_Specialize_LoadGlobal()` when the counter reaches zero. From fd104dfcb838d735ef8128e3539d7a730d403422 Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Thu, 6 Jun 2024 13:40:37 +0200 Subject: [PATCH 105/373] gh-120111: Don't use cirrus M1 macOS runners on fork (#120116) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- .github/workflows/build.yml | 10 ++++++---- .github/workflows/reusable-macos.yml | 9 ++++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index cde93c77a0b82e..eb325ac2f9ee1b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -199,8 +199,9 @@ jobs: uses: ./.github/workflows/reusable-macos.yml with: config_hash: ${{ needs.check_source.outputs.config_hash }} - # Cirrus is M1, macos-13 is default GHA Intel - os-matrix: '["ghcr.io/cirruslabs/macos-runner:sonoma", "macos-13"]' + # Cirrus and macos-14 are M1, macos-13 is default GHA Intel. + # Cirrus used for upstream, macos-14 for forks. + os-matrix: '["ghcr.io/cirruslabs/macos-runner:sonoma", "macos-14", "macos-13"]' build_macos_free_threading: name: 'macOS (free-threading)' @@ -210,8 +211,9 @@ jobs: with: config_hash: ${{ needs.check_source.outputs.config_hash }} free-threading: true - # Cirrus is M1 - os-matrix: '["ghcr.io/cirruslabs/macos-runner:sonoma"]' + # Cirrus and macos-14 are M1. + # Cirrus used for upstream, macos-14 for forks. 
+ os-matrix: '["ghcr.io/cirruslabs/macos-runner:sonoma", "macos-14"]' build_ubuntu: name: 'Ubuntu' diff --git a/.github/workflows/reusable-macos.yml b/.github/workflows/reusable-macos.yml index d06a718d199c96..f825d1a7b3f69a 100644 --- a/.github/workflows/reusable-macos.yml +++ b/.github/workflows/reusable-macos.yml @@ -14,7 +14,7 @@ on: jobs: build_macos: - name: 'build and test' + name: build and test (${{ matrix.os }}) timeout-minutes: 60 env: HOMEBREW_NO_ANALYTICS: 1 @@ -27,6 +27,13 @@ jobs: fail-fast: false matrix: os: ${{fromJson(inputs.os-matrix)}} + is-fork: + - ${{ github.repository_owner != 'python' }} + exclude: + - os: "ghcr.io/cirruslabs/macos-runner:sonoma" + is-fork: true + - os: "macos-14" + is-fork: false runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 From cccc9f63c63ae693ccd0e2d8fc6cfd3aa18feb8e Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Thu, 6 Jun 2024 16:11:42 +0100 Subject: [PATCH 106/373] gh-119679: Fix layout of PYD and DLL files on Windows when using PC/layout script (GH-120133) --- PC/layout/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PC/layout/main.py b/PC/layout/main.py index 716f01097fe3b0..0350ed7af3f9b5 100644 --- a/PC/layout/main.py +++ b/PC/layout/main.py @@ -202,7 +202,7 @@ def in_build(f, dest="", new_name=None, no_lib=False): yield "LICENSE.txt", ns.build / "LICENSE.txt" - dest="" if ns.flat_dlls else "DLLs/" + dest = "" if ns.flat_dlls else "DLLs/" for _, src in rglob(ns.build, "*.pyd"): if ns.include_freethreaded: @@ -226,7 +226,7 @@ def in_build(f, dest="", new_name=None, no_lib=False): continue if src in EXCLUDE_FROM_DLLS: continue - yield from in_build(src.name, no_lib=True) + yield from in_build(src.name, dest=dest, no_lib=True) if ns.zip_lib: zip_name = PYTHON_ZIP_NAME From 78634cfa3dd4b542897835d5f097604dbeb0f3fd Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 6 Jun 2024 17:31:33 +0200 Subject: [PATCH 107/373] gh-120155: Initialize variables in _tkinter.c 
(#120156) Initialize variables in _tkinter.c to make static analyzers happy. --- Modules/_tkinter.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c index 24f87c8d34c6b2..a34646aecb3ec8 100644 --- a/Modules/_tkinter.c +++ b/Modules/_tkinter.c @@ -1438,7 +1438,7 @@ Tkapp_Call(PyObject *selfptr, PyObject *args) marshal the parameters to the interpreter thread. */ Tkapp_CallEvent *ev; Tcl_Condition cond = NULL; - PyObject *exc; + PyObject *exc = NULL; // init to make static analyzers happy if (!WaitForMainloop(self)) return NULL; ev = (Tkapp_CallEvent*)attemptckalloc(sizeof(Tkapp_CallEvent)); @@ -1712,7 +1712,8 @@ var_invoke(EventFunc func, PyObject *selfptr, PyObject *args, int flags) TkappObject *self = (TkappObject*)selfptr; if (self->threaded && self->thread_id != Tcl_GetCurrentThread()) { VarEvent *ev; - PyObject *res, *exc; + // init 'res' and 'exc' to make static analyzers happy + PyObject *res = NULL, *exc = NULL; Tcl_Condition cond = NULL; /* The current thread is not the interpreter thread. 
Marshal @@ -2413,6 +2414,8 @@ _tkinter_tkapp_createcommand_impl(TkappObject *self, const char *name, data->self = self; data->func = Py_NewRef(func); if (self->threaded && self->thread_id != Tcl_GetCurrentThread()) { + err = 0; // init to make static analyzers happy + Tcl_Condition cond = NULL; CommandEvent *ev = (CommandEvent*)attemptckalloc(sizeof(CommandEvent)); if (ev == NULL) { @@ -2468,6 +2471,8 @@ _tkinter_tkapp_deletecommand_impl(TkappObject *self, const char *name) TRACE(self, ("((sss))", "rename", name, "")); if (self->threaded && self->thread_id != Tcl_GetCurrentThread()) { + err = 0; // init to make static analyzers happy + Tcl_Condition cond = NULL; CommandEvent *ev; ev = (CommandEvent*)attemptckalloc(sizeof(CommandEvent)); From d50a7c478feb4037e65fcaea453d9ecc00259dd9 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Thu, 6 Jun 2024 09:25:05 -0700 Subject: [PATCH 108/373] CODEOWNERS: Add myself to symtable and AST (#120139) Co-authored-by: Carl Meyer --- .github/CODEOWNERS | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index ca5c8aaa3a0bef..c7021b3c2a4c40 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -40,6 +40,7 @@ Python/bytecodes.c @markshannon Python/optimizer*.c @markshannon Python/optimizer_analysis.c @Fidget-Spinner Python/optimizer_bytecodes.c @Fidget-Spinner +Python/symtable.c @JelleZijlstra @carljm Lib/_pyrepl/* @pablogsal @lysnikolaou @ambv Lib/test/test_patma.py @brandtbucher Lib/test/test_type_*.py @JelleZijlstra @@ -153,10 +154,10 @@ Include/internal/pycore_time.h @pganssle @abalkin /Tools/cases_generator/ @markshannon # AST -Python/ast.c @isidentical -Parser/asdl.py @isidentical -Parser/asdl_c.py @isidentical -Lib/ast.py @isidentical +Python/ast.c @isidentical @JelleZijlstra +Parser/asdl.py @isidentical @JelleZijlstra +Parser/asdl_c.py @isidentical @JelleZijlstra +Lib/ast.py @isidentical @JelleZijlstra # Mock /Lib/unittest/mock.py @cjw296 From 
2d7ff6e0e7d4c08ba84079a5c19a4a485626e1de Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 6 Jun 2024 20:12:32 +0300 Subject: [PATCH 109/373] Restore decimal context after decimal doctests (GH-120149) The modified context caused tests failures in several other tests. --- Lib/test/test_decimal.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index e927e24b582a5d..46755107de0102 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -5892,13 +5892,17 @@ def load_tests(loader, tests, pattern): if TODO_TESTS is None: from doctest import DocTestSuite, IGNORE_EXCEPTION_DETAIL + orig_context = orig_sys_decimal.getcontext().copy() for mod in C, P: if not mod: continue def setUp(slf, mod=mod): sys.modules['decimal'] = mod - def tearDown(slf): + init(mod) + def tearDown(slf, mod=mod): sys.modules['decimal'] = orig_sys_decimal + mod.setcontext(ORIGINAL_CONTEXT[mod].copy()) + orig_sys_decimal.setcontext(orig_context.copy()) optionflags = IGNORE_EXCEPTION_DETAIL if mod is C else 0 sys.modules['decimal'] = mod tests.addTest(DocTestSuite(mod, setUp=setUp, tearDown=tearDown, @@ -5913,8 +5917,8 @@ def setUpModule(): TEST_ALL = ARITH if ARITH is not None else is_resource_enabled('decimal') def tearDownModule(): - if C: C.setcontext(ORIGINAL_CONTEXT[C]) - P.setcontext(ORIGINAL_CONTEXT[P]) + if C: C.setcontext(ORIGINAL_CONTEXT[C].copy()) + P.setcontext(ORIGINAL_CONTEXT[P].copy()) if not C: warnings.warn('C tests skipped: no module named _decimal.', UserWarning) From 417bec733c11e63df559ecf898802dbef590142e Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Thu, 6 Jun 2024 10:20:37 -0700 Subject: [PATCH 110/373] Add Tian Gao to CODEOWNERS and ACKS (GH-120166) --- .github/CODEOWNERS | 4 ++++ Misc/ACKS | 1 + 2 files changed, 5 insertions(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index c7021b3c2a4c40..811b8cfdab17dc 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS 
@@ -174,6 +174,10 @@ Lib/ast.py @isidentical @JelleZijlstra /Lib/test/test_subprocess.py @gpshead /Modules/*subprocess* @gpshead +# debugger +**/*pdb* @gaogaotiantian +**/*bdb* @gaogaotiantian + # Limited C API & stable ABI Tools/build/stable_abi.py @encukou Misc/stable_abi.toml @encukou diff --git a/Misc/ACKS b/Misc/ACKS index af92d81ff3141a..2f4c0793437fb6 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -610,6 +610,7 @@ Nitin Ganatra Soumendra Ganguly (गङ्गोपाध्याय) Fred Gansevles Paul Ganssle +Tian Gao Lars Marius Garshol Jake Garver Dan Gass From e21057b99967eb5323320e6d1121955e0cd2985e Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 6 Jun 2024 13:40:58 -0400 Subject: [PATCH 111/373] gh-117657: Fix TSAN race involving import lock (#118523) This adds a `_PyRecursiveMutex` type based on `PyMutex` and uses that for the import lock. This fixes some data races in the free-threaded build and generally simplifies the import lock code. --- Include/internal/pycore_import.h | 18 +---- Include/internal/pycore_lock.h | 12 ++++ Modules/_testinternalcapi/test_lock.c | 25 +++++++ Modules/posixmodule.c | 11 +-- Python/import.c | 83 ++-------------------- Python/lock.c | 42 +++++++++++ Tools/tsan/suppressions_free_threading.txt | 4 -- 7 files changed, 90 insertions(+), 105 deletions(-) diff --git a/Include/internal/pycore_import.h b/Include/internal/pycore_import.h index f8329a460d6cbf..290ba95e1a0ad7 100644 --- a/Include/internal/pycore_import.h +++ b/Include/internal/pycore_import.h @@ -20,7 +20,7 @@ PyAPI_FUNC(int) _PyImport_SetModule(PyObject *name, PyObject *module); extern int _PyImport_SetModuleString(const char *name, PyObject* module); extern void _PyImport_AcquireLock(PyInterpreterState *interp); -extern int _PyImport_ReleaseLock(PyInterpreterState *interp); +extern void _PyImport_ReleaseLock(PyInterpreterState *interp); // This is used exclusively for the sys and builtins modules: extern int _PyImport_FixupBuiltin( @@ -94,11 +94,7 @@ struct _import_state { #endif 
PyObject *import_func; /* The global import lock. */ - struct { - PyThread_type_lock mutex; - unsigned long thread; - int level; - } lock; + _PyRecursiveMutex lock; /* diagnostic info in PyImport_ImportModuleLevelObject() */ struct { int import_level; @@ -123,11 +119,6 @@ struct _import_state { #define IMPORTS_INIT \ { \ DLOPENFLAGS_INIT \ - .lock = { \ - .mutex = NULL, \ - .thread = PYTHREAD_INVALID_THREAD_ID, \ - .level = 0, \ - }, \ .find_and_load = { \ .header = 1, \ }, \ @@ -180,11 +171,6 @@ extern void _PyImport_FiniCore(PyInterpreterState *interp); extern void _PyImport_FiniExternal(PyInterpreterState *interp); -#ifdef HAVE_FORK -extern PyStatus _PyImport_ReInitLock(PyInterpreterState *interp); -#endif - - extern PyObject* _PyImport_GetBuiltinModuleNames(void); struct _module_alias { diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index a5b28e4bd4744e..d5853b2c9ff464 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -219,6 +219,18 @@ _PyOnceFlag_CallOnce(_PyOnceFlag *flag, _Py_once_fn_t *fn, void *arg) return _PyOnceFlag_CallOnceSlow(flag, fn, arg); } +// A recursive mutex. The mutex should zero-initialized. +typedef struct { + PyMutex mutex; + unsigned long long thread; // i.e., PyThread_get_thread_ident_ex() + size_t level; +} _PyRecursiveMutex; + +PyAPI_FUNC(int) _PyRecursiveMutex_IsLockedByCurrentThread(_PyRecursiveMutex *m); +PyAPI_FUNC(void) _PyRecursiveMutex_Lock(_PyRecursiveMutex *m); +PyAPI_FUNC(void) _PyRecursiveMutex_Unlock(_PyRecursiveMutex *m); + + // A readers-writer (RW) lock. The lock supports multiple concurrent readers or // a single writer. The lock is write-preferring: if a writer is waiting while // the lock is read-locked then, new readers will be blocked. 
This avoids diff --git a/Modules/_testinternalcapi/test_lock.c b/Modules/_testinternalcapi/test_lock.c index 4900459c689279..1544fe1363c7c5 100644 --- a/Modules/_testinternalcapi/test_lock.c +++ b/Modules/_testinternalcapi/test_lock.c @@ -2,6 +2,7 @@ #include "parts.h" #include "pycore_lock.h" +#include "pycore_pythread.h" // PyThread_get_thread_ident_ex() #include "clinic/test_lock.c.h" @@ -476,6 +477,29 @@ test_lock_rwlock(PyObject *self, PyObject *obj) Py_RETURN_NONE; } +static PyObject * +test_lock_recursive(PyObject *self, PyObject *obj) +{ + _PyRecursiveMutex m = (_PyRecursiveMutex){0}; + assert(!_PyRecursiveMutex_IsLockedByCurrentThread(&m)); + + _PyRecursiveMutex_Lock(&m); + assert(m.thread == PyThread_get_thread_ident_ex()); + assert(PyMutex_IsLocked(&m.mutex)); + assert(m.level == 0); + + _PyRecursiveMutex_Lock(&m); + assert(m.level == 1); + _PyRecursiveMutex_Unlock(&m); + + _PyRecursiveMutex_Unlock(&m); + assert(m.thread == 0); + assert(!PyMutex_IsLocked(&m.mutex)); + assert(m.level == 0); + + Py_RETURN_NONE; +} + static PyMethodDef test_methods[] = { {"test_lock_basic", test_lock_basic, METH_NOARGS}, {"test_lock_two_threads", test_lock_two_threads, METH_NOARGS}, @@ -485,6 +509,7 @@ static PyMethodDef test_methods[] = { {"test_lock_benchmark", test_lock_benchmark, METH_NOARGS}, {"test_lock_once", test_lock_once, METH_NOARGS}, {"test_lock_rwlock", test_lock_rwlock, METH_NOARGS}, + {"test_lock_recursive", test_lock_recursive, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 386e942d53f539..5f943d4b1c8085 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -16,7 +16,6 @@ #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_ceval.h" // _PyEval_ReInitThreads() #include "pycore_fileutils.h" // _Py_closerange() -#include "pycore_import.h" // _PyImport_ReInitLock() #include "pycore_initconfig.h" // _PyStatus_EXCEPTION() #include "pycore_long.h" // _PyLong_IsNegative() 
#include "pycore_moduleobject.h" // _PyModule_GetState() @@ -627,10 +626,7 @@ PyOS_AfterFork_Parent(void) _PyEval_StartTheWorldAll(&_PyRuntime); PyInterpreterState *interp = _PyInterpreterState_GET(); - if (_PyImport_ReleaseLock(interp) <= 0) { - Py_FatalError("failed releasing import lock after fork"); - } - + _PyImport_ReleaseLock(interp); run_at_forkers(interp->after_forkers_parent, 0); } @@ -675,10 +671,7 @@ PyOS_AfterFork_Child(void) _PyEval_StartTheWorldAll(&_PyRuntime); _PyThreadState_DeleteList(list); - status = _PyImport_ReInitLock(tstate->interp); - if (_PyStatus_EXCEPTION(status)) { - goto fatal_error; - } + _PyImport_ReleaseLock(tstate->interp); _PySignal_AfterFork(); diff --git a/Python/import.c b/Python/import.c index 351d463dcab465..2c7a461ac786c8 100644 --- a/Python/import.c +++ b/Python/import.c @@ -94,11 +94,7 @@ static struct _inittab *inittab_copy = NULL; (interp)->imports.import_func #define IMPORT_LOCK(interp) \ - (interp)->imports.lock.mutex -#define IMPORT_LOCK_THREAD(interp) \ - (interp)->imports.lock.thread -#define IMPORT_LOCK_LEVEL(interp) \ - (interp)->imports.lock.level + (interp)->imports.lock #define FIND_AND_LOAD(interp) \ (interp)->imports.find_and_load @@ -115,74 +111,14 @@ static struct _inittab *inittab_copy = NULL; void _PyImport_AcquireLock(PyInterpreterState *interp) { - unsigned long me = PyThread_get_thread_ident(); - if (me == PYTHREAD_INVALID_THREAD_ID) - return; /* Too bad */ - if (IMPORT_LOCK(interp) == NULL) { - IMPORT_LOCK(interp) = PyThread_allocate_lock(); - if (IMPORT_LOCK(interp) == NULL) - return; /* Nothing much we can do. 
*/ - } - if (IMPORT_LOCK_THREAD(interp) == me) { - IMPORT_LOCK_LEVEL(interp)++; - return; - } - if (IMPORT_LOCK_THREAD(interp) != PYTHREAD_INVALID_THREAD_ID || - !PyThread_acquire_lock(IMPORT_LOCK(interp), 0)) - { - PyThreadState *tstate = PyEval_SaveThread(); - PyThread_acquire_lock(IMPORT_LOCK(interp), WAIT_LOCK); - PyEval_RestoreThread(tstate); - } - assert(IMPORT_LOCK_LEVEL(interp) == 0); - IMPORT_LOCK_THREAD(interp) = me; - IMPORT_LOCK_LEVEL(interp) = 1; + _PyRecursiveMutex_Lock(&IMPORT_LOCK(interp)); } -int +void _PyImport_ReleaseLock(PyInterpreterState *interp) { - unsigned long me = PyThread_get_thread_ident(); - if (me == PYTHREAD_INVALID_THREAD_ID || IMPORT_LOCK(interp) == NULL) - return 0; /* Too bad */ - if (IMPORT_LOCK_THREAD(interp) != me) - return -1; - IMPORT_LOCK_LEVEL(interp)--; - assert(IMPORT_LOCK_LEVEL(interp) >= 0); - if (IMPORT_LOCK_LEVEL(interp) == 0) { - IMPORT_LOCK_THREAD(interp) = PYTHREAD_INVALID_THREAD_ID; - PyThread_release_lock(IMPORT_LOCK(interp)); - } - return 1; -} - -#ifdef HAVE_FORK -/* This function is called from PyOS_AfterFork_Child() to ensure that newly - created child processes do not share locks with the parent. - We now acquire the import lock around fork() calls but on some platforms - (Solaris 9 and earlier? see isue7242) that still left us with problems. 
*/ -PyStatus -_PyImport_ReInitLock(PyInterpreterState *interp) -{ - if (IMPORT_LOCK(interp) != NULL) { - if (_PyThread_at_fork_reinit(&IMPORT_LOCK(interp)) < 0) { - return _PyStatus_ERR("failed to create a new lock"); - } - } - - if (IMPORT_LOCK_LEVEL(interp) > 1) { - /* Forked as a side effect of import */ - unsigned long me = PyThread_get_thread_ident(); - PyThread_acquire_lock(IMPORT_LOCK(interp), WAIT_LOCK); - IMPORT_LOCK_THREAD(interp) = me; - IMPORT_LOCK_LEVEL(interp)--; - } else { - IMPORT_LOCK_THREAD(interp) = PYTHREAD_INVALID_THREAD_ID; - IMPORT_LOCK_LEVEL(interp) = 0; - } - return _PyStatus_OK(); + _PyRecursiveMutex_Unlock(&IMPORT_LOCK(interp)); } -#endif /***************/ @@ -4111,11 +4047,6 @@ _PyImport_FiniCore(PyInterpreterState *interp) PyErr_FormatUnraisable("Exception ignored on clearing sys.modules"); } - if (IMPORT_LOCK(interp) != NULL) { - PyThread_free_lock(IMPORT_LOCK(interp)); - IMPORT_LOCK(interp) = NULL; - } - _PyImport_ClearCore(interp); } @@ -4248,8 +4179,7 @@ _imp_lock_held_impl(PyObject *module) /*[clinic end generated code: output=8b89384b5e1963fc input=9b088f9b217d9bdf]*/ { PyInterpreterState *interp = _PyInterpreterState_GET(); - return PyBool_FromLong( - IMPORT_LOCK_THREAD(interp) != PYTHREAD_INVALID_THREAD_ID); + return PyBool_FromLong(PyMutex_IsLocked(&IMPORT_LOCK(interp).mutex)); } /*[clinic input] @@ -4283,11 +4213,12 @@ _imp_release_lock_impl(PyObject *module) /*[clinic end generated code: output=7faab6d0be178b0a input=934fb11516dd778b]*/ { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (_PyImport_ReleaseLock(interp) < 0) { + if (!_PyRecursiveMutex_IsLockedByCurrentThread(&IMPORT_LOCK(interp))) { PyErr_SetString(PyExc_RuntimeError, "not holding the import lock"); return NULL; } + _PyImport_ReleaseLock(interp); Py_RETURN_NONE; } diff --git a/Python/lock.c b/Python/lock.c index 239e56ad929ea3..555f4c25b9b214 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -366,6 +366,48 @@ _PyOnceFlag_CallOnceSlow(_PyOnceFlag 
*flag, _Py_once_fn_t *fn, void *arg) } } +static int +recursive_mutex_is_owned_by(_PyRecursiveMutex *m, PyThread_ident_t tid) +{ + return _Py_atomic_load_ullong_relaxed(&m->thread) == tid; +} + +int +_PyRecursiveMutex_IsLockedByCurrentThread(_PyRecursiveMutex *m) +{ + return recursive_mutex_is_owned_by(m, PyThread_get_thread_ident_ex()); +} + +void +_PyRecursiveMutex_Lock(_PyRecursiveMutex *m) +{ + PyThread_ident_t thread = PyThread_get_thread_ident_ex(); + if (recursive_mutex_is_owned_by(m, thread)) { + m->level++; + return; + } + PyMutex_Lock(&m->mutex); + _Py_atomic_store_ullong_relaxed(&m->thread, thread); + assert(m->level == 0); +} + +void +_PyRecursiveMutex_Unlock(_PyRecursiveMutex *m) +{ + PyThread_ident_t thread = PyThread_get_thread_ident_ex(); + if (!recursive_mutex_is_owned_by(m, thread)) { + Py_FatalError("unlocking a recursive mutex that is not owned by the" + " current thread"); + } + if (m->level > 0) { + m->level--; + return; + } + assert(m->level == 0); + _Py_atomic_store_ullong_relaxed(&m->thread, 0); + PyMutex_Unlock(&m->mutex); +} + #define _Py_WRITE_LOCKED 1 #define _PyRWMutex_READER_SHIFT 2 #define _Py_RWMUTEX_MAX_READERS (UINTPTR_MAX >> _PyRWMutex_READER_SHIFT) diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index 8b64d1ff321858..cb48a30751ac7b 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -26,8 +26,6 @@ race:free_threadstate race_top:_add_to_weak_set race_top:_in_weak_set race_top:_PyEval_EvalFrameDefault -race_top:_PyImport_AcquireLock -race_top:_PyImport_ReleaseLock race_top:_PyType_HasFeature race_top:assign_version_tag race_top:insertdict @@ -41,9 +39,7 @@ race_top:set_discard_entry race_top:set_inheritable race_top:Py_SET_TYPE race_top:_PyDict_CheckConsistency -race_top:_PyImport_AcquireLock race_top:_Py_dict_lookup_threadsafe -race_top:_imp_release_lock race_top:_multiprocessing_SemLock_acquire_impl race_top:dictiter_new 
race_top:dictresize From 5bdc87b8859c837092e7c5b19583f98488f7a387 Mon Sep 17 00:00:00 2001 From: David Lowry-Duda Date: Thu, 6 Jun 2024 17:35:24 -0400 Subject: [PATCH 112/373] gh-120178: Documentation typo corrections (#120179) --- Doc/glossary.rst | 2 +- Doc/library/dis.rst | 2 +- Doc/library/pdb.rst | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/glossary.rst b/Doc/glossary.rst index ae9949bc2867c4..8685369117fd87 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -594,7 +594,7 @@ Glossary therefore it is never deallocated. Built-in strings and singletons are immortal objects. For example, - :const:`True` and :const:`None` singletons are immmortal. + :const:`True` and :const:`None` singletons are immortal. See `PEP 683 – Immortal Objects, Using a Fixed Refcount `_ for more information. diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index 87d1bcdfaf3f1d..ab46d4554d8773 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -1667,7 +1667,7 @@ iterations of the loop. A no-op. Performs internal tracing, debugging and optimization checks. - The ``context`` oparand consists of two parts. The lowest two bits + The ``context`` operand consists of two parts. The lowest two bits indicate where the ``RESUME`` occurs: * ``0`` The start of a function, which is neither a generator, coroutine diff --git a/Doc/library/pdb.rst b/Doc/library/pdb.rst index cd6496203949ea..f6085171dccb38 100644 --- a/Doc/library/pdb.rst +++ b/Doc/library/pdb.rst @@ -341,7 +341,7 @@ can be overridden by the local file. With a *lineno* argument, set a break at line *lineno* in the current file. The line number may be prefixed with a *filename* and a colon, to specify a breakpoint in another file (possibly one that hasn't been loaded - yet). The file is searched on :data:`sys.path`. Accepatable forms of *filename* + yet). The file is searched on :data:`sys.path`. 
Acceptable forms of *filename* are ``/abspath/to/file.py``, ``relpath/file.py``, ``module`` and ``package.module``. From 14e1506a6d7056c38fbbc0797268dcf783f91243 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 7 Jun 2024 00:27:39 +0100 Subject: [PATCH 113/373] GH-119054: Add "Reading directories" section to pathlib docs (#119956) Add a dedicated subsection for `Path.iterdir()`-related methods, specifically `iterdir()`, `glob()`, `rglob()` and `walk()`. Co-authored-by: Jelle Zijlstra --- Doc/library/pathlib.rst | 197 +++++++++++++++++++++------------------- 1 file changed, 102 insertions(+), 95 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index f37bb33321fa53..b7ab44706a0160 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -820,6 +820,9 @@ bugs or failures in your application):: % (cls.__name__,)) UnsupportedOperation: cannot instantiate 'WindowsPath' on your system +Some concrete path methods can raise an :exc:`OSError` if a system call fails +(for example because the path doesn't exist). + Parsing and generating URIs ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1151,69 +1154,32 @@ Reading and writing files .. versionadded:: 3.5 -Other methods -^^^^^^^^^^^^^ - -Some of these methods can raise an :exc:`OSError` if a system call fails (for -example because the path doesn't exist). - - -.. classmethod:: Path.cwd() - - Return a new path object representing the current directory (as returned - by :func:`os.getcwd`):: - - >>> Path.cwd() - PosixPath('/home/antoine/pathlib') - - -.. classmethod:: Path.home() - - Return a new path object representing the user's home directory (as - returned by :func:`os.path.expanduser` with ``~`` construct). If the home - directory can't be resolved, :exc:`RuntimeError` is raised. - - :: - - >>> Path.home() - PosixPath('/home/antoine') - - .. versionadded:: 3.5 - - -.. method:: Path.chmod(mode, *, follow_symlinks=True) - - Change the file mode and permissions, like :func:`os.chmod`. 
- - This method normally follows symlinks. Some Unix flavours support changing - permissions on the symlink itself; on these platforms you may add the - argument ``follow_symlinks=False``, or use :meth:`~Path.lchmod`. - - :: - - >>> p = Path('setup.py') - >>> p.stat().st_mode - 33277 - >>> p.chmod(0o444) - >>> p.stat().st_mode - 33060 - - .. versionchanged:: 3.10 - The *follow_symlinks* parameter was added. +Reading directories +^^^^^^^^^^^^^^^^^^^ -.. method:: Path.expanduser() +.. method:: Path.iterdir() - Return a new path with expanded ``~`` and ``~user`` constructs, - as returned by :meth:`os.path.expanduser`. If a home directory can't be - resolved, :exc:`RuntimeError` is raised. + When the path points to a directory, yield path objects of the directory + contents:: - :: + >>> p = Path('docs') + >>> for child in p.iterdir(): child + ... + PosixPath('docs/conf.py') + PosixPath('docs/_templates') + PosixPath('docs/make.bat') + PosixPath('docs/index.rst') + PosixPath('docs/_build') + PosixPath('docs/_static') + PosixPath('docs/Makefile') - >>> p = PosixPath('~/films/Monty Python') - >>> p.expanduser() - PosixPath('/home/eric/films/Monty Python') + The children are yielded in arbitrary order, and the special entries + ``'.'`` and ``'..'`` are not included. If a file is removed from or added + to the directory after creating the iterator, it is unspecified whether + a path object for that file is included. - .. versionadded:: 3.5 + If the path is not a directory or otherwise inaccessible, :exc:`OSError` is + raised. .. method:: Path.glob(pattern, *, case_sensitive=None, recurse_symlinks=False) @@ -1281,43 +1247,6 @@ example because the path doesn't exist). The *pattern* parameter accepts a :term:`path-like object`. -.. method:: Path.group(*, follow_symlinks=True) - - Return the name of the group owning the file. :exc:`KeyError` is raised - if the file's gid isn't found in the system database. 
- - This method normally follows symlinks; to get the group of the symlink, add - the argument ``follow_symlinks=False``. - - .. versionchanged:: 3.13 - Raises :exc:`UnsupportedOperation` if the :mod:`grp` module is not - available. In previous versions, :exc:`NotImplementedError` was raised. - - .. versionchanged:: 3.13 - The *follow_symlinks* parameter was added. - - -.. method:: Path.iterdir() - - When the path points to a directory, yield path objects of the directory - contents:: - - >>> p = Path('docs') - >>> for child in p.iterdir(): child - ... - PosixPath('docs/conf.py') - PosixPath('docs/_templates') - PosixPath('docs/make.bat') - PosixPath('docs/index.rst') - PosixPath('docs/_build') - PosixPath('docs/_static') - PosixPath('docs/Makefile') - - The children are yielded in arbitrary order, and the special entries - ``'.'`` and ``'..'`` are not included. If a file is removed from or added - to the directory after creating the iterator, whether a path object for - that file be included is unspecified. - .. method:: Path.walk(top_down=True, on_error=None, follow_symlinks=False) Generate the file names in a directory tree by walking the tree @@ -1413,6 +1342,84 @@ example because the path doesn't exist). .. versionadded:: 3.12 + +Other methods +^^^^^^^^^^^^^ + +.. classmethod:: Path.cwd() + + Return a new path object representing the current directory (as returned + by :func:`os.getcwd`):: + + >>> Path.cwd() + PosixPath('/home/antoine/pathlib') + + +.. classmethod:: Path.home() + + Return a new path object representing the user's home directory (as + returned by :func:`os.path.expanduser` with ``~`` construct). If the home + directory can't be resolved, :exc:`RuntimeError` is raised. + + :: + + >>> Path.home() + PosixPath('/home/antoine') + + .. versionadded:: 3.5 + + +.. method:: Path.chmod(mode, *, follow_symlinks=True) + + Change the file mode and permissions, like :func:`os.chmod`. + + This method normally follows symlinks. 
Some Unix flavours support changing + permissions on the symlink itself; on these platforms you may add the + argument ``follow_symlinks=False``, or use :meth:`~Path.lchmod`. + + :: + + >>> p = Path('setup.py') + >>> p.stat().st_mode + 33277 + >>> p.chmod(0o444) + >>> p.stat().st_mode + 33060 + + .. versionchanged:: 3.10 + The *follow_symlinks* parameter was added. + +.. method:: Path.expanduser() + + Return a new path with expanded ``~`` and ``~user`` constructs, + as returned by :meth:`os.path.expanduser`. If a home directory can't be + resolved, :exc:`RuntimeError` is raised. + + :: + + >>> p = PosixPath('~/films/Monty Python') + >>> p.expanduser() + PosixPath('/home/eric/films/Monty Python') + + .. versionadded:: 3.5 + + +.. method:: Path.group(*, follow_symlinks=True) + + Return the name of the group owning the file. :exc:`KeyError` is raised + if the file's gid isn't found in the system database. + + This method normally follows symlinks; to get the group of the symlink, add + the argument ``follow_symlinks=False``. + + .. versionchanged:: 3.13 + Raises :exc:`UnsupportedOperation` if the :mod:`grp` module is not + available. In previous versions, :exc:`NotImplementedError` was raised. + + .. versionchanged:: 3.13 + The *follow_symlinks* parameter was added. + + .. method:: Path.lchmod(mode) Like :meth:`Path.chmod` but, if the path points to a symbolic link, the From 6b606522ca97488aad6fe2f193d4511e7a8f8334 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 6 Jun 2024 23:18:30 -0400 Subject: [PATCH 114/373] gh-119577: Adjust DeprecationWarning when testing element truth values in ElementTree (GH-119762) Adjust DeprecationWarning when testing element truth values in ElementTree, we're planning to go with the more natural True return rather than a disruptive harder to code around exception raise, and are deferring the behavior change for a few more releases. 
--- Doc/library/xml.etree.elementtree.rst | 7 ++++--- Doc/whatsnew/3.12.rst | 7 +++++-- Doc/whatsnew/3.13.rst | 10 +++++----- Lib/test/test_xml_etree.py | 2 +- Lib/xml/etree/ElementTree.py | 2 +- .../2024-05-29-21-50-05.gh-issue-119577.S3BlKJ.rst | 4 ++++ Modules/_elementtree.c | 2 +- 7 files changed, 21 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-29-21-50-05.gh-issue-119577.S3BlKJ.rst diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index a6a9eb87f56d88..e5919029c62c93 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -1058,9 +1058,10 @@ Element Objects :meth:`~object.__getitem__`, :meth:`~object.__setitem__`, :meth:`~object.__len__`. - Caution: Elements with no subelements will test as ``False``. Testing the - truth value of an Element is deprecated and will raise an exception in - Python 3.14. Use specific ``len(elem)`` or ``elem is None`` test instead.:: + Caution: Elements with no subelements will test as ``False``. In a future + release of Python, all elements will test as ``True`` regardless of whether + subelements exist. Instead, prefer explicit ``len(elem)`` or + ``elem is not None`` tests.:: element = root.find('foo') diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index f99489fb53db74..28b28e9ce50e11 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -1440,8 +1440,6 @@ and will be removed in Python 3.14. * :mod:`typing`: :class:`!typing.ByteString` -* :mod:`xml.etree.ElementTree`: Testing the truth value of an :class:`xml.etree.ElementTree.Element`. - * The ``__package__`` and ``__cached__`` attributes on module objects. * The :attr:`~codeobject.co_lnotab` attribute of code objects. @@ -1467,6 +1465,11 @@ although there is currently no date scheduled for their removal. 
* :class:`typing.Text` (:gh:`92332`) +* :mod:`xml.etree.ElementTree`: Testing the truth value of an + :class:`xml.etree.ElementTree.Element` is deprecated. In a future release it + will always return True. Prefer explicit ``len(elem)`` or + ``elem is not None`` tests instead. + * Currently Python accepts numeric literals immediately followed by keywords, for example ``0in x``, ``1or x``, ``0if 1else 2``. It allows confusing and ambiguous expressions like ``[0x1for x in y]`` (which can be diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index a1d2a0d84e7581..81daaabdb889d7 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1728,11 +1728,6 @@ Pending Removal in Python 3.14 public API. (Contributed by Gregory P. Smith in :gh:`88168`.) -* :mod:`xml.etree.ElementTree`: Testing the truth value of an - :class:`~xml.etree.ElementTree.Element` is deprecated and will raise an - exception in Python 3.14. - - Pending Removal in Python 3.15 ------------------------------ @@ -1937,6 +1932,11 @@ although there is currently no date scheduled for their removal. * :mod:`wsgiref`: ``SimpleHandler.stdout.write()`` should not do partial writes. +* :mod:`xml.etree.ElementTree`: Testing the truth value of an + :class:`~xml.etree.ElementTree.Element` is deprecated. In a future release it + will always return ``True``. Prefer explicit ``len(elem)`` or + ``elem is not None`` tests instead. + * :meth:`zipimport.zipimporter.load_module` is deprecated: use :meth:`~zipimport.zipimporter.exec_module` instead. diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index bae61f754e75f5..3d9141fea1ef3e 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -4088,7 +4088,7 @@ class BoolTest(unittest.TestCase): def test_warning(self): e = ET.fromstring('') msg = ( - r"Testing an element's truth value will raise an exception in " + r"Testing an element's truth value will always return True in " r"future versions. 
" r"Use specific 'len\(elem\)' or 'elem is not None' test instead.") with self.assertWarnsRegex(DeprecationWarning, msg): diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 9e15d34d22aa6c..ce67d7d7d54748 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -201,7 +201,7 @@ def __len__(self): def __bool__(self): warnings.warn( - "Testing an element's truth value will raise an exception in " + "Testing an element's truth value will always return True in " "future versions. " "Use specific 'len(elem)' or 'elem is not None' test instead.", DeprecationWarning, stacklevel=2 diff --git a/Misc/NEWS.d/next/Library/2024-05-29-21-50-05.gh-issue-119577.S3BlKJ.rst b/Misc/NEWS.d/next/Library/2024-05-29-21-50-05.gh-issue-119577.S3BlKJ.rst new file mode 100644 index 00000000000000..bd2daf3fb5c16d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-29-21-50-05.gh-issue-119577.S3BlKJ.rst @@ -0,0 +1,4 @@ +The :exc:`DeprecationWarning` emitted when testing the truth value of an +:class:`xml.etree.ElementTree.Element` now describes unconditionally +returning ``True`` in a future version rather than raising an exception in +Python 3.14. diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index b11983d2caa2d1..3818e20b4f0f28 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -1502,7 +1502,7 @@ element_bool(PyObject* self_) { ElementObject* self = (ElementObject*) self_; if (PyErr_WarnEx(PyExc_DeprecationWarning, - "Testing an element's truth value will raise an exception " + "Testing an element's truth value will always return True " "in future versions. 
Use specific 'len(elem)' or " "'elem is not None' test instead.", 1) < 0) { From 5c115567b1e3aecb7a53cfd5757e25c088398411 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Fri, 7 Jun 2024 00:38:31 -0600 Subject: [PATCH 115/373] Add Plausible for docs metrics (#119977) Co-authored-by: Julien Palard --- Doc/conf.py | 3 ++- Doc/tools/templates/layout.html | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Doc/conf.py b/Doc/conf.py index 47fb96fe1de482..8a14646801ebac 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -339,7 +339,8 @@ html_context = { "is_deployment_preview": os.getenv("READTHEDOCS_VERSION_TYPE") == "external", "repository_url": repository_url.removesuffix(".git") if repository_url else None, - "pr_id": os.getenv("READTHEDOCS_VERSION") + "pr_id": os.getenv("READTHEDOCS_VERSION"), + "enable_analytics": os.getenv("PYTHON_DOCS_ENABLE_ANALYTICS"), } # This 'Last updated on:' timestamp is inserted at the bottom of every page. 
diff --git a/Doc/tools/templates/layout.html b/Doc/tools/templates/layout.html index 3f88fc8e91faad..b09fd21a8ddcc9 100644 --- a/Doc/tools/templates/layout.html +++ b/Doc/tools/templates/layout.html @@ -26,6 +26,9 @@ {% endblock %} {% block extrahead %} + {% if builder == "html" and enable_analytics %} + + {% endif %} {% if builder != "htmlhelp" %} {% if pagename == 'whatsnew/changelog' and not embedded %} From bd826b9c77dbf7c789433cb8061c733c08634c0e Mon Sep 17 00:00:00 2001 From: Clinton Date: Fri, 7 Jun 2024 03:39:19 -0400 Subject: [PATCH 116/373] gh-120157: Remove unused code in concurrent.future (gh-120187) --- Lib/concurrent/futures/_base.py | 8 -------- .../2024-06-07-02-00-31.gh-issue-120157.HnWcF9.rst | 1 + 2 files changed, 1 insertion(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-07-02-00-31.gh-issue-120157.HnWcF9.rst diff --git a/Lib/concurrent/futures/_base.py b/Lib/concurrent/futures/_base.py index 6742a07753c921..707fcdfde79acd 100644 --- a/Lib/concurrent/futures/_base.py +++ b/Lib/concurrent/futures/_base.py @@ -23,14 +23,6 @@ CANCELLED_AND_NOTIFIED = 'CANCELLED_AND_NOTIFIED' FINISHED = 'FINISHED' -_FUTURE_STATES = [ - PENDING, - RUNNING, - CANCELLED, - CANCELLED_AND_NOTIFIED, - FINISHED -] - _STATE_TO_DESCRIPTION_MAP = { PENDING: "pending", RUNNING: "running", diff --git a/Misc/NEWS.d/next/Library/2024-06-07-02-00-31.gh-issue-120157.HnWcF9.rst b/Misc/NEWS.d/next/Library/2024-06-07-02-00-31.gh-issue-120157.HnWcF9.rst new file mode 100644 index 00000000000000..3e905125797af7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-07-02-00-31.gh-issue-120157.HnWcF9.rst @@ -0,0 +1 @@ +Remove unused constant ``concurrent.futures._base._FUTURE_STATES`` in :mod:`concurrent.futures`. Patch by Clinton Christian (pygeek). 
From 57ad769076201c858a768d81047f6ea44925a33b Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Fri, 7 Jun 2024 11:03:28 +0300 Subject: [PATCH 117/373] gh-120080: Accept ``None`` as a valid argument for direct call of the ``int.__round__`` (#120088) Co-authored-by: Nikita Sobolev --- Lib/test/test_float.py | 6 ++++++ Lib/test/test_inspect/test_inspect.py | 1 - Lib/test/test_int.py | 6 ++++++ .../2024-06-05-08-39-40.gh-issue-120080.DJFK11.rst | 2 ++ Objects/clinic/longobject.c.h | 6 +++--- Objects/longobject.c | 6 +++--- 6 files changed, 20 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-05-08-39-40.gh-issue-120080.DJFK11.rst diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index 5bd640617d6874..53695cefb8fded 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -949,6 +949,12 @@ def test_None_ndigits(self): self.assertEqual(x, 2) self.assertIsInstance(x, int) + def test_round_with_none_arg_direct_call(self): + for val in [(1.0).__round__(None), + round(1.0), + round(1.0, None)]: + self.assertEqual(val, 1) + self.assertIs(type(val), int) # Beginning with Python 2.6 float has cross platform compatible # ways to create and represent inf and nan diff --git a/Lib/test/test_inspect/test_inspect.py b/Lib/test/test_inspect/test_inspect.py index 011d42f34b6461..65007c16203c6d 100644 --- a/Lib/test/test_inspect/test_inspect.py +++ b/Lib/test/test_inspect/test_inspect.py @@ -5412,7 +5412,6 @@ def test_builtins_have_signatures(self): 'bytearray': {'count', 'endswith', 'find', 'hex', 'index', 'rfind', 'rindex', 'startswith'}, 'bytes': {'count', 'endswith', 'find', 'hex', 'index', 'rfind', 'rindex', 'startswith'}, 'dict': {'pop'}, - 'int': {'__round__'}, 'memoryview': {'cast', 'hex'}, 'str': {'count', 'endswith', 'find', 'index', 'maketrans', 'rfind', 'rindex', 'startswith'}, } diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index ce9febd741bba2..77221dfb6d5aa2 100644 --- a/Lib/test/test_int.py 
+++ b/Lib/test/test_int.py @@ -517,6 +517,12 @@ def test_issue31619(self): self.assertEqual(int('1_2_3_4_5_6_7_8_9', 16), 0x123456789) self.assertEqual(int('1_2_3_4_5_6_7', 32), 1144132807) + def test_round_with_none_arg_direct_call(self): + for val in [(1).__round__(None), + round(1), + round(1, None)]: + self.assertEqual(val, 1) + self.assertIs(type(val), int) class IntStrDigitLimitsTests(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-05-08-39-40.gh-issue-120080.DJFK11.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-05-08-39-40.gh-issue-120080.DJFK11.rst new file mode 100644 index 00000000000000..8c5602fcdb4ad2 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-05-08-39-40.gh-issue-120080.DJFK11.rst @@ -0,0 +1,2 @@ +Direct call to the :meth:`!int.__round__` now accepts ``None`` +as a valid argument. diff --git a/Objects/clinic/longobject.c.h b/Objects/clinic/longobject.c.h index 56bc3864582dcb..90375b9a082cca 100644 --- a/Objects/clinic/longobject.c.h +++ b/Objects/clinic/longobject.c.h @@ -116,7 +116,7 @@ int___format__(PyObject *self, PyObject *arg) } PyDoc_STRVAR(int___round____doc__, -"__round__($self, ndigits=, /)\n" +"__round__($self, ndigits=None, /)\n" "--\n" "\n" "Rounding an Integral returns itself.\n" @@ -133,7 +133,7 @@ static PyObject * int___round__(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; - PyObject *o_ndigits = NULL; + PyObject *o_ndigits = Py_None; if (!_PyArg_CheckPositional("__round__", nargs, 0, 1)) { goto exit; @@ -476,4 +476,4 @@ int_is_integer(PyObject *self, PyObject *Py_UNUSED(ignored)) { return int_is_integer_impl(self); } -/*[clinic end generated code: output=2ba2d8dcda9b99da input=a9049054013a1b77]*/ +/*[clinic end generated code: output=a53f5ba9a6c16737 input=a9049054013a1b77]*/ diff --git a/Objects/longobject.c b/Objects/longobject.c index ee0b2a038a2aab..a3a59a20f0bb97 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -6045,7 
+6045,7 @@ _PyLong_DivmodNear(PyObject *a, PyObject *b) /*[clinic input] int.__round__ - ndigits as o_ndigits: object = NULL + ndigits as o_ndigits: object = None / Rounding an Integral returns itself. @@ -6055,7 +6055,7 @@ Rounding with an ndigits argument also returns an integer. static PyObject * int___round___impl(PyObject *self, PyObject *o_ndigits) -/*[clinic end generated code: output=954fda6b18875998 input=1614cf23ec9e18c3]*/ +/*[clinic end generated code: output=954fda6b18875998 input=30c2aec788263144]*/ { PyObject *temp, *result, *ndigits; @@ -6073,7 +6073,7 @@ int___round___impl(PyObject *self, PyObject *o_ndigits) * * m - divmod_near(m, 10**n)[1]. */ - if (o_ndigits == NULL) + if (o_ndigits == Py_None) return long_long(self); ndigits = _PyNumber_Index(o_ndigits); From 6a97929a5ad76c55bc6e1cf32898a1c31093334d Mon Sep 17 00:00:00 2001 From: Xie Yanbo Date: Fri, 7 Jun 2024 16:19:41 +0800 Subject: [PATCH 118/373] Fix typos in comments (#120188) --- Include/ceval.h | 2 +- Include/cpython/object.h | 2 +- Include/cpython/pyframe.h | 2 +- Include/internal/mimalloc/mimalloc/atomic.h | 2 +- Include/internal/mimalloc/mimalloc/internal.h | 2 +- Include/internal/pycore_initconfig.h | 2 +- Include/internal/pycore_instruments.h | 2 +- Include/internal/pycore_lock.h | 2 +- Include/internal/pycore_pythread.h | 2 +- Include/internal/pycore_unicodeobject.h | 2 +- Include/structmember.h | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Include/ceval.h b/Include/ceval.h index 1ec746c3708220..e9df8684996e23 100644 --- a/Include/ceval.h +++ b/Include/ceval.h @@ -42,7 +42,7 @@ PyAPI_FUNC(int) Py_MakePendingCalls(void); level exceeds "current recursion limit + 50". By construction, this protection can only be triggered when the "overflowed" flag is set. It means the cleanup code has itself gone into an infinite loop, or the - RecursionError has been mistakingly ignored. When this protection is + RecursionError has been mistakenly ignored. 
When this protection is triggered, the interpreter aborts with a Fatal Error. In addition, the "overflowed" flag is automatically reset when the diff --git a/Include/cpython/object.h b/Include/cpython/object.h index e624326693d8e7..0ab94e5e2a15e5 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -313,7 +313,7 @@ PyAPI_FUNC(PyObject *) _PyObject_FunctionStr(PyObject *); * triggered as a side-effect of `dst` getting torn down no longer believes * `dst` points to a valid object. * - * Temporary variables are used to only evalutate macro arguments once and so + * Temporary variables are used to only evaluate macro arguments once and so * avoid the duplication of side effects. _Py_TYPEOF() or memcpy() is used to * avoid a miscompilation caused by type punning. See Py_CLEAR() comment for * implementation details about type punning. diff --git a/Include/cpython/pyframe.h b/Include/cpython/pyframe.h index eeafbb17a56bad..51529763923ec3 100644 --- a/Include/cpython/pyframe.h +++ b/Include/cpython/pyframe.h @@ -28,7 +28,7 @@ struct _PyInterpreterFrame; * Does not raise an exception. */ PyAPI_FUNC(PyObject *) PyUnstable_InterpreterFrame_GetCode(struct _PyInterpreterFrame *frame); -/* Returns a byte ofsset into the last executed instruction. +/* Returns a byte offset into the last executed instruction. * Does not raise an exception. */ PyAPI_FUNC(int) PyUnstable_InterpreterFrame_GetLasti(struct _PyInterpreterFrame *frame); diff --git a/Include/internal/mimalloc/mimalloc/atomic.h b/Include/internal/mimalloc/mimalloc/atomic.h index eb8478ceed6adf..52f82487685cdb 100644 --- a/Include/internal/mimalloc/mimalloc/atomic.h +++ b/Include/internal/mimalloc/mimalloc/atomic.h @@ -11,7 +11,7 @@ terms of the MIT license. A copy of the license can be found in the file // -------------------------------------------------------------------------------------------- // Atomics // We need to be portable between C, C++, and MSVC. 
-// We base the primitives on the C/C++ atomics and create a mimimal wrapper for MSVC in C compilation mode. +// We base the primitives on the C/C++ atomics and create a minimal wrapper for MSVC in C compilation mode. // This is why we try to use only `uintptr_t` and `*` as atomic types. // To gain better insight in the range of used atomics, we use explicitly named memory order operations // instead of passing the memory order as a parameter. diff --git a/Include/internal/mimalloc/mimalloc/internal.h b/Include/internal/mimalloc/mimalloc/internal.h index 94f88fb603af25..d97f51b8eefbe5 100644 --- a/Include/internal/mimalloc/mimalloc/internal.h +++ b/Include/internal/mimalloc/mimalloc/internal.h @@ -10,7 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file // -------------------------------------------------------------------------- -// This file contains the interal API's of mimalloc and various utility +// This file contains the internal API's of mimalloc and various utility // functions and macros. // -------------------------------------------------------------------------- diff --git a/Include/internal/pycore_initconfig.h b/Include/internal/pycore_initconfig.h index 1c68161341860a..6bf1b53bffd3ba 100644 --- a/Include/internal/pycore_initconfig.h +++ b/Include/internal/pycore_initconfig.h @@ -157,7 +157,7 @@ typedef enum { /* For now, this means the GIL is enabled. gh-116329: This will eventually change to "the GIL is disabled but can - be reenabled by loading an incompatible extension module." */ + be re-enabled by loading an incompatible extension module." */ _PyConfig_GIL_DEFAULT = -1, /* The GIL has been forced off or on, and will not be affected by module loading. 
*/ diff --git a/Include/internal/pycore_instruments.h b/Include/internal/pycore_instruments.h index c98e82c8be5546..4e5b374968ea98 100644 --- a/Include/internal/pycore_instruments.h +++ b/Include/internal/pycore_instruments.h @@ -23,7 +23,7 @@ typedef uint32_t _PyMonitoringEventSet; #define PY_MONITORING_PROFILER_ID 2 #define PY_MONITORING_OPTIMIZER_ID 5 -/* Internal IDs used to suuport sys.setprofile() and sys.settrace() */ +/* Internal IDs used to support sys.setprofile() and sys.settrace() */ #define PY_MONITORING_SYS_PROFILE_ID 6 #define PY_MONITORING_SYS_TRACE_ID 7 diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index d5853b2c9ff464..882c4888e5058c 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -116,7 +116,7 @@ typedef enum _PyLockFlags { extern PyLockStatus _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout_ns, _PyLockFlags flags); -// Lock a mutex with aditional options. See _PyLockFlags for details. +// Lock a mutex with additional options. See _PyLockFlags for details. static inline void PyMutex_LockFlags(PyMutex *m, _PyLockFlags flags) { diff --git a/Include/internal/pycore_pythread.h b/Include/internal/pycore_pythread.h index 3610c6254db6af..f3f5942444e851 100644 --- a/Include/internal/pycore_pythread.h +++ b/Include/internal/pycore_pythread.h @@ -147,7 +147,7 @@ PyAPI_FUNC(int) PyThread_start_joinable_thread(void (*func)(void *), PyAPI_FUNC(int) PyThread_join_thread(PyThread_handle_t); /* * Detach a thread started with `PyThread_start_joinable_thread`, such - * that its resources are relased as soon as it exits. + * that its resources are released as soon as it exits. * This function cannot be interrupted. It returns 0 on success, * a non-zero value on failure. 
*/ diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index fea5ceea0954f4..026d6e461f2108 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -189,7 +189,7 @@ extern PyObject* _PyUnicode_EncodeCharmap( /* --- Decimal Encoder ---------------------------------------------------- */ -// Coverts a Unicode object holding a decimal value to an ASCII string +// Converts a Unicode object holding a decimal value to an ASCII string // for using in int, float and complex parsers. // Transforms code points that have decimal digit property to the // corresponding ASCII digit code points. Transforms spaces to ASCII. diff --git a/Include/structmember.h b/Include/structmember.h index f6e8fd829892f4..5f29fbcfed99e3 100644 --- a/Include/structmember.h +++ b/Include/structmember.h @@ -11,7 +11,7 @@ extern "C" { * New definitions are in descrobject.h. * * However, there's nothing wrong with old code continuing to use it, - * and there's not much mainenance overhead in maintaining a few aliases. + * and there's not much maintenance overhead in maintaining a few aliases. * So, don't be too eager to convert old code. * * It uses names not prefixed with Py_. From 47816f465e833a5257a82b759b1081e06381e528 Mon Sep 17 00:00:00 2001 From: Michael Allwright Date: Fri, 7 Jun 2024 10:38:15 +0200 Subject: [PATCH 119/373] gh-120154: Fix Emscripten/WASI pattern in case statement for LDSHARED (#120173) Fix Emscripten/WASI pattern in case statement for LDSHARED --- configure | 2 +- configure.ac | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configure b/configure index 6cfe114fb2104c..8e605d31bb5eca 100755 --- a/configure +++ b/configure @@ -12892,7 +12892,7 @@ then LDCXXSHARED='$(CXX) -dynamiclib -F . 
-framework $(PYTHONFRAMEWORK)' BLDSHARED="$LDSHARED" ;; - Emscripten|WASI) + Emscripten*|WASI*) LDSHARED='$(CC) -shared' LDCXXSHARED='$(CXX) -shared';; Linux*|GNU*|QNX*|VxWorks*|Haiku*) diff --git a/configure.ac b/configure.ac index 8657e09c9a7008..41023ab92bad81 100644 --- a/configure.ac +++ b/configure.ac @@ -3417,7 +3417,7 @@ then LDCXXSHARED='$(CXX) -dynamiclib -F . -framework $(PYTHONFRAMEWORK)' BLDSHARED="$LDSHARED" ;; - Emscripten|WASI) + Emscripten*|WASI*) LDSHARED='$(CC) -shared' LDCXXSHARED='$(CXX) -shared';; Linux*|GNU*|QNX*|VxWorks*|Haiku*) From d5ba4fc9bc9b2d9eff2a90893e8d500e0c367237 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Fri, 7 Jun 2024 12:14:13 +0300 Subject: [PATCH 120/373] gh-120164: Fix test_os.test_win32_mkdir_700() (#120177) Don't compare the path to avoid encoding issues. Co-authored-by: Eryk Sun --- Lib/test/test_os.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index de5a86f676c4d5..2beb9ca8aa6ccb 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -1837,9 +1837,10 @@ def test_win32_mkdir_700(self): os.mkdir(path, mode=0o700) out = subprocess.check_output(["cacls.exe", path, "/s"], encoding="oem") os.rmdir(path) + out = out.strip().rsplit(" ", 1)[1] self.assertEqual( - out.strip(), - f'{path} "D:P(A;OICI;FA;;;SY)(A;OICI;FA;;;BA)(A;OICI;FA;;;OW)"', + out, + '"D:P(A;OICI;FA;;;SY)(A;OICI;FA;;;BA)(A;OICI;FA;;;OW)"', ) def tearDown(self): From 6646a9da26d12fc54263b22dd2916a2f710f1db7 Mon Sep 17 00:00:00 2001 From: Aditya Borikar Date: Fri, 7 Jun 2024 03:44:42 -0600 Subject: [PATCH 121/373] gh-110383: Clarify "non-integral" wording in pow() docs (#119688) --- Doc/library/functions.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 4617767a71be18..1d82f92ea67857 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1561,7 +1561,9 @@ are always 
available. They are listed here in alphabetical order. returns ``100``, but ``pow(10, -2)`` returns ``0.01``. For a negative base of type :class:`int` or :class:`float` and a non-integral exponent, a complex result is delivered. For example, ``pow(-9, 0.5)`` returns a value close - to ``3j``. + to ``3j``. Whereas, for a negative base of type :class:`int` or :class:`float` + with an integral exponent, a float result is delivered. For example, + ``pow(-9, 2.0)`` returns ``81.0``. For :class:`int` operands *base* and *exp*, if *mod* is present, *mod* must also be of integer type and *mod* must be nonzero. If *mod* is present and From d68a22e7a68ae09f7db61d5a1a3bd9c0360cf3ee Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 7 Jun 2024 13:49:07 +0300 Subject: [PATCH 122/373] gh-120211: Fix tkinter.ttk with Tcl/Tk 9.0 (GH-120213) * Use new methods for tracing Tcl variable. * Fix Combobox.current() for empty combobox. --- Lib/tkinter/ttk.py | 9 ++++++--- .../2024-06-07-13-21-11.gh-issue-120211.Rws_gf.rst | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-07-13-21-11.gh-issue-120211.Rws_gf.rst diff --git a/Lib/tkinter/ttk.py b/Lib/tkinter/ttk.py index 5ca938a670831a..073b3ae20797c3 100644 --- a/Lib/tkinter/ttk.py +++ b/Lib/tkinter/ttk.py @@ -690,7 +690,10 @@ def current(self, newindex=None): returns the index of the current value in the list of values or -1 if the current value does not appear in the list.""" if newindex is None: - return self.tk.getint(self.tk.call(self._w, "current")) + res = self.tk.call(self._w, "current") + if res == '': + return -1 + return self.tk.getint(res) return self.tk.call(self._w, "current", newindex) @@ -1522,7 +1525,7 @@ def __init__(self, master=None, variable=None, from_=0, to=10, **kw): self.label.place(anchor='n' if label_side == 'top' else 's') # update the label as scale or variable changes - self.__tracecb = self._variable.trace_variable('w', self._adjust) + 
self.__tracecb = self._variable.trace_add('write', self._adjust) self.bind('<Configure>', self._adjust) self.bind('<Map>', self._adjust) @@ -1530,7 +1533,7 @@ def __init__(self, master=None, variable=None, from_=0, to=10, **kw): def destroy(self): """Destroy this widget and possibly its associated variable.""" try: - self._variable.trace_vdelete('w', self.__tracecb) + self._variable.trace_remove('write', self.__tracecb) except AttributeError: pass else: diff --git a/Misc/NEWS.d/next/Library/2024-06-07-13-21-11.gh-issue-120211.Rws_gf.rst b/Misc/NEWS.d/next/Library/2024-06-07-13-21-11.gh-issue-120211.Rws_gf.rst new file mode 100644 index 00000000000000..0106f2d93318b4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-07-13-21-11.gh-issue-120211.Rws_gf.rst @@ -0,0 +1 @@ +Fix :mod:`tkinter.ttk` with Tcl/Tk 9.0. From eca3f7762c23b22a73a5e0b09520748c88aab4a0 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Fri, 7 Jun 2024 14:06:24 +0100 Subject: [PATCH 123/373] gh-93691: fix too broad source locations of with-statement instructions (#120125) --- Lib/test/test_with.py | 44 +++++++++++++++++++ ...4-06-05-18-29-18.gh-issue-93691.6OautB.rst | 1 + Python/compile.c | 5 +-- 3 files changed, 47 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-05-18-29-18.gh-issue-93691.6OautB.rst diff --git a/Lib/test/test_with.py b/Lib/test/test_with.py index d81902327a7e0a..e8c4ddf979e2ee 100644 --- a/Lib/test/test_with.py +++ b/Lib/test/test_with.py @@ -5,6 +5,7 @@ __email__ = "mbland at acm dot org" import sys +import traceback import unittest from collections import deque from contextlib import _GeneratorContextManager, contextmanager, nullcontext @@ -749,5 +750,48 @@ def testEnterReturnsTuple(self): self.assertEqual(10, b1) self.assertEqual(20, b2) + def testExceptionLocation(self): + # The location of an exception raised from + # __init__, __enter__ or __exit__ of a context + # manager should be just the context 
manager expression, + # pinpointing the precise context manager in case there + # is more than one. + + def init_raises(): + try: + with self.Dummy(), self.InitRaises() as cm, self.Dummy() as d: + pass + except Exception as e: + return e + + def enter_raises(): + try: + with self.EnterRaises(), self.Dummy() as d: + pass + except Exception as e: + return e + + def exit_raises(): + try: + with self.ExitRaises(), self.Dummy() as d: + pass + except Exception as e: + return e + + for func, expected in [(init_raises, "self.InitRaises()"), + (enter_raises, "self.EnterRaises()"), + (exit_raises, "self.ExitRaises()"), + ]: + with self.subTest(func): + exc = func() + f = traceback.extract_tb(exc.__traceback__)[0] + indent = 16 + co = func.__code__ + self.assertEqual(f.lineno, co.co_firstlineno + 2) + self.assertEqual(f.end_lineno, co.co_firstlineno + 2) + self.assertEqual(f.line[f.colno - indent : f.end_colno - indent], + expected) + + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-05-18-29-18.gh-issue-93691.6OautB.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-05-18-29-18.gh-issue-93691.6OautB.rst new file mode 100644 index 00000000000000..c06d5a276c03eb --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-05-18-29-18.gh-issue-93691.6OautB.rst @@ -0,0 +1 @@ +Fix source locations of instructions generated for with statements. 
diff --git a/Python/compile.c b/Python/compile.c index 7d74096fcdf94e..cb724154206b7e 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -5900,7 +5900,7 @@ compiler_async_with(struct compiler *c, stmt_ty s, int pos) /* Evaluate EXPR */ VISIT(c, expr, item->context_expr); - + loc = LOC(item->context_expr); ADDOP(c, loc, BEFORE_ASYNC_WITH); ADDOP_I(c, loc, GET_AWAITABLE, 1); ADDOP_LOAD_CONST(c, loc, Py_None); @@ -5998,7 +5998,7 @@ compiler_with(struct compiler *c, stmt_ty s, int pos) /* Evaluate EXPR */ VISIT(c, expr, item->context_expr); /* Will push bound __exit__ */ - location loc = LOC(s); + location loc = LOC(item->context_expr); ADDOP(c, loc, BEFORE_WITH); ADDOP_JUMP(c, loc, SETUP_WITH, final); @@ -6031,7 +6031,6 @@ compiler_with(struct compiler *c, stmt_ty s, int pos) /* For successful outcome: * call __exit__(None, None, None) */ - loc = LOC(s); RETURN_IF_ERROR(compiler_call_exit_with_nones(c, loc)); ADDOP(c, loc, POP_TOP); ADDOP_JUMP(c, loc, JUMP, exit); From 225aab7f70d804174cc3a75bc04a5bb1545e5adb Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Fri, 7 Jun 2024 15:37:18 +0200 Subject: [PATCH 124/373] gh-110383: Improve 'old string formatting' text in tutorial (#120219) --- Doc/tutorial/inputoutput.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Doc/tutorial/inputoutput.rst b/Doc/tutorial/inputoutput.rst index 857068a51ab843..b93a0e8cec2d38 100644 --- a/Doc/tutorial/inputoutput.rst +++ b/Doc/tutorial/inputoutput.rst @@ -279,9 +279,11 @@ left with zeros. It understands about plus and minus signs:: Old string formatting --------------------- -The % operator (modulo) can also be used for string formatting. Given ``'string' -% values``, instances of ``%`` in ``string`` are replaced with zero or more -elements of ``values``. This operation is commonly known as string +The % operator (modulo) can also be used for string formatting. 
+Given ``format % values`` (where *format* is a string), +``%`` conversion specifications in *format* are replaced with +zero or more elements of *values*. +This operation is commonly known as string interpolation. For example:: >>> import math From 9d6604222e9ef4e136ee9ccfa2d4d5ff9feee976 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Fri, 7 Jun 2024 17:42:01 +0200 Subject: [PATCH 125/373] gh-114264: Optimize performance of copy.deepcopy by adding a fast path for atomic types (GH-114266) --- Lib/copy.py | 31 ++++++------------- ...-01-18-21-44-23.gh-issue-114264.DBKn29.rst | 1 + 2 files changed, 11 insertions(+), 21 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-18-21-44-23.gh-issue-114264.DBKn29.rst diff --git a/Lib/copy.py b/Lib/copy.py index a69bc4e78c20b3..7a1907d75494d7 100644 --- a/Lib/copy.py +++ b/Lib/copy.py @@ -121,6 +121,11 @@ def deepcopy(x, memo=None, _nil=[]): See the module's __doc__ string for more info. """ + cls = type(x) + + if cls in _atomic_types: + return x + d = id(x) if memo is None: memo = {} @@ -129,14 +134,12 @@ def deepcopy(x, memo=None, _nil=[]): if y is not _nil: return y - cls = type(x) - copier = _deepcopy_dispatch.get(cls) if copier is not None: y = copier(x, memo) else: if issubclass(cls, type): - y = _deepcopy_atomic(x, memo) + y = x # atomic copy else: copier = getattr(x, "__deepcopy__", None) if copier is not None: @@ -167,26 +170,12 @@ def deepcopy(x, memo=None, _nil=[]): _keep_alive(x, memo) # Make sure x lives at least as long as d return y +_atomic_types = {types.NoneType, types.EllipsisType, types.NotImplementedType, + int, float, bool, complex, bytes, str, types.CodeType, type, range, + types.BuiltinFunctionType, types.FunctionType, weakref.ref, property} + _deepcopy_dispatch = d = {} -def _deepcopy_atomic(x, memo): - return x -d[types.NoneType] = _deepcopy_atomic -d[types.EllipsisType] = _deepcopy_atomic -d[types.NotImplementedType] = _deepcopy_atomic -d[int] = _deepcopy_atomic -d[float] = 
_deepcopy_atomic -d[bool] = _deepcopy_atomic -d[complex] = _deepcopy_atomic -d[bytes] = _deepcopy_atomic -d[str] = _deepcopy_atomic -d[types.CodeType] = _deepcopy_atomic -d[type] = _deepcopy_atomic -d[range] = _deepcopy_atomic -d[types.BuiltinFunctionType] = _deepcopy_atomic -d[types.FunctionType] = _deepcopy_atomic -d[weakref.ref] = _deepcopy_atomic -d[property] = _deepcopy_atomic def _deepcopy_list(x, memo, deepcopy=deepcopy): y = [] diff --git a/Misc/NEWS.d/next/Library/2024-01-18-21-44-23.gh-issue-114264.DBKn29.rst b/Misc/NEWS.d/next/Library/2024-01-18-21-44-23.gh-issue-114264.DBKn29.rst new file mode 100644 index 00000000000000..069ac68b4f3a95 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-18-21-44-23.gh-issue-114264.DBKn29.rst @@ -0,0 +1 @@ +Improve performance of :func:`copy.deepcopy` by adding a fast path for atomic types. From 10fb1b8f36ab2fc3d2fe7392d5735dd19c5e2365 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Fri, 7 Jun 2024 18:48:31 +0300 Subject: [PATCH 126/373] gh-120200: Fix `inspect.iscoroutinefunction(inspect) is True` corner case (#120214) --- Lib/inspect.py | 6 +++--- Lib/test/test_inspect/test_inspect.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/inspect.py b/Lib/inspect.py index e6e49a4ffa673a..2b7f8bec482f8e 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -403,13 +403,13 @@ def isgeneratorfunction(obj): return _has_code_flag(obj, CO_GENERATOR) # A marker for markcoroutinefunction and iscoroutinefunction. 
-_is_coroutine_marker = object() +_is_coroutine_mark = object() def _has_coroutine_mark(f): while ismethod(f): f = f.__func__ f = functools._unwrap_partial(f) - return getattr(f, "_is_coroutine_marker", None) is _is_coroutine_marker + return getattr(f, "_is_coroutine_marker", None) is _is_coroutine_mark def markcoroutinefunction(func): """ @@ -417,7 +417,7 @@ def markcoroutinefunction(func): """ if hasattr(func, '__func__'): func = func.__func__ - func._is_coroutine_marker = _is_coroutine_marker + func._is_coroutine_marker = _is_coroutine_mark return func def iscoroutinefunction(obj): diff --git a/Lib/test/test_inspect/test_inspect.py b/Lib/test/test_inspect/test_inspect.py index 65007c16203c6d..0a4fa9343f15e0 100644 --- a/Lib/test/test_inspect/test_inspect.py +++ b/Lib/test/test_inspect/test_inspect.py @@ -235,6 +235,7 @@ class PMClass: gen_coroutine_function_example)))) self.assertFalse(inspect.iscoroutinefunction(gen_coro_pmi)) self.assertFalse(inspect.iscoroutinefunction(gen_coro_pmc)) + self.assertFalse(inspect.iscoroutinefunction(inspect)) self.assertFalse(inspect.iscoroutine(gen_coro)) self.assertTrue( From a7584245661102a5768c643fbd7db8395fd3c90e Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Fri, 7 Jun 2024 23:51:32 +0800 Subject: [PATCH 127/373] gh-120226: Fix test_sendfile_close_peer_in_the_middle_of_receiving on Linux >= 6.10 (#120227) The worst case is that the kernel buffers 17 pages with a page size of 64k. 
--- Lib/test/test_asyncio/test_sendfile.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_asyncio/test_sendfile.py b/Lib/test/test_asyncio/test_sendfile.py index d33ff197bbfa1d..2509d4382cdebd 100644 --- a/Lib/test/test_asyncio/test_sendfile.py +++ b/Lib/test/test_asyncio/test_sendfile.py @@ -93,13 +93,10 @@ async def wait_closed(self): class SendfileBase: - # 256 KiB plus small unaligned to buffer chunk - # Newer versions of Windows seems to have increased its internal - # buffer and tries to send as much of the data as it can as it - # has some form of buffering for this which is less than 256KiB - # on newer server versions and Windows 11. - # So DATA should be larger than 256 KiB to make this test reliable. - DATA = b"x" * (1024 * 256 + 1) + # Linux >= 6.10 seems buffering up to 17 pages of data. + # So DATA should be large enough to make this test reliable even with a + # 64 KiB page configuration. + DATA = b"x" * (1024 * 17 * 64 + 1) # Reduce socket buffer size to test on relative small data sets. BUF_SIZE = 4 * 1024 # 4 KiB From 90b75405260467814c93738a3325645918d4ea51 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 7 Jun 2024 17:58:21 +0200 Subject: [PATCH 128/373] gh-120155: Fix copy/paste error in HAVE_SUBOFFSETS_IN_LAST_DIM() (#120228) Don't hardcode 'dest' in HAVE_SUBOFFSETS_IN_LAST_DIM() macro of memoryobject.c, but use its 'view' parameter instead. Fix the Coverity issue: Error: COPY_PASTE_ERROR (CWE-398): Python-3.12.2/Objects/memoryobject.c:273:14: original: ""dest->suboffsets + (dest->ndim - 1)"" looks like the original copy. Python-3.12.2/Objects/memoryobject.c:274:14: copy_paste_error: ""dest"" in ""src->suboffsets + (dest->ndim - 1)"" looks like a copy-paste error. Python-3.12.2/Objects/memoryobject.c:274:14: remediation: Should it say ""src"" instead? 
# 272| assert(dest->ndim > 0 && src->ndim > 0); # 273| return (!HAVE_SUBOFFSETS_IN_LAST_DIM(dest) && # 274|-> !HAVE_SUBOFFSETS_IN_LAST_DIM(src) && # 275| dest->strides[dest->ndim-1] == dest->itemsize && # 276| src->strides[src->ndim-1] == src->itemsize); --- Objects/memoryobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/memoryobject.c b/Objects/memoryobject.c index 5caa6504272301..226bd6defdec5a 100644 --- a/Objects/memoryobject.c +++ b/Objects/memoryobject.c @@ -268,7 +268,7 @@ PyTypeObject _PyManagedBuffer_Type = { /* Assumptions: ndim >= 1. The macro tests for a corner case that should perhaps be explicitly forbidden in the PEP. */ #define HAVE_SUBOFFSETS_IN_LAST_DIM(view) \ - (view->suboffsets && view->suboffsets[dest->ndim-1] >= 0) + (view->suboffsets && view->suboffsets[view->ndim-1] >= 0) static inline int last_dim_is_contiguous(const Py_buffer *dest, const Py_buffer *src) From 242c7498e5a889b47847fb6f0f133ce461fa7e24 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 7 Jun 2024 17:59:34 +0100 Subject: [PATCH 129/373] GH-116380: Move pathlib-specific code from `glob` to `pathlib._abc`. (#120011) In `glob._Globber`, move pathlib-specific methods to `pathlib._abc.PathGlobber` and replace them with abstract methods. Rename `glob._Globber` to `glob._GlobberBase`. As a result, the `glob` module is no longer befouled by code that can only ever apply to pathlib. No change of behaviour. --- Lib/glob.py | 34 ++++++++++++++++++++++------------ Lib/pathlib/_abc.py | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 14 deletions(-) diff --git a/Lib/glob.py b/Lib/glob.py index fbb1d35aab71fa..574e5ad51b601d 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -328,8 +328,8 @@ def _compile_pattern(pat, sep, case_sensitive, recursive=True): return re.compile(regex, flags=flags).match -class _Globber: - """Class providing shell-style pattern matching and globbing. 
+class _GlobberBase: + """Abstract class providing shell-style pattern matching and globbing. """ def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False): @@ -338,29 +338,37 @@ def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False): self.case_pedantic = case_pedantic self.recursive = recursive - # Low-level methods + # Abstract methods - lexists = operator.methodcaller('exists', follow_symlinks=False) - add_slash = operator.methodcaller('joinpath', '') + @staticmethod + def lexists(path): + """Implements os.path.lexists(). + """ + raise NotImplementedError @staticmethod def scandir(path): - """Emulates os.scandir(), which returns an object that can be used as - a context manager. This method is called by walk() and glob(). + """Implements os.scandir(). + """ + raise NotImplementedError + + @staticmethod + def add_slash(path): + """Returns a path with a trailing slash added. """ - return contextlib.nullcontext(path.iterdir()) + raise NotImplementedError @staticmethod def concat_path(path, text): - """Appends text to the given path. + """Implements path concatenation. """ - return path.with_segments(path._raw_path + text) + raise NotImplementedError @staticmethod def parse_entry(entry): """Returns the path of an entry yielded from scandir(). """ - return entry + raise NotImplementedError # High-level methods @@ -520,7 +528,9 @@ def select_exists(self, path, exists=False): yield path -class _StringGlobber(_Globber): +class _StringGlobber(_GlobberBase): + """Provides shell-style pattern matching and globbing for string paths. 
+ """ lexists = staticmethod(os.path.lexists) scandir = staticmethod(os.scandir) parse_entry = operator.attrgetter('path') diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 1a74f457c3f5a7..ecea8e88d1a2e3 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -12,8 +12,9 @@ """ import functools +import operator import posixpath -from glob import _Globber, _no_recurse_symlinks +from glob import _GlobberBase, _no_recurse_symlinks from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO @@ -84,6 +85,33 @@ def isabs(self, path): raise UnsupportedOperation(self._unsupported_msg('isabs()')) +class PathGlobber(_GlobberBase): + """ + Class providing shell-style globbing for path objects. + """ + + lexists = operator.methodcaller('exists', follow_symlinks=False) + add_slash = operator.methodcaller('joinpath', '') + + @staticmethod + def scandir(path): + """Emulates os.scandir(), which returns an object that can be used as + a context manager. This method is called by walk() and glob(). + """ + import contextlib + return contextlib.nullcontext(path.iterdir()) + + @staticmethod + def concat_path(path, text): + """Appends text to the given path.""" + return path.with_segments(path._raw_path + text) + + @staticmethod + def parse_entry(entry): + """Returns the path of an entry yielded from scandir().""" + return entry + + class PurePathBase: """Base class for pure path objects. @@ -104,7 +132,7 @@ class PurePathBase: '_resolving', ) parser = ParserBase() - _globber = _Globber + _globber = PathGlobber def __init__(self, path, *paths): self._raw_path = self.parser.join(path, *paths) if paths else path From e6076d1e1303c3cc14bc02baf607535af2cf1501 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 7 Jun 2024 13:44:56 -0400 Subject: [PATCH 130/373] gh-119659: Get the datetime CAPI Tests Running Again (gh-120180) The tests were accidentally disabled by 2da0dc0, which didn't handle classes correctly. 
I considered updating no_rerun() to support classes, but the way test_datetime.py works would have made things fairly messy. Plus, it looks like the refleaks we had encountered before have been resolved. --- Lib/test/datetimetester.py | 3 +-- Lib/test/support/__init__.py | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 3759504b02e550..b80da5697ef865 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -22,7 +22,7 @@ from test import support from test.support import is_resource_enabled, ALWAYS_EQ, LARGEST, SMALLEST -from test.support import warnings_helper, no_rerun +from test.support import warnings_helper import datetime as datetime_module from datetime import MINYEAR, MAXYEAR @@ -6385,7 +6385,6 @@ class IranTest(ZoneInfoTest): @unittest.skipIf(_testcapi is None, 'need _testcapi module') -@no_rerun("the encapsulated datetime C API does not support reloading") class CapiTest(unittest.TestCase): def setUp(self): # Since the C API is not present in the _Pure tests, skip all tests diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 4b320b494bb8dd..9e6100d2b89d6e 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -1197,6 +1197,7 @@ def no_rerun(reason): test using the 'reason' parameter. 
""" def deco(func): + assert not isinstance(func, type), func _has_run = False def wrapper(self): nonlocal _has_run From 4fc82b6d3b99f873179937215833e7a573ca7876 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Fri, 7 Jun 2024 22:37:35 +0100 Subject: [PATCH 131/373] gh-120225: fix crash in compiler on empty block at end of exception handler (#120235) --- Lib/test/test_compile.py | 10 ++++++++++ .../2024-06-07-16-09-04.gh-issue-120225.kuYf9t.rst | 1 + Python/flowgraph.c | 8 ++------ 3 files changed, 13 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-07-16-09-04.gh-issue-120225.kuYf9t.rst diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index ba0bcc9c1ced99..ae23aea08d99bc 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -1409,6 +1409,16 @@ def f(): for kw in ("except", "except*"): exec(code % kw, g, l); + def test_regression_gh_120225(self): + async def name_4(): + match b'': + case True: + pass + case name_5 if f'e': + {name_3: name_4 async for name_2 in name_5} + case []: + pass + [[]] @requires_debug_ranges() class TestSourcePositions(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-07-16-09-04.gh-issue-120225.kuYf9t.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-07-16-09-04.gh-issue-120225.kuYf9t.rst new file mode 100644 index 00000000000000..d00b9aaa8192e3 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-07-16-09-04.gh-issue-120225.kuYf9t.rst @@ -0,0 +1 @@ +Fix crash in compiler on empty block at end of exception handler. 
diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 17617e119fdaa4..aed694aee91f91 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2304,15 +2304,11 @@ push_cold_blocks_to_end(cfg_builder *g) { if (!IS_LABEL(b->b_next->b_label)) { b->b_next->b_label.id = next_lbl++; } - cfg_instr *prev_instr = basicblock_last_instr(b); - // b cannot be empty because at the end of an exception handler - // there is always a POP_EXCEPT + RERAISE/RETURN - assert(prev_instr); - basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id, - prev_instr->i_loc); + NO_LOCATION); explicit_jump->b_cold = 1; explicit_jump->b_next = b->b_next; + explicit_jump->b_predecessors = 1; b->b_next = explicit_jump; /* set target */ From 95f4db88d5ab7d900f05d0418b2a2e77bf9ff126 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sat, 8 Jun 2024 10:51:09 +0300 Subject: [PATCH 132/373] gh-120242: Fix handling of `[setUp,tearDown]Class` in `test_datetime` (#120243) --- Lib/test/test_datetime.py | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/Lib/test/test_datetime.py b/Lib/test/test_datetime.py index 3859733a4fe65b..005187f13e665f 100644 --- a/Lib/test/test_datetime.py +++ b/Lib/test/test_datetime.py @@ -1,5 +1,6 @@ import unittest import sys +import functools from test.support.import_helper import import_fresh_module @@ -39,21 +40,26 @@ def load_tests(loader, tests, pattern): for cls in test_classes: cls.__name__ += suffix cls.__qualname__ += suffix - @classmethod - def setUpClass(cls_, module=module): - cls_._save_sys_modules = sys.modules.copy() - sys.modules[TESTS] = module - sys.modules['datetime'] = module.datetime_module - if hasattr(module, '_pydatetime'): - sys.modules['_pydatetime'] = module._pydatetime - sys.modules['_strptime'] = module._strptime - @classmethod - def tearDownClass(cls_): - sys.modules.clear() - sys.modules.update(cls_._save_sys_modules) - cls.setUpClass = setUpClass - cls.tearDownClass = 
tearDownClass - tests.addTests(loader.loadTestsFromTestCase(cls)) + + @functools.wraps(cls, updated=()) + class Wrapper(cls): + @classmethod + def setUpClass(cls_, module=module): + cls_._save_sys_modules = sys.modules.copy() + sys.modules[TESTS] = module + sys.modules['datetime'] = module.datetime_module + if hasattr(module, '_pydatetime'): + sys.modules['_pydatetime'] = module._pydatetime + sys.modules['_strptime'] = module._strptime + super().setUpClass() + + @classmethod + def tearDownClass(cls_): + super().tearDownClass() + sys.modules.clear() + sys.modules.update(cls_._save_sys_modules) + + tests.addTests(loader.loadTestsFromTestCase(Wrapper)) return tests From 2080425154d235b4b7dcc9a8a2f58e71769125ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enrico=20Tr=C3=B6ger?= Date: Sat, 8 Jun 2024 11:19:13 +0200 Subject: [PATCH 133/373] bpo-37755: Use configured output in pydoc instead of pager (GH-15105) If the Helper() class was initialized with an output, the topics, keywords and symbols help still use the pager instead of the output. Change the behavior so the output is used if available while keeping the previous behavior if no output was configured. 
--- Lib/pydoc.py | 8 +- Lib/test/test_pydoc/test_pydoc.py | 125 +++++++++++++++--- ...4-06-02-13-35-11.gh-issue-81936.ETeW9x.rst | 3 + 3 files changed, 116 insertions(+), 20 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-02-13-35-11.gh-issue-81936.ETeW9x.rst diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 2ba597d01f245e..d7579c1cc3dcd1 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -2034,7 +2034,7 @@ def help(self, request, is_cli=False): elif request in self.symbols: self.showsymbol(request) elif request in ['True', 'False', 'None']: # special case these keywords since they are objects too - doc(eval(request), 'Help on %s:', is_cli=is_cli) + doc(eval(request), 'Help on %s:', output=self._output, is_cli=is_cli) elif request in self.keywords: self.showtopic(request) elif request in self.topics: self.showtopic(request) elif request: doc(request, 'Help on %s:', output=self._output, is_cli=is_cli) @@ -2127,7 +2127,11 @@ def showtopic(self, topic, more_xrefs=''): text = 'Related help topics: ' + ', '.join(xrefs.split()) + '\n' wrapped_text = textwrap.wrap(text, 72) doc += '\n%s\n' % '\n'.join(wrapped_text) - pager(doc, f'Help on {topic!s}') + + if self._output is None: + pager(doc, f'Help on {topic!s}') + else: + self.output.write(doc) def _gettopic(self, topic, more_xrefs=''): """Return unbuffered tuple of (topic, xrefs). 
diff --git a/Lib/test/test_pydoc/test_pydoc.py b/Lib/test/test_pydoc/test_pydoc.py index 436fdb38756ddd..57e5b8e8abddfa 100644 --- a/Lib/test/test_pydoc/test_pydoc.py +++ b/Lib/test/test_pydoc/test_pydoc.py @@ -17,6 +17,7 @@ import types import typing import unittest +import unittest.mock import urllib.parse import xml.etree import xml.etree.ElementTree @@ -658,16 +659,13 @@ def test_fail_help_output_redirect(self): @unittest.skipIf(hasattr(sys, 'gettrace') and sys.gettrace(), 'trace function introduces __locals__ unexpectedly') + @unittest.mock.patch('pydoc.pager') @requires_docstrings - def test_help_output_redirect(self): + def test_help_output_redirect(self, pager_mock): # issue 940286, if output is set in Helper, then all output from # Helper.help should be redirected - getpager_old = pydoc.getpager - getpager_new = lambda: (lambda x: x) self.maxDiff = None - buf = StringIO() - helper = pydoc.Helper(output=buf) unused, doc_loc = get_pydoc_text(pydoc_mod) module = "test.test_pydoc.pydoc_mod" help_header = """ @@ -677,21 +675,112 @@ def test_help_output_redirect(self): help_header = textwrap.dedent(help_header) expected_help_pattern = help_header + expected_text_pattern - pydoc.getpager = getpager_new - try: + with captured_output('stdout') as output, \ + captured_output('stderr') as err, \ + StringIO() as buf: + helper = pydoc.Helper(output=buf) + helper.help(module) + result = buf.getvalue().strip() + expected_text = expected_help_pattern % ( + (doc_loc,) + + expected_text_data_docstrings + + (inspect.getabsfile(pydoc_mod),)) + self.assertEqual('', output.getvalue()) + self.assertEqual('', err.getvalue()) + self.assertEqual(expected_text, result) + + pager_mock.assert_not_called() + + @unittest.skipIf(hasattr(sys, 'gettrace') and sys.gettrace(), + 'trace function introduces __locals__ unexpectedly') + @requires_docstrings + @unittest.mock.patch('pydoc.pager') + def test_help_output_redirect_various_requests(self, pager_mock): + # issue 940286, if output is set 
in Helper, then all output from + # Helper.help should be redirected + + def run_pydoc_for_request(request, expected_text_part): + """Helper function to run pydoc with its output redirected""" with captured_output('stdout') as output, \ - captured_output('stderr') as err: - helper.help(module) + captured_output('stderr') as err, \ + StringIO() as buf: + helper = pydoc.Helper(output=buf) + helper.help(request) result = buf.getvalue().strip() - expected_text = expected_help_pattern % ( - (doc_loc,) + - expected_text_data_docstrings + - (inspect.getabsfile(pydoc_mod),)) - self.assertEqual('', output.getvalue()) - self.assertEqual('', err.getvalue()) - self.assertEqual(expected_text, result) - finally: - pydoc.getpager = getpager_old + self.assertEqual('', output.getvalue(), msg=f'failed on request "{request}"') + self.assertEqual('', err.getvalue(), msg=f'failed on request "{request}"') + self.assertIn(expected_text_part, result, msg=f'failed on request "{request}"') + pager_mock.assert_not_called() + + self.maxDiff = None + + # test for "keywords" + run_pydoc_for_request('keywords', 'Here is a list of the Python keywords.') + # test for "symbols" + run_pydoc_for_request('symbols', 'Here is a list of the punctuation symbols') + # test for "topics" + run_pydoc_for_request('topics', 'Here is a list of available topics.') + # test for "modules" skipped, see test_modules() + # test for symbol "%" + run_pydoc_for_request('%', 'The power operator') + # test for special True, False, None keywords + run_pydoc_for_request('True', 'class bool(int)') + run_pydoc_for_request('False', 'class bool(int)') + run_pydoc_for_request('None', 'class NoneType(object)') + # test for keyword "assert" + run_pydoc_for_request('assert', 'The "assert" statement') + # test for topic "TYPES" + run_pydoc_for_request('TYPES', 'The standard type hierarchy') + # test for "pydoc.Helper.help" + run_pydoc_for_request('pydoc.Helper.help', 'Help on function help in pydoc.Helper:') + # test for 
pydoc.Helper.help + run_pydoc_for_request(pydoc.Helper.help, 'Help on function help in module pydoc:') + # test for pydoc.Helper() instance skipped because it is always meant to be interactive + + def test_showtopic(self): + with captured_stdout() as showtopic_io: + helper = pydoc.Helper() + helper.showtopic('with') + helptext = showtopic_io.getvalue() + self.assertIn('The "with" statement', helptext) + + def test_fail_showtopic(self): + with captured_stdout() as showtopic_io: + helper = pydoc.Helper() + helper.showtopic('abd') + expected = "no documentation found for 'abd'" + self.assertEqual(expected, showtopic_io.getvalue().strip()) + + @unittest.mock.patch('pydoc.pager') + def test_fail_showtopic_output_redirect(self, pager_mock): + with StringIO() as buf: + helper = pydoc.Helper(output=buf) + helper.showtopic("abd") + expected = "no documentation found for 'abd'" + self.assertEqual(expected, buf.getvalue().strip()) + + pager_mock.assert_not_called() + + @unittest.skipIf(hasattr(sys, 'gettrace') and sys.gettrace(), + 'trace function introduces __locals__ unexpectedly') + @requires_docstrings + @unittest.mock.patch('pydoc.pager') + def test_showtopic_output_redirect(self, pager_mock): + # issue 940286, if output is set in Helper, then all output from + # Helper.showtopic should be redirected + self.maxDiff = None + + with captured_output('stdout') as output, \ + captured_output('stderr') as err, \ + StringIO() as buf: + helper = pydoc.Helper(output=buf) + helper.showtopic('with') + result = buf.getvalue().strip() + self.assertEqual('', output.getvalue()) + self.assertEqual('', err.getvalue()) + self.assertIn('The "with" statement', result) + + pager_mock.assert_not_called() def test_lambda_with_return_annotation(self): func = lambda a, b, c: 1 diff --git a/Misc/NEWS.d/next/Library/2024-06-02-13-35-11.gh-issue-81936.ETeW9x.rst b/Misc/NEWS.d/next/Library/2024-06-02-13-35-11.gh-issue-81936.ETeW9x.rst new file mode 100644 index 00000000000000..d53cc73e728d54 --- 
/dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-02-13-35-11.gh-issue-81936.ETeW9x.rst @@ -0,0 +1,3 @@ +:meth:`!help` and :meth:`!showtopic` methods now respect a +configured *output* argument to :class:`!pydoc.Helper` and do not use the +pager in such cases. Patch by Enrico Tröger. From 55402d3232ca400ebafe4fe3bd70f252304ebe07 Mon Sep 17 00:00:00 2001 From: Saul Shanabrook Date: Sat, 8 Jun 2024 05:41:45 -0400 Subject: [PATCH 134/373] gh-119258: Eliminate Type Guards in Tier 2 Optimizer with Watcher (GH-119365) Co-authored-by: parmeggiani Co-authored-by: dpdani Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Brandt Bucher Co-authored-by: Ken Jin --- Include/internal/pycore_optimizer.h | 9 +- Include/internal/pycore_typeobject.h | 11 ++ Lib/test/test_capi/test_opt.py | 147 ++++++++++++++++++ Lib/test/test_capi/test_watchers.py | 6 +- Lib/test/test_type_cache.py | 3 +- ...-05-23-20-17-37.gh-issue-119258.wZFIpt.rst | 3 + Modules/_testcapimodule.c | 17 -- Modules/_testinternalcapi.c | 19 +++ Objects/typeobject.c | 78 ++++++++-- Python/optimizer_analysis.c | 16 +- Python/optimizer_bytecodes.c | 37 ++++- Python/optimizer_cases.c.h | 33 +++- Python/optimizer_symbols.c | 46 +++++- 13 files changed, 366 insertions(+), 59 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-23-20-17-37.gh-issue-119258.wZFIpt.rst diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 76123987ac99f5..fd7833fd231299 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -33,6 +33,7 @@ struct _Py_UopsSymbol { int flags; // 0 bits: Top; 2 or more bits: Bottom PyTypeObject *typ; // Borrowed reference PyObject *const_val; // Owned reference (!)
+ unsigned int type_version; // currently stores type version }; #define UOP_FORMAT_TARGET 0 @@ -123,9 +124,11 @@ extern _Py_UopsSymbol *_Py_uop_sym_new_const(_Py_UOpsContext *ctx, PyObject *con extern _Py_UopsSymbol *_Py_uop_sym_new_null(_Py_UOpsContext *ctx); extern bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym); extern bool _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ); +extern bool _Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, unsigned int version); extern void _Py_uop_sym_set_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); extern void _Py_uop_sym_set_non_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym); extern void _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *typ); +extern bool _Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, unsigned int version); extern void _Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val); extern bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym); extern int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym); @@ -138,9 +141,9 @@ extern void _Py_uop_abstractcontext_fini(_Py_UOpsContext *ctx); extern _Py_UOpsAbstractFrame *_Py_uop_frame_new( _Py_UOpsContext *ctx, PyCodeObject *co, - _Py_UopsSymbol **localsplus_start, - int n_locals_already_filled, - int curr_stackentries); + int curr_stackentries, + _Py_UopsSymbol **args, + int arg_len); extern int _Py_uop_frame_pop(_Py_UOpsContext *ctx); PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored); diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 8664ae0e44533f..bc295b1b066bd1 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -63,6 +63,8 @@ typedef struct { PyObject *tp_weaklist; } managed_static_type_state; +#define TYPE_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */ + struct types_state { /* Used to set PyTypeObject.tp_version_tag. 
It starts at _Py_MAX_GLOBAL_TYPE_VERSION_TAG + 1, @@ -118,6 +120,12 @@ struct types_state { managed_static_type_state initialized[_Py_MAX_MANAGED_STATIC_EXT_TYPES]; } for_extensions; PyMutex mutex; + + // Borrowed references to type objects whose + // tp_version_tag % TYPE_VERSION_CACHE_SIZE + // once was equal to the index in the table. + // They are cleared when the type object is deallocated. + PyTypeObject *type_version_cache[TYPE_VERSION_CACHE_SIZE]; }; @@ -230,6 +238,9 @@ extern void _PyType_SetFlags(PyTypeObject *self, unsigned long mask, extern void _PyType_SetFlagsRecursive(PyTypeObject *self, unsigned long mask, unsigned long flags); +extern unsigned int _PyType_GetVersionForCurrentState(PyTypeObject *tp); +PyAPI_FUNC(void) _PyType_SetVersion(PyTypeObject *tp, unsigned int version); +PyTypeObject *_PyType_LookupByVersion(unsigned int version); #ifdef __cplusplus } diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 0491ff9b84d486..fc6d8b0a3f01d2 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1333,6 +1333,153 @@ def test_modified_local_is_seen_by_optimized_code(self): self.assertIs(type(s), float) self.assertEqual(s, 1024.0) + def test_guard_type_version_removed(self): + def thing(a): + x = 0 + for _ in range(100): + x += a.attr + x += a.attr + return x + + class Foo: + attr = 1 + + res, ex = self._run_with_optimizer(thing, Foo()) + opnames = list(iter_opnames(ex)) + self.assertIsNotNone(ex) + self.assertEqual(res, 200) + guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION") + self.assertEqual(guard_type_version_count, 1) + + def test_guard_type_version_removed_inlined(self): + """ + Verify that the guard type version is removed if we have an inlined function + """ + + def fn(): + pass + + def thing(a): + x = 0 + for _ in range(100): + x += a.attr + fn() + x += a.attr + return x + + class Foo: + attr = 1 + + res, ex = self._run_with_optimizer(thing, Foo()) + opnames =
list(iter_opnames(ex)) + self.assertIsNotNone(ex) + self.assertEqual(res, 200) + guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION") + self.assertEqual(guard_type_version_count, 1) + + def test_guard_type_version_not_removed(self): + """ + Verify that the guard type version is not removed if we modify the class + """ + + def thing(a): + x = 0 + for i in range(100): + x += a.attr + # for the first 90 iterations we set the attribute on this dummy function which shouldn't + # trigger the type watcher + # then after 90 it should trigger it and stop optimizing + # Note that the code needs to be in this weird form so it's optimized inline without any control flow + setattr((Foo, Bar)[i < 90], "attr", 2) + x += a.attr + return x + + class Foo: + attr = 1 + + class Bar: + pass + + res, ex = self._run_with_optimizer(thing, Foo()) + opnames = list(iter_opnames(ex)) + + self.assertIsNotNone(ex) + self.assertEqual(res, 219) + guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION") + self.assertEqual(guard_type_version_count, 2) + + + @unittest.expectedFailure + def test_guard_type_version_not_removed_escaping(self): + """ + Verify that the guard type version is not removed if we have an escaping function + """ + + def thing(a): + x = 0 + for i in range(100): + x += a.attr + # eval should be escaping and so should cause optimization to stop and preserve both type versions + eval("None") + x += a.attr + return x + + class Foo: + attr = 1 + res, ex = self._run_with_optimizer(thing, Foo()) + opnames = list(iter_opnames(ex)) + self.assertIsNotNone(ex) + self.assertEqual(res, 200) + guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION") + # Note: This will actually be 1 for now + # https://github.com/python/cpython/pull/119365#discussion_r1626220129 + self.assertEqual(guard_type_version_count, 2) + + + def test_guard_type_version_executor_invalidated(self): + """ + Verify that the executor is invalidated on a type change.
+ """ + + def thing(a): + x = 0 + for i in range(100): + x += a.attr + x += a.attr + return x + + class Foo: + attr = 1 + + res, ex = self._run_with_optimizer(thing, Foo()) + self.assertEqual(res, 200) + self.assertIsNotNone(ex) + self.assertEqual(list(iter_opnames(ex)).count("_GUARD_TYPE_VERSION"), 1) + self.assertTrue(ex.is_valid()) + Foo.attr = 0 + self.assertFalse(ex.is_valid()) + + def test_type_version_doesnt_segfault(self): + """ + Tests that setting a type version doesn't cause a segfault when later looking at the stack. + """ + + # Minimized from mdp.py benchmark + + class A: + def __init__(self): + self.attr = {} + + def method(self, arg): + self.attr[arg] = None + + def fn(a): + for _ in range(100): + (_ for _ in []) + (_ for _ in [a.method(None)]) + + fn(A()) + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_capi/test_watchers.py b/Lib/test/test_capi/test_watchers.py index 90665a7561b316..709b5e1c4b716a 100644 --- a/Lib/test/test_capi/test_watchers.py +++ b/Lib/test/test_capi/test_watchers.py @@ -282,8 +282,10 @@ class C: pass self.watch(wid, C) with catch_unraisable_exception() as cm: C.foo = "bar" - self.assertEqual(cm.unraisable.err_msg, - f"Exception ignored in type watcher callback #0 for {C!r}") + self.assertEqual( + cm.unraisable.err_msg, + f"Exception ignored in type watcher callback #1 for {C!r}", + ) self.assertIs(cm.unraisable.object, None) self.assertEqual(str(cm.unraisable.exc_value), "boom!") self.assert_events([]) diff --git a/Lib/test/test_type_cache.py b/Lib/test/test_type_cache.py index e90e315c808361..edaf076707ad8b 100644 --- a/Lib/test/test_type_cache.py +++ b/Lib/test/test_type_cache.py @@ -10,8 +10,9 @@ # Skip this test if the _testcapi module isn't available. 
_testcapi = import_helper.import_module("_testcapi") +_testinternalcapi = import_helper.import_module("_testinternalcapi") type_get_version = _testcapi.type_get_version -type_assign_specific_version_unsafe = _testcapi.type_assign_specific_version_unsafe +type_assign_specific_version_unsafe = _testinternalcapi.type_assign_specific_version_unsafe type_assign_version = _testcapi.type_assign_version type_modified = _testcapi.type_modified diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-23-20-17-37.gh-issue-119258.wZFIpt.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-23-20-17-37.gh-issue-119258.wZFIpt.rst new file mode 100644 index 00000000000000..68f1ec1efa5751 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-23-20-17-37.gh-issue-119258.wZFIpt.rst @@ -0,0 +1,3 @@ +Eliminate type version guards in the tier two interpreter. + +Note that setting the ``tp_version_tag`` manually (which has never been supported) may result in crashes. diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index b58c17260626c2..b139b46c826a3f 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -2403,21 +2403,6 @@ type_modified(PyObject *self, PyObject *type) Py_RETURN_NONE; } -// Circumvents standard version assignment machinery - use with caution and only on -// short-lived heap types -static PyObject * -type_assign_specific_version_unsafe(PyObject *self, PyObject *args) -{ - PyTypeObject *type; - unsigned int version; - if (!PyArg_ParseTuple(args, "Oi:type_assign_specific_version_unsafe", &type, &version)) { - return NULL; - } - assert(!PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE)); - type->tp_version_tag = version; - type->tp_flags |= Py_TPFLAGS_VALID_VERSION_TAG; - Py_RETURN_NONE; -} static PyObject * type_assign_version(PyObject *self, PyObject *type) @@ -3427,8 +3412,6 @@ static PyMethodDef TestMethods[] = { {"test_py_is_funcs", test_py_is_funcs, METH_NOARGS}, {"type_get_version", type_get_version, METH_O, 
PyDoc_STR("type->tp_version_tag")}, {"type_modified", type_modified, METH_O, PyDoc_STR("PyType_Modified")}, - {"type_assign_specific_version_unsafe", type_assign_specific_version_unsafe, METH_VARARGS, - PyDoc_STR("forcefully assign type->tp_version_tag")}, {"type_assign_version", type_assign_version, METH_O, PyDoc_STR("PyUnstable_Type_AssignVersionTag")}, {"type_get_tp_bases", type_get_tp_bases, METH_O}, {"type_get_tp_mro", type_get_tp_mro, METH_O}, diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 6d4a00c06ca9de..139a0509795de9 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -2002,6 +2002,22 @@ has_inline_values(PyObject *self, PyObject *obj) } +// Circumvents standard version assignment machinery - use with caution and only on +// short-lived heap types +static PyObject * +type_assign_specific_version_unsafe(PyObject *self, PyObject *args) +{ + PyTypeObject *type; + unsigned int version; + if (!PyArg_ParseTuple(args, "Oi:type_assign_specific_version_unsafe", &type, &version)) { + return NULL; + } + assert(!PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE)); + _PyType_SetVersion(type, version); + type->tp_flags |= Py_TPFLAGS_VALID_VERSION_TAG; + Py_RETURN_NONE; +} + /*[clinic input] gh_119213_getargs @@ -2102,6 +2118,9 @@ static PyMethodDef module_functions[] = { {"get_rare_event_counters", get_rare_event_counters, METH_NOARGS}, {"reset_rare_event_counters", reset_rare_event_counters, METH_NOARGS}, {"has_inline_values", has_inline_values, METH_O}, + {"type_assign_specific_version_unsafe", type_assign_specific_version_unsafe, METH_VARARGS, + PyDoc_STR("forcefully assign type->tp_version_tag")}, + #ifdef Py_GIL_DISABLED {"py_thread_id", get_py_thread_id, METH_NOARGS}, #endif diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 880ac6b9c009fe..cd16bebd1e1cb8 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -853,7 +853,8 @@ PyType_AddWatcher(PyType_WatchCallback callback) { 
PyInterpreterState *interp = _PyInterpreterState_GET(); - for (int i = 0; i < TYPE_MAX_WATCHERS; i++) { + // start at 1, 0 is reserved for cpython optimizer + for (int i = 1; i < TYPE_MAX_WATCHERS; i++) { if (!interp->type_watchers[i]) { interp->type_watchers[i] = callback; return i; @@ -960,7 +961,7 @@ type_modification_starting_unlocked(PyTypeObject *type) } /* 0 is not a valid version tag */ - _Py_atomic_store_uint32_release(&type->tp_version_tag, 0); + _PyType_SetVersion(type, 0); } #endif @@ -1024,7 +1025,7 @@ type_modified_unlocked(PyTypeObject *type) } type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG; - FT_ATOMIC_STORE_UINT32_RELAXED(type->tp_version_tag, 0); /* 0 is not a valid version tag */ + _PyType_SetVersion(type, 0); /* 0 is not a valid version tag */ if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { // This field *must* be invalidated if the type is modified (see the // comment on struct _specialization_cache): @@ -1101,7 +1102,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) { clear: assert(!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)); type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG; - FT_ATOMIC_STORE_UINT32_RELAXED(type->tp_version_tag, 0); /* 0 is not a valid version tag */ + _PyType_SetVersion(type, 0); /* 0 is not a valid version tag */ if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { // This field *must* be invalidated if the type is modified (see the // comment on struct _specialization_cache): @@ -1109,6 +1110,64 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) { } } +/* +The Tier 2 interpreter requires looking up the type object by the type version, so it can install +watchers to understand when they change. + +So we add a global cache from type version to borrowed references of type objects. + +This is similar to func_version_cache. 
+*/ + +void +_PyType_SetVersion(PyTypeObject *tp, unsigned int version) +{ +#ifndef Py_GIL_DISABLED + PyInterpreterState *interp = _PyInterpreterState_GET(); + // lookup the old version and set to null + if (tp->tp_version_tag != 0) { + PyTypeObject **slot = + interp->types.type_version_cache + + (tp->tp_version_tag % TYPE_VERSION_CACHE_SIZE); + *slot = NULL; + } +#endif + FT_ATOMIC_STORE_UINT32_RELAXED(tp->tp_version_tag, version); +#ifndef Py_GIL_DISABLED + if (version != 0) { + PyTypeObject **slot = + interp->types.type_version_cache + + (version % TYPE_VERSION_CACHE_SIZE); + *slot = tp; + } +#endif +} + +PyTypeObject * +_PyType_LookupByVersion(unsigned int version) +{ +#ifdef Py_GIL_DISABLED + return NULL; +#else + PyInterpreterState *interp = _PyInterpreterState_GET(); + PyTypeObject **slot = + interp->types.type_version_cache + + (version % TYPE_VERSION_CACHE_SIZE); + if (*slot && (*slot)->tp_version_tag == version) { + return *slot; + } + return NULL; +#endif +} + +unsigned int +_PyType_GetVersionForCurrentState(PyTypeObject *tp) +{ + return tp->tp_version_tag; +} + + + #define MAX_VERSIONS_PER_CLASS 1000 static int @@ -1137,8 +1196,7 @@ assign_version_tag(PyInterpreterState *interp, PyTypeObject *type) /* We have run out of version numbers */ return 0; } - FT_ATOMIC_STORE_UINT32_RELAXED(type->tp_version_tag, - NEXT_GLOBAL_VERSION_TAG++); + _PyType_SetVersion(type, NEXT_GLOBAL_VERSION_TAG++); assert (type->tp_version_tag <= _Py_MAX_GLOBAL_TYPE_VERSION_TAG); } else { @@ -1147,8 +1205,7 @@ assign_version_tag(PyInterpreterState *interp, PyTypeObject *type) /* We have run out of version numbers */ return 0; } - FT_ATOMIC_STORE_UINT32_RELAXED(type->tp_version_tag, - NEXT_VERSION_TAG(interp)++); + _PyType_SetVersion(type, NEXT_VERSION_TAG(interp)++); assert (type->tp_version_tag != 0); } @@ -5768,7 +5825,7 @@ fini_static_type(PyInterpreterState *interp, PyTypeObject *type, if (final) { type->tp_flags &= ~Py_TPFLAGS_READY; type->tp_flags &= 
~Py_TPFLAGS_VALID_VERSION_TAG; - type->tp_version_tag = 0; + _PyType_SetVersion(type, 0); } _PyStaticType_ClearWeakRefs(interp, type); @@ -5798,7 +5855,6 @@ type_dealloc(PyObject *self) _PyObject_ASSERT((PyObject *)type, type->tp_flags & Py_TPFLAGS_HEAPTYPE); _PyObject_GC_UNTRACK(type); - type_dealloc_common(type); // PyObject_ClearWeakRefs() raises an exception if Py_REFCNT() != 0 @@ -8367,7 +8423,7 @@ init_static_type(PyInterpreterState *interp, PyTypeObject *self, self->tp_flags |= Py_TPFLAGS_IMMUTABLETYPE; assert(NEXT_GLOBAL_VERSION_TAG <= _Py_MAX_GLOBAL_TYPE_VERSION_TAG); - self->tp_version_tag = NEXT_GLOBAL_VERSION_TAG++; + _PyType_SetVersion(self, NEXT_GLOBAL_VERSION_TAG++); self->tp_flags |= Py_TPFLAGS_VALID_VERSION_TAG; } else { diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index e5d3793bd4d204..75d1d9f6b2a794 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -79,6 +79,7 @@ increment_mutations(PyObject* dict) { * so we don't need to check that they haven't been used */ #define BUILTINS_WATCHER_ID 0 #define GLOBALS_WATCHER_ID 1 +#define TYPE_WATCHER_ID 0 static int globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict, @@ -92,6 +93,14 @@ globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict, return 0; } +static int +type_watcher_callback(PyTypeObject* type) +{ + _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), type, 1); + PyType_Unwatch(TYPE_WATCHER_ID, (PyObject *)type); + return 0; +} + static PyObject * convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj) { @@ -167,6 +176,9 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, if (interp->dict_state.watchers[GLOBALS_WATCHER_ID] == NULL) { interp->dict_state.watchers[GLOBALS_WATCHER_ID] = globals_watcher_callback; } + if (interp->type_watchers[TYPE_WATCHER_ID] == NULL) { + interp->type_watchers[TYPE_WATCHER_ID] = type_watcher_callback; + } for (int pc = 0; pc < buffer_size; pc++) { 
_PyUOpInstruction *inst = &buffer[pc]; int opcode = inst->opcode; @@ -310,9 +322,11 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_has_type _Py_uop_sym_has_type #define sym_get_type _Py_uop_sym_get_type #define sym_matches_type _Py_uop_sym_matches_type +#define sym_matches_type_version _Py_uop_sym_matches_type_version #define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM) #define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM) #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE) +#define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION) #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST) #define sym_is_bottom _Py_uop_sym_is_bottom #define sym_truthiness _Py_uop_sym_truthiness @@ -395,7 +409,7 @@ optimize_uops( _PyUOpInstruction *corresponding_check_stack = NULL; _Py_uop_abstractcontext_init(ctx); - _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, ctx->n_consumed, 0, curr_stacklen); + _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, curr_stacklen, NULL, 0); if (frame == NULL) { return -1; } diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index a2cb4c0b2c5192..e6fb85a90603eb 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -21,11 +21,13 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_new_const _Py_uop_sym_new_const #define sym_new_null _Py_uop_sym_new_null #define sym_matches_type _Py_uop_sym_matches_type +#define sym_matches_type_version _Py_uop_sym_matches_type_version #define sym_get_type _Py_uop_sym_get_type #define sym_has_type _Py_uop_sym_has_type #define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM) #define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM) #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE) +#define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION) #define 
sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST) #define sym_is_bottom _Py_uop_sym_is_bottom #define frame_new _Py_uop_frame_new @@ -113,6 +115,29 @@ dummy_func(void) { sym_set_type(right, &PyLong_Type); } + op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) { + assert(type_version); + if (sym_matches_type_version(owner, type_version)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } else { + // add watcher so that whenever the type changes we invalidate this + PyTypeObject *type = _PyType_LookupByVersion(type_version); + // if the type is null, it was not found in the cache (there was a conflict) + // with the key, in which case we can't trust the version + if (type) { + // if the type version was set properly, then add a watcher + // if it wasn't this means that the type version was previously set to something else + // and we set the owner to bottom, so we don't need to add a watcher because we must have + // already added one earlier. + if (sym_set_type_version(owner, type_version)) { + PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); + _Py_BloomFilter_Add(dependencies, type); + } + } + + } + } + op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) { if (sym_matches_type(left, &PyFloat_Type)) { if (sym_matches_type(right, &PyFloat_Type)) { @@ -563,16 +588,12 @@ dummy_func(void) { argcount++; } - _Py_UopsSymbol **localsplus_start = ctx->n_consumed; - int n_locals_already_filled = 0; - // Can determine statically, so we interleave the new locals - // and make the current stack the new locals. - // This also sets up for true call inlining. 
if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) { - localsplus_start = args; - n_locals_already_filled = argcount; + new_frame = frame_new(ctx, co, 0, args, argcount); + } else { + new_frame = frame_new(ctx, co, 0, NULL, 0); + } - new_frame = frame_new(ctx, co, localsplus_start, n_locals_already_filled, 0); } op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index b3787345ec6714..18f3ca4cb73e5a 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -930,6 +930,28 @@ } case _GUARD_TYPE_VERSION: { + _Py_UopsSymbol *owner; + owner = stack_pointer[-1]; + uint32_t type_version = (uint32_t)this_instr->operand; + assert(type_version); + if (sym_matches_type_version(owner, type_version)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } else { + // add watcher so that whenever the type changes we invalidate this + PyTypeObject *type = _PyType_LookupByVersion(type_version); + // if the type is null, it was not found in the cache (there was a conflict) + // with the key, in which case we can't trust the version + if (type) { + // if the type version was set properly, then add a watcher + // if it wasn't this means that the type version was previously set to something else + // and we set the owner to bottom, so we don't need to add a watcher because we must have + // already added one earlier. + if (sym_set_type_version(owner, type_version)) { + PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); + _Py_BloomFilter_Add(dependencies, type); + } + } + } break; } @@ -1583,16 +1605,11 @@ args--; argcount++; } - _Py_UopsSymbol **localsplus_start = ctx->n_consumed; - int n_locals_already_filled = 0; - // Can determine statically, so we interleave the new locals - // and make the current stack the new locals. - // This also sets up for true call inlining. 
if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) { - localsplus_start = args; - n_locals_already_filled = argcount; + new_frame = frame_new(ctx, co, 0, args, argcount); + } else { + new_frame = frame_new(ctx, co, 0, NULL, 0); } - new_frame = frame_new(ctx, co, localsplus_start, n_locals_already_filled, 0); stack_pointer[-2 - oparg] = (_Py_UopsSymbol *)new_frame; stack_pointer += -1 - oparg; break; diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index e546eef306eeca..f3d4078bf1a890 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -52,7 +52,8 @@ static inline int get_lltrace(void) { static _Py_UopsSymbol NO_SPACE_SYMBOL = { .flags = IS_NULL | NOT_NULL | NO_SPACE, .typ = NULL, - .const_val = NULL + .const_val = NULL, + .type_version = 0, }; _Py_UopsSymbol * @@ -76,6 +77,7 @@ sym_new(_Py_UOpsContext *ctx) self->flags = 0; self->typ = NULL; self->const_val = NULL; + self->type_version = 0; return self; } @@ -152,6 +154,18 @@ _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *ty } } +bool +_Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, unsigned int version) +{ + // if the type version was already set, then it must be different and we should set it to bottom + if (sym->type_version) { + sym_set_bottom(ctx, sym); + return false; + } + sym->type_version = version; + return true; +} + void _Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val) { @@ -256,6 +270,12 @@ _Py_uop_sym_get_type(_Py_UopsSymbol *sym) return sym->typ; } +unsigned int +_Py_uop_sym_get_type_version(_Py_UopsSymbol *sym) +{ + return sym->type_version; +} + bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym) { @@ -272,6 +292,13 @@ _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ) return _Py_uop_sym_get_type(sym) == typ; } +bool +_Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, unsigned int version) +{ + return _Py_uop_sym_get_type_version(sym) == 
version; +} + + int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym) { @@ -311,9 +338,9 @@ _Py_UOpsAbstractFrame * _Py_uop_frame_new( _Py_UOpsContext *ctx, PyCodeObject *co, - _Py_UopsSymbol **localsplus_start, - int n_locals_already_filled, - int curr_stackentries) + int curr_stackentries, + _Py_UopsSymbol **args, + int arg_len) { assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH); _Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth]; @@ -321,19 +348,22 @@ _Py_uop_frame_new( frame->stack_len = co->co_stacksize; frame->locals_len = co->co_nlocalsplus; - frame->locals = localsplus_start; + frame->locals = ctx->n_consumed; frame->stack = frame->locals + co->co_nlocalsplus; frame->stack_pointer = frame->stack + curr_stackentries; - ctx->n_consumed = localsplus_start + (co->co_nlocalsplus + co->co_stacksize); + ctx->n_consumed = ctx->n_consumed + (co->co_nlocalsplus + co->co_stacksize); if (ctx->n_consumed >= ctx->limit) { ctx->done = true; ctx->out_of_space = true; return NULL; } - // Initialize with the initial state of all local variables - for (int i = n_locals_already_filled; i < co->co_nlocalsplus; i++) { + for (int i = 0; i < arg_len; i++) { + frame->locals[i] = args[i]; + } + + for (int i = arg_len; i < co->co_nlocalsplus; i++) { _Py_UopsSymbol *local = _Py_uop_sym_new_unknown(ctx); frame->locals[i] = local; } From 38a25e9560cf0ff0b80d9e90bce793ff24c6e027 Mon Sep 17 00:00:00 2001 From: neonene <53406459+neonene@users.noreply.github.com> Date: Sat, 8 Jun 2024 19:22:07 +0900 Subject: [PATCH 135/373] gh-120244: Fix re.sub() reference leak (GH-120245) --- .../next/Library/2024-06-08-09-45-31.gh-issue-120244.8o9Dzr.rst | 1 + Modules/_sre/sre.c | 1 + 2 files changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-06-08-09-45-31.gh-issue-120244.8o9Dzr.rst diff --git a/Misc/NEWS.d/next/Library/2024-06-08-09-45-31.gh-issue-120244.8o9Dzr.rst b/Misc/NEWS.d/next/Library/2024-06-08-09-45-31.gh-issue-120244.8o9Dzr.rst new file mode 100644 
index 00000000000000..d21532f22a1d38 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-08-09-45-31.gh-issue-120244.8o9Dzr.rst @@ -0,0 +1 @@ +Fix memory leak in :func:`re.sub()` when the replacement string contains backreferences. diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index c1eff63d921de9..e33034086481c2 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -1622,6 +1622,7 @@ _sre_template_impl(PyObject *module, PyObject *pattern, PyObject *template) } self->items[i].literal = Py_XNewRef(literal); } + PyObject_GC_Track(self); return (PyObject*) self; bad_template: From 5d59b870effa0f576acf7264cfcbfca2b36e34e3 Mon Sep 17 00:00:00 2001 From: AN Long Date: Sun, 9 Jun 2024 00:11:19 +0800 Subject: [PATCH 136/373] gh-120121: Add InvalidStateError to concurrent.futures.__all__ (#120123) Co-authored-by: Nikita Sobolev --- Lib/concurrent/futures/__init__.py | 1 + .../next/Library/2024-06-05-16-30-28.gh-issue-120121.9dz8i7.rst | 1 + 2 files changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-06-05-16-30-28.gh-issue-120121.9dz8i7.rst diff --git a/Lib/concurrent/futures/__init__.py b/Lib/concurrent/futures/__init__.py index 292e886d5a88ac..72de617a5b6f61 100644 --- a/Lib/concurrent/futures/__init__.py +++ b/Lib/concurrent/futures/__init__.py @@ -23,6 +23,7 @@ 'ALL_COMPLETED', 'CancelledError', 'TimeoutError', + 'InvalidStateError', 'BrokenExecutor', 'Future', 'Executor', diff --git a/Misc/NEWS.d/next/Library/2024-06-05-16-30-28.gh-issue-120121.9dz8i7.rst b/Misc/NEWS.d/next/Library/2024-06-05-16-30-28.gh-issue-120121.9dz8i7.rst new file mode 100644 index 00000000000000..4f3526477c8cce --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-05-16-30-28.gh-issue-120121.9dz8i7.rst @@ -0,0 +1 @@ +Add :exc:`concurrent.futures.InvalidStateError` to module's ``__all__``. 
From 7c016deae62308dd1b4e2767fc6abf04857c7843 Mon Sep 17 00:00:00 2001 From: Clinton Date: Sat, 8 Jun 2024 13:18:58 -0400 Subject: [PATCH 137/373] gh-120276: Fix incorrect email.header.Header maxlinelen default (GH-120277) --- Doc/library/email.header.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/email.header.rst b/Doc/library/email.header.rst index 6e230d5faf1654..219fad0d2f6745 100644 --- a/Doc/library/email.header.rst +++ b/Doc/library/email.header.rst @@ -77,7 +77,7 @@ Here is the :class:`Header` class description: The maximum line length can be specified explicitly via *maxlinelen*. For splitting the first line to a shorter value (to account for the field header which isn't included in *s*, e.g. :mailheader:`Subject`) pass in the name of the - field in *header_name*. The default *maxlinelen* is 76, and the default value + field in *header_name*. The default *maxlinelen* is 78, and the default value for *header_name* is ``None``, meaning it is not taken into account for the first line of a long, split header. From 34f5ae69fe9ab0f5b23311d5c396d0cbb5902913 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Sat, 8 Jun 2024 23:45:57 +0300 Subject: [PATCH 138/373] gh-120268: Prohibit passing ``None`` to ``_pydatetime.date.fromtimestamp`` (#120269) This makes the pure Python implementation consistent with the C implementation. --- Lib/_pydatetime.py | 2 ++ Lib/test/datetimetester.py | 5 +++++ .../Library/2024-06-08-14-36-40.gh-issue-120268.MNpd1q.rst | 2 ++ 3 files changed, 9 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-06-08-14-36-40.gh-issue-120268.MNpd1q.rst diff --git a/Lib/_pydatetime.py b/Lib/_pydatetime.py index b7d569cc41740e..34ccb2da13d0f3 100644 --- a/Lib/_pydatetime.py +++ b/Lib/_pydatetime.py @@ -966,6 +966,8 @@ def __new__(cls, year, month=None, day=None): @classmethod def fromtimestamp(cls, t): "Construct a date from a POSIX timestamp (like time.time())." 
+ if t is None: + raise TypeError("'NoneType' object cannot be interpreted as an integer") y, m, d, hh, mm, ss, weekday, jday, dst = _time.localtime(t) return cls(y, m, d) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index b80da5697ef865..28f75a803b4e04 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -1336,6 +1336,11 @@ def test_insane_fromtimestamp(self): self.assertRaises(OverflowError, self.theclass.fromtimestamp, insane) + def test_fromtimestamp_with_none_arg(self): + # See gh-120268 for more details + with self.assertRaises(TypeError): + self.theclass.fromtimestamp(None) + def test_today(self): import time diff --git a/Misc/NEWS.d/next/Library/2024-06-08-14-36-40.gh-issue-120268.MNpd1q.rst b/Misc/NEWS.d/next/Library/2024-06-08-14-36-40.gh-issue-120268.MNpd1q.rst new file mode 100644 index 00000000000000..d48d43cd047f7a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-08-14-36-40.gh-issue-120268.MNpd1q.rst @@ -0,0 +1,2 @@ +Prohibit passing ``None`` to pure-Python :meth:`datetime.date.fromtimestamp` +to achieve consistency with C-extension implementation. 
From 0ae8579b85f9b0cd3f287082ad6e194bdb025d88 Mon Sep 17 00:00:00 2001 From: Carl Meyer Date: Sun, 9 Jun 2024 22:23:30 -0400 Subject: [PATCH 139/373] gh-119666: fix multiple class-scope comprehensions referencing __class__ (#120295) --- Lib/test/test_listcomps.py | 25 +++++++++++++++++++ ...-06-09-19-13-38.gh-issue-119666.S0G4rZ.rst | 1 + Python/symtable.c | 23 ++++++++--------- 3 files changed, 36 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-09-19-13-38.gh-issue-119666.S0G4rZ.rst diff --git a/Lib/test/test_listcomps.py b/Lib/test/test_listcomps.py index ec2aac81682db8..58b076e9ea5d8a 100644 --- a/Lib/test/test_listcomps.py +++ b/Lib/test/test_listcomps.py @@ -168,6 +168,31 @@ def test_references___class__(self): """ self._check_in_scopes(code, raises=NameError) + def test_references___class___defined(self): + code = """ + __class__ = 2 + res = [__class__ for x in [1]] + """ + self._check_in_scopes( + code, outputs={"res": [2]}, scopes=["module", "function"]) + self._check_in_scopes(code, raises=NameError, scopes=["class"]) + + def test_references___class___enclosing(self): + code = """ + __class__ = 2 + class C: + res = [__class__ for x in [1]] + res = C.res + """ + self._check_in_scopes(code, raises=NameError) + + def test_super_and_class_cell_in_sibling_comps(self): + code = """ + [super for _ in [1]] + [__class__ for _ in [1]] + """ + self._check_in_scopes(code, raises=NameError) + def test_inner_cell_shadows_outer(self): code = """ items = [(lambda: i) for i in range(5)] diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-09-19-13-38.gh-issue-119666.S0G4rZ.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-09-19-13-38.gh-issue-119666.S0G4rZ.rst new file mode 100644 index 00000000000000..09c1f553c48702 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-09-19-13-38.gh-issue-119666.S0G4rZ.rst @@ -0,0 +1 @@ +Fix a compiler crash in the case where two comprehensions in class scope both reference 
``__class__``. diff --git a/Python/symtable.c b/Python/symtable.c index 0ee8ca36cf8df0..7e452cdb13badf 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -781,22 +781,19 @@ inline_comprehension(PySTEntryObject *ste, PySTEntryObject *comp, if (existing == NULL && PyErr_Occurred()) { return 0; } + // __class__ is never allowed to be free through a class scope (see + // drop_class_free) + if (scope == FREE && ste->ste_type == ClassBlock && + _PyUnicode_EqualToASCIIString(k, "__class__")) { + scope = GLOBAL_IMPLICIT; + if (PySet_Discard(comp_free, k) < 0) { + return 0; + } + remove_dunder_class = 1; + } if (!existing) { // name does not exist in scope, copy from comprehension assert(scope != FREE || PySet_Contains(comp_free, k) == 1); - if (scope == FREE && ste->ste_type == ClassBlock && - _PyUnicode_EqualToASCIIString(k, "__class__")) { - // if __class__ is unbound in the enclosing class scope and free - // in the comprehension scope, it needs special handling; just - // letting it be marked as free in class scope will break due to - // drop_class_free - scope = GLOBAL_IMPLICIT; - only_flags &= ~DEF_FREE; - if (PySet_Discard(comp_free, k) < 0) { - return 0; - } - remove_dunder_class = 1; - } PyObject *v_flags = PyLong_FromLong(only_flags); if (v_flags == NULL) { return 0; From e5a7bc6f2eb9a3875063423caa67bb0ffcc3a6b8 Mon Sep 17 00:00:00 2001 From: Clinton Date: Mon, 10 Jun 2024 04:17:50 -0400 Subject: [PATCH 140/373] gh-120296: Fix format string of fcntl.ioctl() audit (#120301) --- Modules/fcntlmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/fcntlmodule.c b/Modules/fcntlmodule.c index 873bdf2ac0657a..0c06c03a6c403e 100644 --- a/Modules/fcntlmodule.c +++ b/Modules/fcntlmodule.c @@ -170,7 +170,7 @@ fcntl_ioctl_impl(PyObject *module, int fd, unsigned long code, Py_ssize_t len; char buf[IOCTL_BUFSZ+1]; /* argument plus NUL byte */ - if (PySys_Audit("fcntl.ioctl", "iIO", fd, code, + if (PySys_Audit("fcntl.ioctl", "ikO", fd, code, 
ob_arg ? ob_arg : Py_None) < 0) { return NULL; } From 4829522b8d3e1a28930f1cccfcc9635e035a0eb4 Mon Sep 17 00:00:00 2001 From: "E. M. Bray" Date: Mon, 10 Jun 2024 10:55:49 +0200 Subject: [PATCH 141/373] bpo-24766: doc= argument to subclasses of property not handled correctly (GH-2487) Co-authored-by: Serhiy Storchaka --- Lib/test/test_property.py | 34 +++++++++++++++++++ .../2018-10-09-15-14-53.bpo-24766.c_C1Wc.rst | 1 + Objects/descrobject.c | 19 +++-------- 3 files changed, 39 insertions(+), 15 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2018-10-09-15-14-53.bpo-24766.c_C1Wc.rst diff --git a/Lib/test/test_property.py b/Lib/test/test_property.py index 408e64f53142db..b7a2219b96149a 100644 --- a/Lib/test/test_property.py +++ b/Lib/test/test_property.py @@ -463,6 +463,40 @@ def getter3(self): self.assertEqual(p.__doc__, "user") self.assertEqual(p2.__doc__, "user") + @unittest.skipIf(sys.flags.optimize >= 2, + "Docstrings are omitted with -O2 and above") + def test_prefer_explicit_doc(self): + # Issue 25757: subclasses of property lose docstring + self.assertEqual(property(doc="explicit doc").__doc__, "explicit doc") + self.assertEqual(PropertySub(doc="explicit doc").__doc__, "explicit doc") + + class Foo: + spam = PropertySub(doc="spam explicit doc") + + @spam.getter + def spam(self): + """ignored as doc already set""" + return 1 + + def _stuff_getter(self): + """ignored as doc set directly""" + stuff = PropertySub(doc="stuff doc argument", fget=_stuff_getter) + + #self.assertEqual(Foo.spam.__doc__, "spam explicit doc") + self.assertEqual(Foo.stuff.__doc__, "stuff doc argument") + + def test_property_no_doc_on_getter(self): + # If a property's getter has no __doc__ then the property's doc should + # be None; test that this is consistent with subclasses as well; see + # GH-2487 + class NoDoc: + @property + def __doc__(self): + raise AttributeError + + self.assertEqual(property(NoDoc()).__doc__, None) + 
self.assertEqual(PropertySub(NoDoc()).__doc__, None) + @unittest.skipIf(sys.flags.optimize >= 2, "Docstrings are omitted with -O2 and above") def test_property_setter_copies_getter_docstring(self): diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-10-09-15-14-53.bpo-24766.c_C1Wc.rst b/Misc/NEWS.d/next/Core and Builtins/2018-10-09-15-14-53.bpo-24766.c_C1Wc.rst new file mode 100644 index 00000000000000..93a8562efe6d6f --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-10-09-15-14-53.bpo-24766.c_C1Wc.rst @@ -0,0 +1 @@ +Fix handling of ``doc`` argument to subclasses of ``property``. diff --git a/Objects/descrobject.c b/Objects/descrobject.c index 1b7e2fde3ceccd..4eccd1704eb95a 100644 --- a/Objects/descrobject.c +++ b/Objects/descrobject.c @@ -1859,22 +1859,9 @@ property_init_impl(propertyobject *self, PyObject *fget, PyObject *fset, /* if no docstring given and the getter has one, use that one */ else if (fget != NULL) { int rc = PyObject_GetOptionalAttr(fget, &_Py_ID(__doc__), &prop_doc); - if (rc <= 0) { + if (rc < 0) { return rc; } - if (!Py_IS_TYPE(self, &PyProperty_Type) && - prop_doc != NULL && prop_doc != Py_None) { - // This oddity preserves the long existing behavior of surfacing - // an AttributeError when using a dict-less (__slots__) property - // subclass as a decorator on a getter method with a docstring. - // See PropertySubclassTest.test_slots_docstring_copy_exception. - int err = PyObject_SetAttr( - (PyObject *)self, &_Py_ID(__doc__), prop_doc); - if (err < 0) { - Py_DECREF(prop_doc); // release our new reference. 
- return -1; - } - } if (prop_doc == Py_None) { prop_doc = NULL; Py_DECREF(Py_None); @@ -1902,7 +1889,9 @@ property_init_impl(propertyobject *self, PyObject *fget, PyObject *fset, Py_DECREF(prop_doc); if (err < 0) { assert(PyErr_Occurred()); - if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + if (!self->getter_doc && + PyErr_ExceptionMatches(PyExc_AttributeError)) + { PyErr_Clear(); // https://github.com/python/cpython/issues/98963#issuecomment-1574413319 // Python silently dropped this doc assignment through 3.11. From b90bd3e5bbc136f53b24ee791824acd6b17e0d42 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 10 Jun 2024 11:54:35 +0200 Subject: [PATCH 142/373] gh-120155: Fix Coverity issue in zoneinfo load_data() (#120232) Declare the 'rv' varaible at the top of the load_data() function to make sure that it's initialized before the first 'goto error' which uses 'rv' (return rv). Fix the Coverity issue: Error: UNINIT (CWE-457): Python-3.12.2/Modules/_zoneinfo.c:1233:5: skipped_decl: Jumping over declaration of ""rv"". Python-3.12.2/Modules/_zoneinfo.c:1284:5: uninit_use: Using uninitialized value ""rv"". 
1282| } 1283| 1284|-> return rv; 1285| } 1286| --- Modules/_zoneinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_zoneinfo.c b/Modules/_zoneinfo.c index 38c3f0c45d803f..902ece795b575b 100644 --- a/Modules/_zoneinfo.c +++ b/Modules/_zoneinfo.c @@ -944,6 +944,7 @@ ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1) static int load_data(zoneinfo_state *state, PyZoneInfo_ZoneInfo *self, PyObject *file_obj) { + int rv = 0; PyObject *data_tuple = NULL; long *utcoff = NULL; @@ -1220,7 +1221,6 @@ load_data(zoneinfo_state *state, PyZoneInfo_ZoneInfo *self, PyObject *file_obj) } } - int rv = 0; goto cleanup; error: // These resources only need to be freed if we have failed, if we succeed From c3b6dbff2c8886de1edade737febe85dd47ff4d0 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Mon, 10 Jun 2024 13:06:18 +0200 Subject: [PATCH 143/373] gh-115801: Only allow sequence of strings as input for difflib.unified_diff (GH-118333) --- Lib/difflib.py | 6 ++++ Lib/test/test_difflib.py | 30 +++++++++++++++---- ...-04-27-18-36-46.gh-issue-115801.SVeHSy.rst | 1 + 3 files changed, 31 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-04-27-18-36-46.gh-issue-115801.SVeHSy.rst diff --git a/Lib/difflib.py b/Lib/difflib.py index 0443963b4fd697..7f595b6c72e641 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1264,6 +1264,12 @@ def _check_types(a, b, *args): if b and not isinstance(b[0], str): raise TypeError('lines to compare must be str, not %s (%r)' % (type(b[0]).__name__, b[0])) + if isinstance(a, str): + raise TypeError('input must be a sequence of strings, not %s' % + type(a).__name__) + if isinstance(b, str): + raise TypeError('input must be a sequence of strings, not %s' % + type(b).__name__) for arg in args: if not isinstance(arg, str): raise TypeError('all arguments must be str, not: %r' % (arg,)) diff --git a/Lib/test/test_difflib.py b/Lib/test/test_difflib.py index 
bf6e5b1152b4a2..9e217249be7332 100644 --- a/Lib/test/test_difflib.py +++ b/Lib/test/test_difflib.py @@ -295,7 +295,7 @@ def test_close_matches_aligned(self): class TestOutputFormat(unittest.TestCase): def test_tab_delimiter(self): - args = ['one', 'two', 'Original', 'Current', + args = [['one'], ['two'], 'Original', 'Current', '2005-01-26 23:30:50', '2010-04-02 10:20:52'] ud = difflib.unified_diff(*args, lineterm='') self.assertEqual(list(ud)[0:2], [ @@ -307,7 +307,7 @@ def test_tab_delimiter(self): "--- Current\t2010-04-02 10:20:52"]) def test_no_trailing_tab_on_empty_filedate(self): - args = ['one', 'two', 'Original', 'Current'] + args = [['one'], ['two'], 'Original', 'Current'] ud = difflib.unified_diff(*args, lineterm='') self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"]) @@ -447,6 +447,28 @@ def assertDiff(expect, actual): lineterm=b'') assertDiff(expect, actual) + +class TestInputTypes(unittest.TestCase): + def _assert_type_error(self, msg, generator, *args): + with self.assertRaises(TypeError) as ctx: + list(generator(*args)) + self.assertEqual(msg, str(ctx.exception)) + + def test_input_type_checks(self): + unified = difflib.unified_diff + context = difflib.context_diff + + expect = "input must be a sequence of strings, not str" + self._assert_type_error(expect, unified, 'a', ['b']) + self._assert_type_error(expect, context, 'a', ['b']) + + self._assert_type_error(expect, unified, ['a'], 'b') + self._assert_type_error(expect, context, ['a'], 'b') + + expect = "lines to compare must be str, not NoneType (None)" + self._assert_type_error(expect, unified, ['a'], [None]) + self._assert_type_error(expect, context, ['a'], [None]) + def test_mixed_types_content(self): # type of input content must be consistent: all str or all bytes a = [b'hello'] @@ -495,10 +517,6 @@ def test_mixed_types_dates(self): b = ['bar\n'] list(difflib.unified_diff(a, b, 'a', 'b', datea, dateb)) - def _assert_type_error(self, msg, generator, *args): - with 
self.assertRaises(TypeError) as ctx: - list(generator(*args)) - self.assertEqual(msg, str(ctx.exception)) class TestJunkAPIs(unittest.TestCase): def test_is_line_junk_true(self): diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-27-18-36-46.gh-issue-115801.SVeHSy.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-27-18-36-46.gh-issue-115801.SVeHSy.rst new file mode 100644 index 00000000000000..93b176d5767335 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-27-18-36-46.gh-issue-115801.SVeHSy.rst @@ -0,0 +1 @@ +Raise ``TypeError`` when passing a string to :func:`difflib.unified_diff` and :func:`difflib.context_diff`. From 56c3815ba14c790d2e9a227b4ac0ead5e6b1e570 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Mon, 10 Jun 2024 16:15:12 +0100 Subject: [PATCH 144/373] gh-119786: copy compiler doc from devguide to InternalDocs and convert to markdown (#120134) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * gh-119876: move compiler doc from devguide to InternalDocs Copy of https://github.com/python/devguide/commit/78fc0d7aa9fd0d6733d10c23b178b2a0e2799afc Co-Authored-By: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-Authored-By: Adam Turner <9087854+aa-turner@users.noreply.github.com> Co-Authored-By: Brett Cannon Co-Authored-By: Carol Willing Co-Authored-By: Daniel Porteous Co-Authored-By: Dennis Sweeney <36520290+sweeneyde@users.noreply.github.com> Co-Authored-By: Éric Araujo Co-Authored-By: Erlend Egeberg Aasland Co-Authored-By: Ezio Melotti Co-Authored-By: Georg Brandl Co-Authored-By: Guido van Rossum Co-Authored-By: Hugo van Kemenade Co-Authored-By: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Co-Authored-By: Jeff Allen Co-Authored-By: Jim Fasarakis-Hilliard Co-Authored-By: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Co-Authored-By: Lincoln <71312724+Lincoln-developer@users.noreply.github.com> Co-Authored-By: 
Mariatta Co-Authored-By: Muhammad Mahad Co-Authored-By: Ned Deily Co-Authored-By: Pablo Galindo Salgado Co-Authored-By: Serhiy Storchaka Co-Authored-By: Stéphane Wirtel Co-Authored-By: Suriyaa ✌️️ Co-Authored-By: Zachary Ware Co-Authored-By: psyker156 <242220+psyker156@users.noreply.github.com> Co-Authored-By: slateny <46876382+slateny@users.noreply.github.com> Co-Authored-By: svelankar <17737361+svelankar@users.noreply.github.com> Co-Authored-By: zikcheng * convert to markdown * add to index * update more of the out of date stuff --------- Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Brett Cannon Co-authored-by: Carol Willing Co-authored-by: Daniel Porteous Co-authored-by: Dennis Sweeney <36520290+sweeneyde@users.noreply.github.com> Co-authored-by: Éric Araujo Co-authored-by: Erlend Egeberg Aasland Co-authored-by: Ezio Melotti Co-authored-by: Georg Brandl Co-authored-by: Guido van Rossum Co-authored-by: Hugo van Kemenade Co-authored-by: Jeff Allen Co-authored-by: Jim Fasarakis-Hilliard Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Co-authored-by: Lincoln <71312724+Lincoln-developer@users.noreply.github.com> Co-authored-by: Mariatta Co-authored-by: Muhammad Mahad Co-authored-by: Ned Deily Co-authored-by: Pablo Galindo Salgado Co-authored-by: Serhiy Storchaka Co-authored-by: Stéphane Wirtel Co-authored-by: Suriyaa ✌️️ Co-authored-by: Zachary Ware Co-authored-by: psyker156 <242220+psyker156@users.noreply.github.com> Co-authored-by: slateny <46876382+slateny@users.noreply.github.com> Co-authored-by: svelankar <17737361+svelankar@users.noreply.github.com> Co-authored-by: zikcheng --- InternalDocs/README.md | 2 + InternalDocs/compiler.md | 651 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 653 insertions(+) create mode 100644 InternalDocs/compiler.md diff --git a/InternalDocs/README.md b/InternalDocs/README.md index a2502fbf198735..42f6125794266a 100644 --- a/InternalDocs/README.md +++ 
b/InternalDocs/README.md @@ -12,6 +12,8 @@ it is not, please report that through the [issue tracker](https://github.com/python/cpython/issues). +[Compiler Design](compiler.md) + [Exception Handling](exception_handling.md) [Adaptive Instruction Families](adaptive.md) diff --git a/InternalDocs/compiler.md b/InternalDocs/compiler.md new file mode 100644 index 00000000000000..0abc10da6e05c6 --- /dev/null +++ b/InternalDocs/compiler.md @@ -0,0 +1,651 @@ + +Compiler design +=============== + +Abstract +-------- + +In CPython, the compilation from source code to bytecode involves several steps: + +1. Tokenize the source code + [Parser/lexer/](https://github.com/python/cpython/blob/main/Parser/lexer/) + and [Parser/tokenizer/](https://github.com/python/cpython/blob/main/Parser/tokenizer/). +2. Parse the stream of tokens into an Abstract Syntax Tree + [Parser/parser.c](https://github.com/python/cpython/blob/main/Parser/parser.c). +3. Transform AST into an instruction sequence + [Python/compile.c](https://github.com/python/cpython/blob/main/Python/compile.c). +4. Construct a Control Flow Graph and apply optimizations to it + [Python/flowgraph.c](https://github.com/python/cpython/blob/main/Python/flowgraph.c). +5. Emit bytecode based on the Control Flow Graph + [Python/assemble.c](https://github.com/python/cpython/blob/main/Python/assemble.c). + +This document outlines how these steps of the process work. + +This document only describes parsing in enough depth to explain what is needed +for understanding compilation. This document provides a detailed, though not +exhaustive, view of how the entire system works. You will most likely need +to read some source code to have an exact understanding of all details. + + +Parsing +======= + +As of Python 3.9, Python's parser is a PEG parser of a somewhat +unusual design. It is unusual in the sense that the parser's input is a stream +of tokens rather than a stream of characters which is more common with PEG +parsers.
+ +The grammar file for Python can be found in +[Grammar/python.gram](https://github.com/python/cpython/blob/main/Grammar/python.gram). +The definitions for literal tokens (such as ``:``, numbers, etc.) can be found in +[Grammar/Tokens](https://github.com/python/cpython/blob/main/Grammar/Tokens). +Various C files, including +[Parser/parser.c](https://github.com/python/cpython/blob/main/Parser/parser.c) +are generated from these. + +See Also: + +* [Guide to the parser](https://devguide.python.org/internals/parser/index.html) + for a detailed description of the parser. + +* [Changing CPython’s grammar](https://devguide.python.org/developer-workflow/grammar/#grammar) + for a detailed description of the grammar. + + +Abstract syntax trees (AST) +=========================== + + +The abstract syntax tree (AST) is a high-level representation of the +program structure without the necessity of containing the source code; +it can be thought of as an abstract representation of the source code. The +specification of the AST nodes is specified using the Zephyr Abstract +Syntax Definition Language (ASDL) [^1], [^2]. + +The definition of the AST nodes for Python is found in the file +[Parser/Python.asdl](https://github.com/python/cpython/blob/main/Parser/Python.asdl). + +Each AST node (representing statements, expressions, and several +specialized types, like list comprehensions and exception handlers) is +defined by the ASDL. Most definitions in the AST correspond to a +particular source construct, such as an 'if' statement or an attribute +lookup. The definition is independent of its realization in any +particular programming language. + +The following fragment of the Python ASDL construct demonstrates the +approach and syntax: + +``` + module Python + { + stmt = FunctionDef(identifier name, arguments args, stmt* body, + expr* decorators) + | Return(expr? value) | Yield(expr? 
value) + attributes (int lineno) + } +``` + +The preceding example describes two different kinds of statements and an +expression: function definitions, return statements, and yield expressions. +All three kinds are considered of type ``stmt`` as shown by ``|`` separating +the various kinds. They all take arguments of various kinds and amounts. + +Modifiers on the argument type specify the number of values needed; ``?`` +means it is optional, ``*`` means 0 or more, while no modifier means only one +value for the argument and it is required. ``FunctionDef``, for instance, +takes an ``identifier`` for the *name*, ``arguments`` for *args*, zero or more +``stmt`` arguments for *body*, and zero or more ``expr`` arguments for +*decorators*. + +Do notice that something like 'arguments', which is a node type, is +represented as a single AST node and not as a sequence of nodes as with +stmt as one might expect. + +All three kinds also have an 'attributes' argument; this is shown by the +fact that 'attributes' lacks a '|' before it. + +The statement definitions above generate the following C structure type: + + +``` + typedef struct _stmt *stmt_ty; + + struct _stmt { + enum { FunctionDef_kind=1, Return_kind=2, Yield_kind=3 } kind; + union { + struct { + identifier name; + arguments_ty args; + asdl_seq *body; + } FunctionDef; + + struct { + expr_ty value; + } Return; + + struct { + expr_ty value; + } Yield; + } v; + int lineno; + } +``` + +Also generated are a series of constructor functions that allocate (in +this case) a ``stmt_ty`` struct with the appropriate initialization. The +``kind`` field specifies which component of the union is initialized. The +``FunctionDef()`` constructor function sets 'kind' to ``FunctionDef_kind`` and +initializes the *name*, *args*, *body*, and *attributes* fields. + +See also +[Green Tree Snakes - The missing Python AST docs](https://greentreesnakes.readthedocs.io/en/latest) + by Thomas Kluyver. 
+ +Memory management +================= + +Before discussing the actual implementation of the compiler, a discussion of +how memory is handled is in order. To make memory management simple, an **arena** +is used that pools memory in a single location for easy +allocation and removal. This enables the removal of explicit memory +deallocation. Because memory allocation for all needed memory in the compiler +registers that memory with the arena, a single call to free the arena is all +that is needed to completely free all memory used by the compiler. + +In general, unless you are working on the critical core of the compiler, memory +management can be completely ignored. But if you are working at either the +very beginning of the compiler or the end, you need to care about how the arena +works. All code relating to the arena is in either +[Include/internal/pycore_pyarena.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_pyarena.h) +or [Python/pyarena.c](https://github.com/python/cpython/blob/main/Python/pyarena.c). + +``PyArena_New()`` will create a new arena. The returned ``PyArena`` structure +will store pointers to all memory given to it. This does the bookkeeping of +what memory needs to be freed when the compiler is finished with the memory it +used. That freeing is done with ``PyArena_Free()``. This only needs to be +called in strategic areas where the compiler exits. + +As stated above, in general you should not have to worry about memory +management when working on the compiler. The technical details of memory +management have been designed to be hidden from you for most cases. + +The only exception comes about when managing a PyObject. Since the rest +of Python uses reference counting, there is extra support added +to the arena to cleanup each PyObject that was allocated. These cases +are very rare. However, if you've allocated a PyObject, you must tell +the arena about it by calling ``PyArena_AddPyObject()``. 
+ + +Source code to AST +================== + +The AST is generated from source code using the function +``_PyParser_ASTFromString()`` or ``_PyParser_ASTFromFile()`` +[Parser/peg_api.c](https://github.com/python/cpython/blob/main/Parser/peg_api.c). + +After some checks, a helper function in +[Parser/parser.c](https://github.com/python/cpython/blob/main/Parser/parser.c) +begins applying production rules on the source code it receives; converting source +code to tokens and matching these tokens recursively to their corresponding rule. The +production rule's corresponding rule function is called on every match. These rule +functions follow the format `xx_rule`. Where *xx* is the grammar rule +that the function handles and is automatically derived from +[Grammar/python.gram](https://github.com/python/cpython/blob/main/Grammar/python.gram) by +[Tools/peg_generator/pegen/c_generator.py](https://github.com/python/cpython/blob/main/Tools/peg_generator/pegen/c_generator.py). + +Each rule function in turn creates an AST node as it goes along. It does this +by allocating all the new nodes it needs, calling the proper AST node creation +functions for any required supporting functions and connecting them as needed. +This continues until all nonterminal symbols are replaced with terminals. If an +error occurs, the rule functions backtrack and try another rule function. If +there are no more rules, an error is set and the parsing ends. + +The AST node creation helper functions have the name `_PyAST_{xx}` +where *xx* is the AST node that the function creates. These are defined by the +ASDL grammar and contained in +[Python/Python-ast.c](https://github.com/python/cpython/blob/main/Python/Python-ast.c) +(which is generated by +[Parser/asdl_c.py](https://github.com/python/cpython/blob/main/Parser/asdl_c.py) +from +[Parser/Python.asdl](https://github.com/python/cpython/blob/main/Parser/Python.asdl)). +This all leads to a sequence of AST nodes stored in ``asdl_seq`` structs. 
+ +To demonstrate everything explained so far, here's the +rule function responsible for a simple named import statement such as +``import sys``. Note that error-checking and debugging code has been +omitted. Removed parts are represented by ``...``. +Furthermore, some comments have been added for explanation. These comments +may not be present in the actual code. + + +``` + // This is the production rule (from python.gram) the rule function + // corresponds to: + // import_name: 'import' dotted_as_names + static stmt_ty + import_name_rule(Parser *p) + { + ... + stmt_ty _res = NULL; + { // 'import' dotted_as_names + ... + Token * _keyword; + asdl_alias_seq* a; + // The tokenizing steps. + if ( + (_keyword = _PyPegen_expect_token(p, 513)) // token='import' + && + (a = dotted_as_names_rule(p)) // dotted_as_names + ) + { + ... + // Generate an AST for the import statement. + _res = _PyAST_Import ( a , ...); + ... + goto done; + } + ... + } + _res = NULL; + done: + ... + return _res; + } +``` + + +To improve backtracking performance, some rules (chosen by applying a +``(memo)`` flag in the grammar file) are memoized. Each rule function checks if +a memoized version exists and returns that if so, else it continues in the +manner stated in the previous paragraphs. + +There are macros for creating and using ``asdl_xx_seq *`` types, where *xx* is +a type of the ASDL sequence. Three main types are defined +manually -- ``generic``, ``identifier`` and ``int``. These types are found in +[Python/asdl.c](https://github.com/python/cpython/blob/main/Python/asdl.c) +and its corresponding header file +[Include/internal/pycore_asdl.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_asdl.h). 
+Functions and macros for creating ``asdl_xx_seq *`` types are as follows: + +``_Py_asdl_generic_seq_new(Py_ssize_t, PyArena *)`` + Allocate memory for an ``asdl_generic_seq`` of the specified length +``_Py_asdl_identifier_seq_new(Py_ssize_t, PyArena *)`` + Allocate memory for an ``asdl_identifier_seq`` of the specified length +``_Py_asdl_int_seq_new(Py_ssize_t, PyArena *)`` + Allocate memory for an ``asdl_int_seq`` of the specified length + +In addition to the three types mentioned above, some ASDL sequence types are +automatically generated by +[Parser/asdl_c.py](https://github.com/python/cpython/blob/main/Parser/asdl_c.py) +and found in +[Include/internal/pycore_ast.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_ast.h). +Macros for using both manually defined and automatically generated ASDL +sequence types are as follows: + +``asdl_seq_GET(asdl_xx_seq *, int)`` + Get item held at a specific position in an ``asdl_xx_seq`` +``asdl_seq_SET(asdl_xx_seq *, int, stmt_ty)`` + Set a specific index in an ``asdl_xx_seq`` to the specified value + +Untyped counterparts exist for some of the typed macros. These are useful +when a function needs to manipulate a generic ASDL sequence: + +``asdl_seq_GET_UNTYPED(asdl_seq *, int)`` + Get item held at a specific position in an ``asdl_seq`` +``asdl_seq_SET_UNTYPED(asdl_seq *, int, stmt_ty)`` + Set a specific index in an ``asdl_seq`` to the specified value +``asdl_seq_LEN(asdl_seq *)`` + Return the length of an ``asdl_seq`` or ``asdl_xx_seq`` + +Note that typed macros and functions are recommended over their untyped +counterparts. Typed macros carry out checks in debug mode and aid +debugging errors caused by incorrectly casting from ``void *``. + +If you are working with statements, you must also worry about keeping +track of what line number generated the statement. Currently the line +number is passed as the last parameter to each ``stmt_ty`` function. 
+ +See also [PEP 617: New PEG parser for CPython](https://peps.python.org/pep-0617/). + + +Control flow graphs +=================== + +A **control flow graph** (often referenced by its acronym, **CFG**) is a +directed graph that models the flow of a program. A node of a CFG is +not an individual bytecode instruction, but instead represents a +sequence of bytecode instructions that always execute sequentially. +Each node is called a *basic block* and must always execute from +start to finish, with a single entry point at the beginning and a +single exit point at the end. If some bytecode instruction *a* needs +to jump to some other bytecode instruction *b*, then *a* must occur at +the end of its basic block, and *b* must occur at the start of its +basic block. + +As an example, consider the following code snippet: + +```python + if x < 10: + f1() + f2() + else: + g() + end() +``` + +The ``x < 10`` guard is represented by its own basic block that +compares ``x`` with ``10`` and then ends in a conditional jump based on +the result of the comparison. This conditional jump allows the block +to point to both the body of the ``if`` and the body of the ``else``. The +``if`` basic block contains the ``f1()`` and ``f2()`` calls and points to +the ``end()`` basic block. The ``else`` basic block contains the ``g()`` +call and similarly points to the ``end()`` block. + +Note that more complex code in the guard, the ``if`` body, or the ``else`` +body may be represented by multiple basic blocks. For instance, +short-circuiting boolean logic in a guard like ``if x or y:`` +will produce one basic block that tests the truth value of ``x`` +and then points both (1) to the start of the ``if`` body and (2) to +a different basic block that tests the truth value of ``y``. + +CFGs are useful as an intermediate representation of the code because +they are a convenient data structure for optimizations.
+ +AST to CFG to bytecode +====================== + +The conversion of an ``AST`` to bytecode is initiated by a call to the function +``_PyAST_Compile()`` in +[Python/compile.c](https://github.com/python/cpython/blob/main/Python/compile.c). + +The first step is to construct the symbol table. This is implemented by +``_PySymtable_Build()`` in +[Python/symtable.c](https://github.com/python/cpython/blob/main/Python/symtable.c). +This function begins by entering the starting code block for the AST (passed-in) +and then calling the proper `symtable_visit_{xx}` function (with *xx* being the +AST node type). Next, the AST tree is walked with the various code blocks that +delineate the reach of a local variable as blocks are entered and exited using +``symtable_enter_block()`` and ``symtable_exit_block()``, respectively. + +Once the symbol table is created, the ``AST`` is transformed by ``compiler_codegen()`` +in [Python/compile.c](https://github.com/python/cpython/blob/main/Python/compile.c) +into a sequence of pseudo instructions. These are similar to bytecode, but +in some cases they are more abstract, and are resolved later into actual +bytecode. The construction of this instruction sequence is handled by several +functions that break the task down by various AST node types. The functions are +all named `compiler_visit_{xx}` where *xx* is the name of the node type (such +as ``stmt``, ``expr``, etc.). Each function receives a ``struct compiler *`` +and `{xx}_ty` where *xx* is the AST node type. Typically these functions +consist of a large 'switch' statement, branching based on the kind of +node type passed to it. Simple things are handled inline in the +'switch' statement with more complex transformations farmed out to other +functions named `compiler_{xx}` with *xx* being a descriptive name of what is +being handled. + +When transforming an arbitrary AST node, use the ``VISIT()`` macro. 
+The appropriate `compiler_visit_{xx}` function is called, based on the value +passed in for the node type (so `VISIT({c}, expr, {node})` calls +`compiler_visit_expr({c}, {node})`). The ``VISIT_SEQ()`` macro is very similar, +but is called on AST node sequences (those values that were created as +arguments to a node that used the '*' modifier). + +Emission of bytecode is handled by the following macros: + +* ``ADDOP(struct compiler *, location, int)`` + add a specified opcode +* ``ADDOP_IN_SCOPE(struct compiler *, location, int)`` + like ``ADDOP``, but also exits current scope; used for adding return value + opcodes in lambdas and closures +* ``ADDOP_I(struct compiler *, location, int, Py_ssize_t)`` + add an opcode that takes an integer argument +* ``ADDOP_O(struct compiler *, location, int, PyObject *, TYPE)`` + add an opcode with the proper argument based on the position of the + specified PyObject in PyObject sequence object, but with no handling of + mangled names; used for when you + need to do named lookups of objects such as globals, consts, or + parameters where name mangling is not possible and the scope of the + name is known; *TYPE* is the name of PyObject sequence + (``names`` or ``varnames``) +* ``ADDOP_N(struct compiler *, location, int, PyObject *, TYPE)`` + just like ``ADDOP_O``, but steals a reference to PyObject +* ``ADDOP_NAME(struct compiler *, location, int, PyObject *, TYPE)`` + just like ``ADDOP_O``, but name mangling is also handled; used for + attribute loading or importing based on name +* ``ADDOP_LOAD_CONST(struct compiler *, location, PyObject *)`` + add the ``LOAD_CONST`` opcode with the proper argument based on the + position of the specified PyObject in the consts table.
+* ``ADDOP_LOAD_CONST_NEW(struct compiler *, location, PyObject *)`` + just like ``ADDOP_LOAD_CONST``, but steals a reference to PyObject +* ``ADDOP_JUMP(struct compiler *, location, int, basicblock *)`` + create a jump to a basic block + +The ``location`` argument is a struct with the source location to be +associated with this instruction. It is typically extracted from an +``AST`` node with the ``LOC`` macro. The ``NO_LOCATION`` can be used +for *synthetic* instructions, which we do not associate with a line +number at this stage. For example, the implicit ``return None`` +which is added at the end of a function is not associated with any +line in the source code. + +There are several helper functions that will emit pseudo-instructions +and are named `compiler_{xx}()` where *xx* is what the function helps +with (``list``, ``boolop``, etc.). A rather useful one is ``compiler_nameop()``. +This function looks up the scope of a variable and, based on the +expression context, emits the proper opcode to load, store, or delete +the variable. + +Once the instruction sequence is created, it is transformed into a CFG +by ``_PyCfg_FromInstructionSequence()``. Then ``_PyCfg_OptimizeCodeUnit()`` +applies various peephole optimizations, and +``_PyCfg_OptimizedCfgToInstructionSequence()`` converts the optimized ``CFG`` +back into an instruction sequence. These conversions and optimizations are +implemented in +[Python/flowgraph.c](https://github.com/python/cpython/blob/main/Python/flowgraph.c). + +Finally, the sequence of pseudo-instructions is converted into actual +bytecode. This includes transforming pseudo instructions into actual instructions, +converting jump targets from logical labels to relative offsets, and +construction of the +[exception table](exception_handling.md) and +[locations table](https://github.com/python/cpython/blob/main/Objects/locations.md).
+The bytecode and tables are then wrapped into a ``PyCodeObject`` along with additional +metadata, including the ``consts`` and ``names`` arrays, information about function +reference to the source code (filename, etc). All of this is implemented by +``_PyAssemble_MakeCodeObject()`` in +[Python/assemble.c](https://github.com/python/cpython/blob/main/Python/assemble.c). + + +Code objects +============ + +The result of ``PyAST_CompileObject()`` is a ``PyCodeObject`` which is defined in +[Include/cpython/code.h](https://github.com/python/cpython/blob/main/Include/cpython/code.h). +And with that you now have executable Python bytecode! + +The code objects (byte code) are executed in +[Python/ceval.c](https://github.com/python/cpython/blob/main/Python/ceval.c). +This file will also need a new case statement for the new opcode in the big switch +statement in ``_PyEval_EvalFrameDefault()``. + + +Important files +=============== + +* [Parser/](https://github.com/python/cpython/blob/main/Parser/) + + * [Parser/Python.asdl](https://github.com/python/cpython/blob/main/Parser/Python.asdl): + ASDL syntax file. + + * [Parser/asdl.py](https://github.com/python/cpython/blob/main/Parser/asdl.py): + Parser for ASDL definition files. + Reads in an ASDL description and parses it into an AST that describes it. + + * [Parser/asdl_c.py](https://github.com/python/cpython/blob/main/Parser/asdl_c.py): + Generate C code from an ASDL description. Generates + [Python/Python-ast.c](https://github.com/python/cpython/blob/main/Python/Python-ast.c) + and + [Include/internal/pycore_ast.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_ast.h). + + * [Parser/parser.c](https://github.com/python/cpython/blob/main/Parser/parser.c): + The new PEG parser introduced in Python 3.9. 
+ Generated by + [Tools/peg_generator/pegen/c_generator.py](https://github.com/python/cpython/blob/main/Tools/peg_generator/pegen/c_generator.py) + from the grammar [Grammar/python.gram](https://github.com/python/cpython/blob/main/Grammar/python.gram). + Creates the AST from source code. Rule functions for their corresponding production + rules are found here. + + * [Parser/peg_api.c](https://github.com/python/cpython/blob/main/Parser/peg_api.c): + Contains high-level functions which are + used by the interpreter to create an AST from source code. + + * [Parser/pegen.c](https://github.com/python/cpython/blob/main/Parser/pegen.c): + Contains helper functions which are used by functions in + [Parser/parser.c](https://github.com/python/cpython/blob/main/Parser/parser.c) + to construct the AST. Also contains helper functions which help raise better error messages + when parsing source code. + + * [Parser/pegen.h](https://github.com/python/cpython/blob/main/Parser/pegen.h): + Header file for the corresponding + [Parser/pegen.c](https://github.com/python/cpython/blob/main/Parser/pegen.c). + Also contains definitions of the ``Parser`` and ``Token`` structs. + +* [Python/](https://github.com/python/cpython/blob/main/Python) + + * [Python/Python-ast.c](https://github.com/python/cpython/blob/main/Python/Python-ast.c): + Creates C structs corresponding to the ASDL types. Also contains code for + marshalling AST nodes (core ASDL types have marshalling code in + [Python/asdl.c](https://github.com/python/cpython/blob/main/Python/asdl.c)). + "File automatically generated by + [Parser/asdl_c.py](https://github.com/python/cpython/blob/main/Parser/asdl_c.py). + This file must be committed separately after every grammar change + is committed since the ``__version__`` value is set to the latest + grammar change revision number. + + * [Python/asdl.c](https://github.com/python/cpython/blob/main/Python/asdl.c): + Contains code to handle the ASDL sequence type. 
+ Also has code to handle marshalling the core ASDL types, such as number + and identifier. Used by + [Python/Python-ast.c](https://github.com/python/cpython/blob/main/Python/Python-ast.c) + for marshalling AST nodes. + + * [Python/ast.c](https://github.com/python/cpython/blob/main/Python/ast.c): + Used for validating the AST. + + * [Python/ast_opt.c](https://github.com/python/cpython/blob/main/Python/ast_opt.c): + Optimizes the AST. + + * [Python/ast_unparse.c](https://github.com/python/cpython/blob/main/Python/ast_unparse.c): + Converts the AST expression node back into a string (for string annotations). + + * [Python/ceval.c](https://github.com/python/cpython/blob/main/Python/ceval.c): + Executes byte code (aka, eval loop). + + * [Python/symtable.c](https://github.com/python/cpython/blob/main/Python/symtable.c): + Generates a symbol table from AST. + + * [Python/pyarena.c](https://github.com/python/cpython/blob/main/Python/pyarena.c): + Implementation of the arena memory manager. + + * [Python/compile.c](https://github.com/python/cpython/blob/main/Python/compile.c): + Emits pseudo bytecode based on the AST. + + * [Python/flowgraph.c](https://github.com/python/cpython/blob/main/Python/flowgraph.c): + Implements peephole optimizations. + + * [Python/assemble.c](https://github.com/python/cpython/blob/main/Python/assemble.c): + Constructs a code object from a sequence of pseudo instructions. + + * [Python/instruction_sequence.c](https://github.com/python/cpython/blob/main/Python/instruction_sequence.c): + A data structure representing a sequence of bytecode-like pseudo-instructions. + +* [Include/](https://github.com/python/cpython/blob/main/Include/) + + * [Include/cpython/code.h](https://github.com/python/cpython/blob/main/Include/cpython/code.h) + : Header file for + [Objects/codeobject.c](https://github.com/python/cpython/blob/main/Objects/codeobject.c); + contains definition of ``PyCodeObject``.
+ + * [Include/opcode.h](https://github.com/python/cpython/blob/main/Include/opcode.h) + : One of the files that must be modified if + [Lib/opcode.py](https://github.com/python/cpython/blob/main/Lib/opcode.py) is. + + * [Include/internal/pycore_ast.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_ast.h) + : Contains the actual definitions of the C structs as generated by + [Python/Python-ast.c](https://github.com/python/cpython/blob/main/Python/Python-ast.c) + "Automatically generated by + [Parser/asdl_c.py](https://github.com/python/cpython/blob/main/Parser/asdl_c.py). + + * [Include/internal/pycore_asdl.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_asdl.h) + : Header for the corresponding + [Python/ast.c](https://github.com/python/cpython/blob/main/Python/ast.c). + + * [Include/internal/pycore_ast.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_ast.h) + : Declares ``_PyAST_Validate()`` external (from + [Python/ast.c](https://github.com/python/cpython/blob/main/Python/ast.c)). + + * [Include/internal/pycore_symtable.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_symtable.h) + : Header for + [Python/symtable.c](https://github.com/python/cpython/blob/main/Python/symtable.c). + ``struct symtable`` and ``PySTEntryObject`` are defined here. + + * [Include/internal/pycore_parser.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_parser.h) + : Header for the corresponding + [Parser/peg_api.c](https://github.com/python/cpython/blob/main/Parser/peg_api.c). + + * [Include/internal/pycore_pyarena.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_pyarena.h) + : Header file for the corresponding + [Python/pyarena.c](https://github.com/python/cpython/blob/main/Python/pyarena.c). + + * [Include/opcode_ids.h](https://github.com/python/cpython/blob/main/Include/opcode_ids.h) + : List of opcodes. 
Generated from + [Python/bytecodes.c](https://github.com/python/cpython/blob/main/Python/bytecodes.c) + by + [Tools/cases_generator/opcode_id_generator.py](https://github.com/python/cpython/blob/main/Tools/cases_generator/opcode_id_generator.py). + +* [Objects/](https://github.com/python/cpython/blob/main/Objects/) + + * [Objects/codeobject.c](https://github.com/python/cpython/blob/main/Objects/codeobject.c) + : Contains PyCodeObject-related code. + + * [Objects/frameobject.c](https://github.com/python/cpython/blob/main/Objects/frameobject.c) + : Contains the ``frame_setlineno()`` function which should determine whether it is allowed + to make a jump between two points in a bytecode. + +* [Lib/](https://github.com/python/cpython/blob/main/Lib/) + + * [Lib/opcode.py](https://github.com/python/cpython/blob/main/Lib/opcode.py) + : opcode utilities exposed to Python. + + * [Lib/importlib/_bootstrap_external.py](https://github.com/python/cpython/blob/main/Lib/importlib/_bootstrap_external.py) + : Home of the magic number (named ``MAGIC_NUMBER``) for bytecode versioning. + + +Objects +======= + +* [Objects/locations.md](https://github.com/python/cpython/blob/main/Objects/locations.md): Describes the location table +* [Objects/frame_layout.md](https://github.com/python/cpython/blob/main/Objects/frame_layout.md): Describes the frame stack +* [Objects/object_layout.md](https://github.com/python/cpython/blob/main/Objects/object_layout.md): Describes object layout for 3.11 and later +* [Exception Handling](exception_handling.md): Describes the exception table + + +Specializing Adaptive Interpreter +================================= + +Adding a specializing, adaptive interpreter to CPython will bring significant +performance improvements. These documents provide more information: + +* [PEP 659: Specializing Adaptive Interpreter](https://peps.python.org/pep-0659/).
+* [Adding or extending a family of adaptive instructions](adaptive.md) + + +References +========== + +[^1]: Daniel C. Wang, Andrew W. Appel, Jeff L. Korn, and Chris + S. Serra. `The Zephyr Abstract Syntax Description Language.`_ + In Proceedings of the Conference on Domain-Specific Languages, + pp. 213--227, 1997. + +[^2]: The Zephyr Abstract Syntax Description Language.: + https://www.cs.princeton.edu/research/techreps/TR-554-97 From 7aff2de62bc28eb23888270b698c6b6915f69b21 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 10 Jun 2024 18:34:17 +0200 Subject: [PATCH 145/373] gh-120057: Add os.environ.refresh() method (#120059) --- Doc/library/os.rst | 11 +++++ Doc/whatsnew/3.14.rst | 7 +++ Lib/os.py | 25 ++++++++-- Lib/test/test_os.py | 46 +++++++++++++++++++ ...-06-04-18-53-10.gh-issue-120057.RSD9_Z.rst | 4 ++ Modules/clinic/posixmodule.c.h | 20 +++++++- Modules/posixmodule.c | 15 ++++++ 7 files changed, 124 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-04-18-53-10.gh-issue-120057.RSD9_Z.rst diff --git a/Doc/library/os.rst b/Doc/library/os.rst index b93b06d4e72afc..360d71e70960c7 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -193,6 +193,10 @@ process and user. to the environment made after this time are not reflected in :data:`os.environ`, except for changes made by modifying :data:`os.environ` directly. + The :meth:`!os.environ.refresh()` method updates :data:`os.environ` with + changes to the environment made by :func:`os.putenv`, by + :func:`os.unsetenv`, or made outside Python in the same process. + This mapping may be used to modify the environment as well as query the environment. :func:`putenv` will be called automatically when the mapping is modified. @@ -225,6 +229,9 @@ process and user. .. versionchanged:: 3.9 Updated to support :pep:`584`'s merge (``|``) and update (``|=``) operators. + .. versionchanged:: 3.14 + Added the :meth:`!os.environ.refresh()` method. + .. 
data:: environb @@ -561,6 +568,8 @@ process and user. of :data:`os.environ`. This also applies to :func:`getenv` and :func:`getenvb`, which respectively use :data:`os.environ` and :data:`os.environb` in their implementations. + See also the :data:`os.environ.refresh() ` method. + .. note:: On some platforms, including FreeBSD and macOS, setting ``environ`` may @@ -809,6 +818,8 @@ process and user. don't update :data:`os.environ`, so it is actually preferable to delete items of :data:`os.environ`. + See also the :data:`os.environ.refresh() ` method. + .. audit-event:: os.unsetenv key os.unsetenv .. versionchanged:: 3.9 diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index b2dd80b64a691a..b77ff30a8fbbee 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -92,6 +92,13 @@ ast Added :func:`ast.compare` for comparing two ASTs. (Contributed by Batuhan Taskaya and Jeremy Hylton in :issue:`15987`.) +os +-- + +* Added the :data:`os.environ.refresh() ` method to update + :data:`os.environ` with changes to the environment made by :func:`os.putenv`, + by :func:`os.unsetenv`, or made outside Python in the same process. + (Contributed by Victor Stinner in :gh:`120057`.) 
Optimizations diff --git a/Lib/os.py b/Lib/os.py index 0408e2db79e66e..4b48afb040e565 100644 --- a/Lib/os.py +++ b/Lib/os.py @@ -64,6 +64,10 @@ def _get_exports_list(module): from posix import _have_functions except ImportError: pass + try: + from posix import _create_environ + except ImportError: + pass import posix __all__.extend(_get_exports_list(posix)) @@ -88,6 +92,10 @@ def _get_exports_list(module): from nt import _have_functions except ImportError: pass + try: + from nt import _create_environ + except ImportError: + pass else: raise ImportError('no os specific module found') @@ -773,7 +781,18 @@ def __ror__(self, other): new.update(self) return new -def _createenviron(): + if _exists("_create_environ"): + def refresh(self): + data = _create_environ() + if name == 'nt': + data = {self.encodekey(key): value + for key, value in data.items()} + + # modify in-place to keep os.environb in sync + self._data.clear() + self._data.update(data) + +def _create_environ_mapping(): if name == 'nt': # Where Env Var Names Must Be UPPERCASE def check_str(value): @@ -803,8 +822,8 @@ def decode(value): encode, decode) # unicode environ -environ = _createenviron() -del _createenviron +environ = _create_environ_mapping() +del _create_environ_mapping def getenv(key, default=None): diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 2beb9ca8aa6ccb..f93937fb587386 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -1298,6 +1298,52 @@ def test_ror_operator(self): self._test_underlying_process_env('_A_', '') self._test_underlying_process_env(overridden_key, original_value) + def test_refresh(self): + # Test os.environ.refresh() + has_environb = hasattr(os, 'environb') + + # Test with putenv() which doesn't update os.environ + os.environ['test_env'] = 'python_value' + os.putenv("test_env", "new_value") + self.assertEqual(os.environ['test_env'], 'python_value') + if has_environb: + self.assertEqual(os.environb[b'test_env'], b'python_value') + + os.environ.refresh() 
+ self.assertEqual(os.environ['test_env'], 'new_value') + if has_environb: + self.assertEqual(os.environb[b'test_env'], b'new_value') + + # Test with unsetenv() which doesn't update os.environ + os.unsetenv('test_env') + self.assertEqual(os.environ['test_env'], 'new_value') + if has_environb: + self.assertEqual(os.environb[b'test_env'], b'new_value') + + os.environ.refresh() + self.assertNotIn('test_env', os.environ) + if has_environb: + self.assertNotIn(b'test_env', os.environb) + + if has_environb: + # test os.environb.refresh() with putenv() + os.environb[b'test_env'] = b'python_value2' + os.putenv("test_env", "new_value2") + self.assertEqual(os.environb[b'test_env'], b'python_value2') + self.assertEqual(os.environ['test_env'], 'python_value2') + + os.environb.refresh() + self.assertEqual(os.environb[b'test_env'], b'new_value2') + self.assertEqual(os.environ['test_env'], 'new_value2') + + # test os.environb.refresh() with unsetenv() + os.unsetenv('test_env') + self.assertEqual(os.environb[b'test_env'], b'new_value2') + self.assertEqual(os.environ['test_env'], 'new_value2') + + os.environb.refresh() + self.assertNotIn(b'test_env', os.environb) + self.assertNotIn('test_env', os.environ) class WalkTests(unittest.TestCase): """Tests for os.walk().""" diff --git a/Misc/NEWS.d/next/Library/2024-06-04-18-53-10.gh-issue-120057.RSD9_Z.rst b/Misc/NEWS.d/next/Library/2024-06-04-18-53-10.gh-issue-120057.RSD9_Z.rst new file mode 100644 index 00000000000000..955be59821ee0c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-04-18-53-10.gh-issue-120057.RSD9_Z.rst @@ -0,0 +1,4 @@ +Added the :data:`os.environ.refresh() ` method to update +:data:`os.environ` with changes to the environment made by :func:`os.putenv`, +by :func:`os.unsetenv`, or made outside Python in the same process. +Patch by Victor Stinner. 
diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 69fc178331c09c..07b28fef3a57ea 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -12152,6 +12152,24 @@ os__is_inputhook_installed(PyObject *module, PyObject *Py_UNUSED(ignored)) return os__is_inputhook_installed_impl(module); } +PyDoc_STRVAR(os__create_environ__doc__, +"_create_environ($module, /)\n" +"--\n" +"\n" +"Create the environment dictionary."); + +#define OS__CREATE_ENVIRON_METHODDEF \ + {"_create_environ", (PyCFunction)os__create_environ, METH_NOARGS, os__create_environ__doc__}, + +static PyObject * +os__create_environ_impl(PyObject *module); + +static PyObject * +os__create_environ(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return os__create_environ_impl(module); +} + #ifndef OS_TTYNAME_METHODDEF #define OS_TTYNAME_METHODDEF #endif /* !defined(OS_TTYNAME_METHODDEF) */ @@ -12819,4 +12837,4 @@ os__is_inputhook_installed(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=faaa5e5ffb7b165d input=a9049054013a1b77]*/ +/*[clinic end generated code: output=5ae2e5ffcd9c8a84 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 5f943d4b1c8085..a8fd5c494769b5 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -16809,6 +16809,20 @@ os__is_inputhook_installed_impl(PyObject *module) return PyBool_FromLong(PyOS_InputHook != NULL); } +/*[clinic input] +os._create_environ + +Create the environment dictionary. 
+[clinic start generated code]*/ + +static PyObject * +os__create_environ_impl(PyObject *module) +/*[clinic end generated code: output=19d9039ab14f8ad4 input=a4c05686b34635e8]*/ +{ + return convertenviron(); +} + + static PyMethodDef posix_methods[] = { OS_STAT_METHODDEF @@ -17023,6 +17037,7 @@ static PyMethodDef posix_methods[] = { OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF OS__INPUTHOOK_METHODDEF OS__IS_INPUTHOOK_INSTALLED_METHODDEF + OS__CREATE_ENVIRON_METHODDEF {NULL, NULL} /* Sentinel */ }; From 6efe3460693c4f39de198a64cebeeee8b1d4e8b6 Mon Sep 17 00:00:00 2001 From: AN Long Date: Tue, 11 Jun 2024 00:45:16 +0800 Subject: [PATCH 146/373] Fix the CODEOWNERS for _interpretersmodule.c (gh-120288) --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 811b8cfdab17dc..8bc40fcb9e8999 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -245,7 +245,7 @@ Doc/howto/clinic.rst @erlend-aasland **/*interpreteridobject.* @ericsnowcurrently **/*crossinterp* @ericsnowcurrently Lib/test/support/interpreters/ @ericsnowcurrently -Modules/_xx*interp*module.c @ericsnowcurrently +Modules/_interp*module.c @ericsnowcurrently Lib/test/test_interpreters/ @ericsnowcurrently # Android From 422c4fc855afd18bcc6415902ea1d85a50cb7ce1 Mon Sep 17 00:00:00 2001 From: Robert Collins Date: Tue, 11 Jun 2024 07:41:12 +0200 Subject: [PATCH 147/373] gh-119600: mock: do not access attributes of original when new_callable is set (#119601) In order to patch flask.g e.g. as in #84982, that proxies getattr must not be invoked. For that, mock must not try to read from the original object. In some cases that is unavoidable, e.g. when doing autospec. However, patch("flask.g", new_callable=MagicMock) should be entirely safe. 
--- Lib/test/test_unittest/testmock/support.py | 11 +++++++++++ Lib/test/test_unittest/testmock/testpatch.py | 7 +++++++ Lib/unittest/mock.py | 14 +++++++++----- .../2024-06-10-14-00-40.gh-issue-119600.jJMf4C.rst | 2 ++ 4 files changed, 29 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-10-14-00-40.gh-issue-119600.jJMf4C.rst diff --git a/Lib/test/test_unittest/testmock/support.py b/Lib/test/test_unittest/testmock/support.py index 49986d65dc47af..6c535b7944f261 100644 --- a/Lib/test/test_unittest/testmock/support.py +++ b/Lib/test/test_unittest/testmock/support.py @@ -14,3 +14,14 @@ def wibble(self): pass class X(object): pass + +# A standin for weurkzeug.local.LocalProxy - issue 119600 +def _inaccessible(*args, **kwargs): + raise AttributeError + + +class OpaqueProxy: + __getattribute__ = _inaccessible + + +g = OpaqueProxy() diff --git a/Lib/test/test_unittest/testmock/testpatch.py b/Lib/test/test_unittest/testmock/testpatch.py index be75fda7826af1..f26e74ce0bc1ba 100644 --- a/Lib/test/test_unittest/testmock/testpatch.py +++ b/Lib/test/test_unittest/testmock/testpatch.py @@ -2045,6 +2045,13 @@ def test(): pass with self.assertRaises(TypeError): test() + def test_patch_proxy_object(self): + @patch("test.test_unittest.testmock.support.g", new_callable=MagicMock()) + def test(_): + pass + + test() + if __name__ == '__main__': unittest.main() diff --git a/Lib/unittest/mock.py b/Lib/unittest/mock.py index 3ef83e263f53b7..edabb4520c13cd 100644 --- a/Lib/unittest/mock.py +++ b/Lib/unittest/mock.py @@ -1508,13 +1508,12 @@ def __enter__(self): if isinstance(original, type): # If we're patching out a class and there is a spec inherit = True - if spec is None and _is_async_obj(original): - Klass = AsyncMock - else: - Klass = MagicMock - _kwargs = {} + + # Determine the Klass to use if new_callable is not None: Klass = new_callable + elif spec is None and _is_async_obj(original): + Klass = AsyncMock elif spec is not None or spec_set is not 
None: this_spec = spec if spec_set is not None: @@ -1527,7 +1526,12 @@ def __enter__(self): Klass = AsyncMock elif not_callable: Klass = NonCallableMagicMock + else: + Klass = MagicMock + else: + Klass = MagicMock + _kwargs = {} if spec is not None: _kwargs['spec'] = spec if spec_set is not None: diff --git a/Misc/NEWS.d/next/Library/2024-06-10-14-00-40.gh-issue-119600.jJMf4C.rst b/Misc/NEWS.d/next/Library/2024-06-10-14-00-40.gh-issue-119600.jJMf4C.rst new file mode 100644 index 00000000000000..04c9ca9c3fd737 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-10-14-00-40.gh-issue-119600.jJMf4C.rst @@ -0,0 +1,2 @@ +Fix :func:`unittest.mock.patch` to not read attributes of the target when +``new_callable`` is set. Patch by Robert Collins. From 9e9ee50421c857b443e2060274f17fb884d54473 Mon Sep 17 00:00:00 2001 From: blhsing Date: Tue, 11 Jun 2024 13:42:49 +0800 Subject: [PATCH 148/373] gh-65454: avoid triggering call to a PropertyMock in NonCallableMock.__setattr__ (#120019) --- Lib/test/test_unittest/testmock/testhelpers.py | 8 ++++++++ Lib/unittest/mock.py | 3 +++ .../Library/2024-06-04-08-57-02.gh-issue-65454.o9j4wF.rst | 1 + 3 files changed, 12 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-06-04-08-57-02.gh-issue-65454.o9j4wF.rst diff --git a/Lib/test/test_unittest/testmock/testhelpers.py b/Lib/test/test_unittest/testmock/testhelpers.py index 74785a83757a92..c9c20f008ca5a2 100644 --- a/Lib/test/test_unittest/testmock/testhelpers.py +++ b/Lib/test/test_unittest/testmock/testhelpers.py @@ -1127,6 +1127,14 @@ def test_propertymock_side_effect(self): p.assert_called_once_with() + def test_propertymock_attach(self): + m = Mock() + p = PropertyMock() + type(m).foo = p + m.attach_mock(p, 'foo') + self.assertEqual(m.mock_calls, []) + + class TestCallablePredicate(unittest.TestCase): def test_type(self): diff --git a/Lib/unittest/mock.py b/Lib/unittest/mock.py index edabb4520c13cd..08975e0e1bd132 100644 --- a/Lib/unittest/mock.py +++ 
b/Lib/unittest/mock.py @@ -830,6 +830,9 @@ def __setattr__(self, name, value): mock_name = f'{self._extract_mock_name()}.{name}' raise AttributeError(f'Cannot set {mock_name}') + if isinstance(value, PropertyMock): + self.__dict__[name] = value + return return object.__setattr__(self, name, value) diff --git a/Misc/NEWS.d/next/Library/2024-06-04-08-57-02.gh-issue-65454.o9j4wF.rst b/Misc/NEWS.d/next/Library/2024-06-04-08-57-02.gh-issue-65454.o9j4wF.rst new file mode 100644 index 00000000000000..0b232cf8ca1baf --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-04-08-57-02.gh-issue-65454.o9j4wF.rst @@ -0,0 +1 @@ +:func:`unittest.mock.Mock.attach_mock` no longer triggers a call to a ``PropertyMock`` being attached. From 141babad9b4eceb83371bf19ba3a36b50dd05250 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Tue, 11 Jun 2024 10:04:27 +0300 Subject: [PATCH 149/373] gh-120298: Fix use-after-free in `list_richcompare_impl` (#120303) Co-authored-by: Serhiy Storchaka --- Lib/test/test_list.py | 11 +++++++++++ .../2024-06-10-10-42-48.gh-issue-120298.napREA.rst | 2 ++ Objects/listobject.c | 9 ++++++++- 3 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-10-10-42-48.gh-issue-120298.napREA.rst diff --git a/Lib/test/test_list.py b/Lib/test/test_list.py index 0601b33e79ebb6..d21429fae09b37 100644 --- a/Lib/test/test_list.py +++ b/Lib/test/test_list.py @@ -234,6 +234,17 @@ def __eq__(self, other): list4 = [1] self.assertFalse(list3 == list4) + def test_lt_operator_modifying_operand(self): + # See gh-120298 + class evil: + def __lt__(self, other): + other.clear() + return NotImplemented + + a = [[evil()]] + with self.assertRaises(TypeError): + a[0] < a + @cpython_only def test_preallocation(self): iterable = [0] * 10 diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-10-10-42-48.gh-issue-120298.napREA.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-10-10-42-48.gh-issue-120298.napREA.rst new file mode 100644 index 
00000000000000..531d39517ac423 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-10-10-42-48.gh-issue-120298.napREA.rst @@ -0,0 +1,2 @@ +Fix use-after free in ``list_richcompare_impl`` which can be invoked via +some specificly tailored evil input. diff --git a/Objects/listobject.c b/Objects/listobject.c index d09bb6391034d1..6829d5d28656cf 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -3382,7 +3382,14 @@ list_richcompare_impl(PyObject *v, PyObject *w, int op) } /* Compare the final item again using the proper operator */ - return PyObject_RichCompare(vl->ob_item[i], wl->ob_item[i], op); + PyObject *vitem = vl->ob_item[i]; + PyObject *witem = wl->ob_item[i]; + Py_INCREF(vitem); + Py_INCREF(witem); + PyObject *result = PyObject_RichCompare(vl->ob_item[i], wl->ob_item[i], op); + Py_DECREF(vitem); + Py_DECREF(witem); + return result; } static PyObject * From 7d2447137e117ea9a6ee1493bce0b071c76b1bd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Tue, 11 Jun 2024 09:11:13 +0200 Subject: [PATCH 150/373] gh-120291: Fix a bashism in python-config.sh.in (#120292) gh-120291: Fix bashisms in python-config.sh.in Replace the use of bash-specific `[[ ... ]]` with POSIX-compliant `[ ... ]` to make the `python-config` shell script work with non-bash shells again. While at it, use `local` in a safer way, since it is not in POSIX either (though universally supported). 
Fixes #120291 --- .../Build/2024-06-09-15-54-22.gh-issue-120291.IpfHzE.rst | 1 + Misc/python-config.sh.in | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2024-06-09-15-54-22.gh-issue-120291.IpfHzE.rst diff --git a/Misc/NEWS.d/next/Build/2024-06-09-15-54-22.gh-issue-120291.IpfHzE.rst b/Misc/NEWS.d/next/Build/2024-06-09-15-54-22.gh-issue-120291.IpfHzE.rst new file mode 100644 index 00000000000000..d0bb297b51dc6e --- /dev/null +++ b/Misc/NEWS.d/next/Build/2024-06-09-15-54-22.gh-issue-120291.IpfHzE.rst @@ -0,0 +1 @@ +Make the ``python-config`` shell script compatible with non-bash shells. diff --git a/Misc/python-config.sh.in b/Misc/python-config.sh.in index c3c0b34fc1451d..9929f5b2653dca 100644 --- a/Misc/python-config.sh.in +++ b/Misc/python-config.sh.in @@ -4,11 +4,12 @@ exit_with_usage () { - local USAGE="Usage: $0 --prefix|--exec-prefix|--includes|--libs|--cflags|--ldflags|--extension-suffix|--help|--abiflags|--configdir|--embed" - if [[ "$1" -eq 0 ]]; then - echo "$USAGE" + local usage + usage="Usage: $0 --prefix|--exec-prefix|--includes|--libs|--cflags|--ldflags|--extension-suffix|--help|--abiflags|--configdir|--embed" + if [ "$1" -eq 0 ]; then + echo "$usage" else - echo "$USAGE" >&2 + echo "$usage" >&2 fi exit $1 } From 02c1dfff073a3dd6ce34a11b038defde291c2203 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Tue, 11 Jun 2024 10:56:38 +0300 Subject: [PATCH 151/373] gh-120080: Mark test_round_with_none_arg_direct_call as cpython_only (#120328) --- Lib/test/test_float.py | 1 + Lib/test/test_int.py | 1 + 2 files changed, 2 insertions(+) diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index 53695cefb8fded..756cf9bd7719c0 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -949,6 +949,7 @@ def test_None_ndigits(self): self.assertEqual(x, 2) self.assertIsInstance(x, int) + @support.cpython_only def test_round_with_none_arg_direct_call(self): for val in [(1.0).__round__(None), 
round(1.0), diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index 77221dfb6d5aa2..2747d9219255ac 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -517,6 +517,7 @@ def test_issue31619(self): self.assertEqual(int('1_2_3_4_5_6_7_8_9', 16), 0x123456789) self.assertEqual(int('1_2_3_4_5_6_7', 32), 1144132807) + @support.cpython_only def test_round_with_none_arg_direct_call(self): for val in [(1).__round__(None), round(1), From 9b8611eeea172cd4aa626ccd1ca333dc4093cd8c Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Tue, 11 Jun 2024 07:06:49 -0600 Subject: [PATCH 152/373] gh-119180: PEP 649 compiler changes (#119361) --- .../pycore_global_objects_fini_generated.h | 2 +- Include/internal/pycore_global_strings.h | 2 +- Include/internal/pycore_opcode_utils.h | 1 + .../internal/pycore_runtime_init_generated.h | 2 +- Include/internal/pycore_symtable.h | 8 +- .../internal/pycore_unicodeobject_generated.h | 3 - Lib/inspect.py | 8 +- Lib/symtable.py | 2 + Lib/test/test_dis.py | 29 +- Lib/test/test_grammar.py | 69 +--- Lib/test/test_module/__init__.py | 2 + Lib/test/test_opcodes.py | 13 +- Lib/test/test_positional_only_arg.py | 5 +- Lib/test/test_pyclbr.py | 2 + Lib/test/test_pydoc/test_pydoc.py | 11 +- Lib/test/test_pyrepl/test_interact.py | 2 +- Lib/test/test_symtable.py | 6 +- Lib/test/test_traceback.py | 5 +- Lib/test/test_type_annotations.py | 163 +++++++- Lib/test/test_typing.py | 4 +- Lib/test/typinganndata/ann_module.py | 4 - Lib/typing.py | 23 +- ...-05-22-06-22-47.gh-issue-119180.vZMiXm.rst | 1 + Python/bytecodes.c | 5 + Python/compile.c | 357 ++++++++++-------- Python/executor_cases.c.h | 5 + Python/generated_cases.c.h | 5 + Python/symtable.c | 198 +++++++--- 28 files changed, 609 insertions(+), 328 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-22-06-22-47.gh-issue-119180.vZMiXm.rst diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 
b186408931c92e..30851dc2dbec44 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -559,6 +559,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(dot)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(dot_locals)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(empty)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(format)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(generic_base)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(json_decoder)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(kwdefaults)); @@ -745,7 +746,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_abstract_)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_active)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_align_)); - _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_annotation)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_anonymous_)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_argtypes_)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_as_parameter_)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index e1808c85acfb2d..009802c441685c 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -45,6 +45,7 @@ struct _Py_global_strings { STRUCT_FOR_STR(dot, ".") STRUCT_FOR_STR(dot_locals, ".") STRUCT_FOR_STR(empty, "") + STRUCT_FOR_STR(format, ".format") STRUCT_FOR_STR(generic_base, ".generic_base") STRUCT_FOR_STR(json_decoder, "json.decoder") STRUCT_FOR_STR(kwdefaults, ".kwdefaults") @@ -234,7 +235,6 @@ struct _Py_global_strings { STRUCT_FOR_ID(_abstract_) STRUCT_FOR_ID(_active) STRUCT_FOR_ID(_align_) - STRUCT_FOR_ID(_annotation) STRUCT_FOR_ID(_anonymous_) STRUCT_FOR_ID(_argtypes_) STRUCT_FOR_ID(_as_parameter_) diff --git 
a/Include/internal/pycore_opcode_utils.h b/Include/internal/pycore_opcode_utils.h index b06e469dd5bd91..e76f4840a66891 100644 --- a/Include/internal/pycore_opcode_utils.h +++ b/Include/internal/pycore_opcode_utils.h @@ -57,6 +57,7 @@ extern "C" { #define MAKE_FUNCTION_KWDEFAULTS 0x02 #define MAKE_FUNCTION_ANNOTATIONS 0x04 #define MAKE_FUNCTION_CLOSURE 0x08 +#define MAKE_FUNCTION_ANNOTATE 0x10 /* Values used as the oparg for LOAD_COMMON_CONSTANT */ #define CONSTANT_ASSERTIONERROR 0 diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 2dde6febc2cae4..ff5b6ee8e0f006 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -554,6 +554,7 @@ extern "C" { INIT_STR(dot, "."), \ INIT_STR(dot_locals, "."), \ INIT_STR(empty, ""), \ + INIT_STR(format, ".format"), \ INIT_STR(generic_base, ".generic_base"), \ INIT_STR(json_decoder, "json.decoder"), \ INIT_STR(kwdefaults, ".kwdefaults"), \ @@ -743,7 +744,6 @@ extern "C" { INIT_ID(_abstract_), \ INIT_ID(_active), \ INIT_ID(_align_), \ - INIT_ID(_annotation), \ INIT_ID(_anonymous_), \ INIT_ID(_argtypes_), \ INIT_ID(_as_parameter_), \ diff --git a/Include/internal/pycore_symtable.h b/Include/internal/pycore_symtable.h index ac6c499c08264e..5d544765237df5 100644 --- a/Include/internal/pycore_symtable.h +++ b/Include/internal/pycore_symtable.h @@ -12,8 +12,9 @@ struct _mod; // Type defined in pycore_ast.h typedef enum _block_type { FunctionBlock, ClassBlock, ModuleBlock, - // Used for annotations if 'from __future__ import annotations' is active. - // Annotation blocks cannot bind names and are not evaluated. + // Used for annotations. If 'from __future__ import annotations' is active, + // annotation blocks cannot bind names and are not evaluated. Otherwise, they + // are lazily evaluated (see PEP 649). AnnotationBlock, // Used for generics and type aliases. 
These work mostly like functions // (see PEP 695 for details). The three different blocks function identically; @@ -89,6 +90,7 @@ typedef struct _symtable_entry { including free refs to globals */ unsigned ste_generator : 1; /* true if namespace is a generator */ unsigned ste_coroutine : 1; /* true if namespace is a coroutine */ + unsigned ste_annotations_used : 1; /* true if there are any annotations in this scope */ _Py_comprehension_ty ste_comprehension; /* Kind of comprehension (if any) */ unsigned ste_varargs : 1; /* true if block has varargs */ unsigned ste_varkeywords : 1; /* true if block has varkeywords */ @@ -110,6 +112,7 @@ typedef struct _symtable_entry { int ste_end_col_offset; /* end offset of first line of block */ int ste_opt_lineno; /* lineno of last exec or import * */ int ste_opt_col_offset; /* offset of last exec or import * */ + struct _symtable_entry *ste_annotation_block; /* symbol table entry for this entry's annotations */ struct symtable *ste_table; } PySTEntryObject; @@ -126,6 +129,7 @@ extern struct symtable* _PySymtable_Build( PyObject *filename, _PyFutureFeatures *future); extern PySTEntryObject* _PySymtable_Lookup(struct symtable *, void *); +extern int _PySymtable_LookupOptional(struct symtable *, void *, PySTEntryObject **); extern void _PySymtable_Free(struct symtable *); diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index b00119a1bad7ff..69d93a9610a2e5 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -543,9 +543,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(_align_); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); - string = &_Py_ID(_annotation); - assert(_PyUnicode_CheckConsistency(string, 1)); - _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(_anonymous_); assert(_PyUnicode_CheckConsistency(string, 
1)); _PyUnicode_InternInPlace(interp, &string); diff --git a/Lib/inspect.py b/Lib/inspect.py index 2b7f8bec482f8e..5570a43ebfea19 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -220,13 +220,7 @@ def get_annotations(obj, *, globals=None, locals=None, eval_str=False): """ if isinstance(obj, type): # class - obj_dict = getattr(obj, '__dict__', None) - if obj_dict and hasattr(obj_dict, 'get'): - ann = obj_dict.get('__annotations__', None) - if isinstance(ann, types.GetSetDescriptorType): - ann = None - else: - ann = None + ann = obj.__annotations__ obj_globals = None module_name = getattr(obj, '__module__', None) diff --git a/Lib/symtable.py b/Lib/symtable.py index ba2f0dafcd0063..af65e93e68eda4 100644 --- a/Lib/symtable.py +++ b/Lib/symtable.py @@ -222,6 +222,8 @@ def get_methods(self): if self.__methods is None: d = {} for st in self._table.children: + if st.type == _symtable.TYPE_ANNOTATION: + continue d[st.name] = 1 self.__methods = tuple(d) return self.__methods diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index b1a1b77c53e8cb..b0ae1289224070 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -352,32 +352,21 @@ def wrap_func_w_kwargs(): dis_annot_stmt_str = """\ 0 RESUME 0 - 2 SETUP_ANNOTATIONS - LOAD_CONST 0 (1) + 2 LOAD_CONST 0 (1) STORE_NAME 0 (x) - LOAD_NAME 1 (int) - LOAD_NAME 2 (__annotations__) - LOAD_CONST 1 ('x') - STORE_SUBSCR - - 3 LOAD_NAME 3 (fun) - PUSH_NULL - LOAD_CONST 0 (1) - CALL 1 - LOAD_NAME 2 (__annotations__) - LOAD_CONST 2 ('y') - STORE_SUBSCR 4 LOAD_CONST 0 (1) - LOAD_NAME 4 (lst) - LOAD_NAME 3 (fun) + LOAD_NAME 1 (lst) + LOAD_NAME 2 (fun) PUSH_NULL - LOAD_CONST 3 (0) + LOAD_CONST 1 (0) CALL 1 STORE_SUBSCR - LOAD_NAME 1 (int) - POP_TOP - RETURN_CONST 4 (None) + + 2 LOAD_CONST 2 (", line 2>) + MAKE_FUNCTION + STORE_NAME 3 (__annotate__) + RETURN_CONST 3 (None) """ compound_stmt_str = """\ diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py index c72f4387108ca8..5b7a639c025a0f 100644 --- 
a/Lib/test/test_grammar.py +++ b/Lib/test/test_grammar.py @@ -306,16 +306,6 @@ def test_eof_error(self): var_annot_global: int # a global annotated is necessary for test_var_annot -# custom namespace for testing __annotations__ - -class CNS: - def __init__(self): - self._dct = {} - def __setitem__(self, item, value): - self._dct[item.lower()] = value - def __getitem__(self, item): - return self._dct[item] - class GrammarTests(unittest.TestCase): @@ -446,22 +436,12 @@ class F(C, A): self.assertEqual(E.__annotations__, {}) self.assertEqual(F.__annotations__, {}) - - def test_var_annot_metaclass_semantics(self): - class CMeta(type): - @classmethod - def __prepare__(metacls, name, bases, **kwds): - return {'__annotations__': CNS()} - class CC(metaclass=CMeta): - XX: 'ANNOT' - self.assertEqual(CC.__annotations__['xx'], 'ANNOT') - def test_var_annot_module_semantics(self): self.assertEqual(test.__annotations__, {}) self.assertEqual(ann_module.__annotations__, - {1: 2, 'x': int, 'y': str, 'f': typing.Tuple[int, int], 'u': int | float}) + {'x': int, 'y': str, 'f': typing.Tuple[int, int], 'u': int | float}) self.assertEqual(ann_module.M.__annotations__, - {'123': 123, 'o': type}) + {'o': type}) self.assertEqual(ann_module2.__annotations__, {}) def test_var_annot_in_module(self): @@ -476,51 +456,12 @@ def test_var_annot_in_module(self): ann_module3.D_bad_ann(5) def test_var_annot_simple_exec(self): - gns = {}; lns= {} + gns = {}; lns = {} exec("'docstring'\n" - "__annotations__[1] = 2\n" "x: int = 5\n", gns, lns) - self.assertEqual(lns["__annotations__"], {1: 2, 'x': int}) - with self.assertRaises(KeyError): - gns['__annotations__'] - - def test_var_annot_custom_maps(self): - # tests with custom locals() and __annotations__ - ns = {'__annotations__': CNS()} - exec('X: int; Z: str = "Z"; (w): complex = 1j', ns) - self.assertEqual(ns['__annotations__']['x'], int) - self.assertEqual(ns['__annotations__']['z'], str) + self.assertEqual(lns["__annotate__"](1), {'x': int}) with 
self.assertRaises(KeyError): - ns['__annotations__']['w'] - nonloc_ns = {} - class CNS2: - def __init__(self): - self._dct = {} - def __setitem__(self, item, value): - nonlocal nonloc_ns - self._dct[item] = value - nonloc_ns[item] = value - def __getitem__(self, item): - return self._dct[item] - exec('x: int = 1', {}, CNS2()) - self.assertEqual(nonloc_ns['__annotations__']['x'], int) - - def test_var_annot_refleak(self): - # complex case: custom locals plus custom __annotations__ - # this was causing refleak - cns = CNS() - nonloc_ns = {'__annotations__': cns} - class CNS2: - def __init__(self): - self._dct = {'__annotations__': cns} - def __setitem__(self, item, value): - nonlocal nonloc_ns - self._dct[item] = value - nonloc_ns[item] = value - def __getitem__(self, item): - return self._dct[item] - exec('X: str', {}, CNS2()) - self.assertEqual(nonloc_ns['__annotations__']['x'], str) + gns['__annotate__'] def test_var_annot_rhs(self): ns = {} diff --git a/Lib/test/test_module/__init__.py b/Lib/test/test_module/__init__.py index 952ba43f72504d..56edd0c637f376 100644 --- a/Lib/test/test_module/__init__.py +++ b/Lib/test/test_module/__init__.py @@ -360,6 +360,8 @@ def test_annotations_are_created_correctly(self): ann_module4 = import_helper.import_fresh_module( 'test.typinganndata.ann_module4', ) + self.assertFalse("__annotations__" in ann_module4.__dict__) + self.assertEqual(ann_module4.__annotations__, {"a": int, "b": str}) self.assertTrue("__annotations__" in ann_module4.__dict__) del ann_module4.__annotations__ self.assertFalse("__annotations__" in ann_module4.__dict__) diff --git a/Lib/test/test_opcodes.py b/Lib/test/test_opcodes.py index 72488b2bb6b4ff..f7cc8331b8d844 100644 --- a/Lib/test/test_opcodes.py +++ b/Lib/test/test_opcodes.py @@ -39,16 +39,19 @@ class C: pass def test_use_existing_annotations(self): ns = {'__annotations__': {1: 2}} exec('x: int', ns) - self.assertEqual(ns['__annotations__'], {'x': int, 1: 2}) + self.assertEqual(ns['__annotations__'], 
{1: 2}) def test_do_not_recreate_annotations(self): # Don't rely on the existence of the '__annotations__' global. with support.swap_item(globals(), '__annotations__', {}): - del globals()['__annotations__'] + globals().pop('__annotations__', None) class C: - del __annotations__ - with self.assertRaises(NameError): - x: int + try: + del __annotations__ + except NameError: + pass + x: int + self.assertEqual(C.__annotations__, {"x": int}) def test_raise_class_exceptions(self): diff --git a/Lib/test/test_positional_only_arg.py b/Lib/test/test_positional_only_arg.py index 1a193814d7535d..eea0625012da6d 100644 --- a/Lib/test/test_positional_only_arg.py +++ b/Lib/test/test_positional_only_arg.py @@ -2,6 +2,7 @@ import dis import pickle +import types import unittest from test.support import check_syntax_error @@ -440,7 +441,9 @@ def f(x: not (int is int), /): ... # without constant folding we end up with # COMPARE_OP(is), IS_OP (0) # with constant folding we should expect a IS_OP (1) - codes = [(i.opname, i.argval) for i in dis.get_instructions(g)] + code_obj = next(const for const in g.__code__.co_consts + if isinstance(const, types.CodeType) and const.co_name == "__annotate__") + codes = [(i.opname, i.argval) for i in dis.get_instructions(code_obj)] self.assertNotIn(('UNARY_NOT', None), codes) self.assertIn(('IS_OP', 1), codes) diff --git a/Lib/test/test_pyclbr.py b/Lib/test/test_pyclbr.py index 46206accbafc36..0c12a3085b12af 100644 --- a/Lib/test/test_pyclbr.py +++ b/Lib/test/test_pyclbr.py @@ -109,6 +109,8 @@ def ismethod(oclass, obj, name): actualMethods = [] for m in py_item.__dict__.keys(): + if m == "__annotate__": + continue if ismethod(py_item, getattr(py_item, m), m): actualMethods.append(m) foundMethods = [] diff --git a/Lib/test/test_pydoc/test_pydoc.py b/Lib/test/test_pydoc/test_pydoc.py index 57e5b8e8abddfa..a17c16cc73cf0e 100644 --- a/Lib/test/test_pydoc/test_pydoc.py +++ b/Lib/test/test_pydoc/test_pydoc.py @@ -77,6 +77,11 @@ class A(builtins.object) | 
__weakref__%s class B(builtins.object) + | Methods defined here: + | + | __annotate__(...) + | + | ---------------------------------------------------------------------- | Data descriptors defined here: | | __dict__%s @@ -87,8 +92,6 @@ class B(builtins.object) | Data and other attributes defined here: | | NO_MEANING = 'eggs' - | - | __annotations__ = {'NO_MEANING': } class C(builtins.object) | Methods defined here: @@ -176,6 +179,9 @@ class A(builtins.object) list of weak references to the object class B(builtins.object) + Methods defined here: + __annotate__(...) + ---------------------------------------------------------------------- Data descriptors defined here: __dict__ dictionary for instance variables @@ -184,7 +190,6 @@ class B(builtins.object) ---------------------------------------------------------------------- Data and other attributes defined here: NO_MEANING = 'eggs' - __annotations__ = {'NO_MEANING': } class C(builtins.object) diff --git a/Lib/test/test_pyrepl/test_interact.py b/Lib/test/test_pyrepl/test_interact.py index df97b1354a168e..31f08cdb25e078 100644 --- a/Lib/test/test_pyrepl/test_interact.py +++ b/Lib/test/test_pyrepl/test_interact.py @@ -105,7 +105,7 @@ def test_runsource_shows_syntax_error_for_failed_compilation(self): def test_no_active_future(self): console = InteractiveColoredConsole() - source = "x: int = 1; print(__annotations__)" + source = "x: int = 1; print(__annotate__(1))" f = io.StringIO() with contextlib.redirect_stdout(f): result = console.runsource(source) diff --git a/Lib/test/test_symtable.py b/Lib/test/test_symtable.py index ef2a228b15ed4e..a4b111e865c86e 100644 --- a/Lib/test/test_symtable.py +++ b/Lib/test/test_symtable.py @@ -205,12 +205,14 @@ def test_assigned(self): def test_annotated(self): st1 = symtable.symtable('def f():\n x: int\n', 'test', 'exec') - st2 = st1.get_children()[0] + st2 = st1.get_children()[1] + self.assertEqual(st2.get_type(), "function") self.assertTrue(st2.lookup('x').is_local()) 
self.assertTrue(st2.lookup('x').is_annotated()) self.assertFalse(st2.lookup('x').is_global()) st3 = symtable.symtable('def f():\n x = 1\n', 'test', 'exec') - st4 = st3.get_children()[0] + st4 = st3.get_children()[1] + self.assertEqual(st4.get_type(), "function") self.assertTrue(st4.lookup('x').is_local()) self.assertFalse(st4.lookup('x').is_annotated()) diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index 5035de114b5e9d..1895c88d23b70d 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -622,6 +622,7 @@ def test_caret_in_type_annotation(self): def f_with_type(): def foo(a: THIS_DOES_NOT_EXIST ) -> int: return 0 + foo.__annotations__ lineno_f = f_with_type.__code__.co_firstlineno expected_f = ( @@ -629,7 +630,9 @@ def foo(a: THIS_DOES_NOT_EXIST ) -> int: f' File "{__file__}", line {self.callable_line}, in get_exception\n' ' callable()\n' ' ~~~~~~~~^^\n' - f' File "{__file__}", line {lineno_f+1}, in f_with_type\n' + f' File "{__file__}", line {lineno_f+3}, in f_with_type\n' + ' foo.__annotations__\n' + f' File "{__file__}", line {lineno_f+1}, in __annotate__\n' ' def foo(a: THIS_DOES_NOT_EXIST ) -> int:\n' ' ^^^^^^^^^^^^^^^^^^^\n' ) diff --git a/Lib/test/test_type_annotations.py b/Lib/test/test_type_annotations.py index 5e3c3347a41571..a9be1f5aa84681 100644 --- a/Lib/test/test_type_annotations.py +++ b/Lib/test/test_type_annotations.py @@ -1,7 +1,12 @@ import textwrap import types import unittest -from test.support import run_code +from test.support import run_code, check_syntax_error + +VALUE = 1 +FORWARDREF = 2 +SOURCE = 3 + class TypeAnnotationTests(unittest.TestCase): @@ -49,6 +54,7 @@ def test_annotations_are_created_correctly(self): class C: a:int=3 b:str=4 + self.assertEqual(C.__annotations__, {"a": int, "b": str}) self.assertTrue("__annotations__" in C.__dict__) del C.__annotations__ self.assertFalse("__annotations__" in C.__dict__) @@ -106,6 +112,13 @@ class D(metaclass=C): self.assertEqual(D.__annotations__, {}) 
+def build_module(code: str, name: str = "top") -> types.ModuleType: + ns = run_code(code) + mod = types.ModuleType(name) + mod.__dict__.update(ns) + return mod + + class TestSetupAnnotations(unittest.TestCase): def check(self, code: str): code = textwrap.dedent(code) @@ -113,11 +126,10 @@ def check(self, code: str): with self.subTest(scope=scope): if scope == "class": code = f"class C:\n{textwrap.indent(code, ' ')}" - ns = run_code(code) - if scope == "class": + ns = run_code(code) annotations = ns["C"].__annotations__ else: - annotations = ns["__annotations__"] + annotations = build_module(code).__annotations__ self.assertEqual(annotations, {"x": int}) def test_top_level(self): @@ -256,3 +268,146 @@ def check_annotations(self, f): # Setting f.__annotations__ also clears __annotate__ f.__annotations__ = {"z": 43} self.assertIs(f.__annotate__, None) + + +class DeferredEvaluationTests(unittest.TestCase): + def test_function(self): + def func(x: undefined, /, y: undefined, *args: undefined, z: undefined, **kwargs: undefined) -> undefined: + pass + + with self.assertRaises(NameError): + func.__annotations__ + + undefined = 1 + self.assertEqual(func.__annotations__, { + "x": 1, + "y": 1, + "args": 1, + "z": 1, + "kwargs": 1, + "return": 1, + }) + + def test_async_function(self): + async def func(x: undefined, /, y: undefined, *args: undefined, z: undefined, **kwargs: undefined) -> undefined: + pass + + with self.assertRaises(NameError): + func.__annotations__ + + undefined = 1 + self.assertEqual(func.__annotations__, { + "x": 1, + "y": 1, + "args": 1, + "z": 1, + "kwargs": 1, + "return": 1, + }) + + def test_class(self): + class X: + a: undefined + + with self.assertRaises(NameError): + X.__annotations__ + + undefined = 1 + self.assertEqual(X.__annotations__, {"a": 1}) + + def test_module(self): + ns = run_code("x: undefined = 1") + anno = ns["__annotate__"] + with self.assertRaises(NotImplementedError): + anno(2) + + with self.assertRaises(NameError): + anno(1) + + 
ns["undefined"] = 1 + self.assertEqual(anno(1), {"x": 1}) + + def test_class_scoping(self): + class Outer: + def meth(self, x: Nested): ... + x: Nested + class Nested: ... + + self.assertEqual(Outer.meth.__annotations__, {"x": Outer.Nested}) + self.assertEqual(Outer.__annotations__, {"x": Outer.Nested}) + + def test_no_exotic_expressions(self): + check_syntax_error(self, "def func(x: (yield)): ...", "yield expression cannot be used within an annotation") + check_syntax_error(self, "def func(x: (yield from x)): ...", "yield expression cannot be used within an annotation") + check_syntax_error(self, "def func(x: (y := 3)): ...", "named expression cannot be used within an annotation") + check_syntax_error(self, "def func(x: (await 42)): ...", "await expression cannot be used within an annotation") + + def test_no_exotic_expressions_in_unevaluated_annotations(self): + preludes = [ + "", + "class X: ", + "def f(): ", + "async def f(): ", + ] + for prelude in preludes: + with self.subTest(prelude=prelude): + check_syntax_error(self, prelude + "(x): (yield)", "yield expression cannot be used within an annotation") + check_syntax_error(self, prelude + "(x): (yield from x)", "yield expression cannot be used within an annotation") + check_syntax_error(self, prelude + "(x): (y := 3)", "named expression cannot be used within an annotation") + check_syntax_error(self, prelude + "(x): (await 42)", "await expression cannot be used within an annotation") + + def test_ignore_non_simple_annotations(self): + ns = run_code("class X: (y): int") + self.assertEqual(ns["X"].__annotations__, {}) + ns = run_code("class X: int.b: int") + self.assertEqual(ns["X"].__annotations__, {}) + ns = run_code("class X: int[str]: int") + self.assertEqual(ns["X"].__annotations__, {}) + + def test_generated_annotate(self): + def func(x: int): + pass + class X: + x: int + mod = build_module("x: int") + for obj in (func, X, mod): + with self.subTest(obj=obj): + annotate = obj.__annotate__ + 
self.assertIsInstance(annotate, types.FunctionType) + self.assertEqual(annotate.__name__, "__annotate__") + with self.assertRaises(NotImplementedError): + annotate(FORWARDREF) + with self.assertRaises(NotImplementedError): + annotate(SOURCE) + with self.assertRaises(NotImplementedError): + annotate(None) + self.assertEqual(annotate(VALUE), {"x": int}) + + def test_comprehension_in_annotation(self): + # This crashed in an earlier version of the code + ns = run_code("x: [y for y in range(10)]") + self.assertEqual(ns["__annotate__"](1), {"x": list(range(10))}) + + def test_future_annotations(self): + code = """ + from __future__ import annotations + + def f(x: int) -> int: pass + """ + ns = run_code(code) + f = ns["f"] + self.assertIsInstance(f.__annotate__, types.FunctionType) + annos = {"x": "int", "return": "int"} + self.assertEqual(f.__annotate__(VALUE), annos) + self.assertEqual(f.__annotations__, annos) + + def test_name_clash_with_format(self): + # this test would fail if __annotate__'s parameter was called "format" + code = """ + class format: pass + + def f(x: format): pass + """ + ns = run_code(code) + f = ns["f"] + self.assertEqual(f.__annotations__, {"x": ns["format"]}) diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py index dac55ceb9e99e0..9800b3b6a7da29 100644 --- a/Lib/test/test_typing.py +++ b/Lib/test/test_typing.py @@ -6634,7 +6634,7 @@ def test_get_type_hints_from_various_objects(self): gth(None) def test_get_type_hints_modules(self): - ann_module_type_hints = {1: 2, 'f': Tuple[int, int], 'x': int, 'y': str, 'u': int | float} + ann_module_type_hints = {'f': Tuple[int, int], 'x': int, 'y': str, 'u': int | float} self.assertEqual(gth(ann_module), ann_module_type_hints) self.assertEqual(gth(ann_module2), {}) self.assertEqual(gth(ann_module3), {}) @@ -6652,7 +6652,7 @@ def test_get_type_hints_classes(self): self.assertEqual(gth(ann_module.C), # gth will find the right globalns {'y': Optional[ann_module.C]}) 
self.assertIsInstance(gth(ann_module.j_class), dict) - self.assertEqual(gth(ann_module.M), {'123': 123, 'o': type}) + self.assertEqual(gth(ann_module.M), {'o': type}) self.assertEqual(gth(ann_module.D), {'j': str, 'k': str, 'y': Optional[ann_module.C]}) self.assertEqual(gth(ann_module.Y), {'z': int}) diff --git a/Lib/test/typinganndata/ann_module.py b/Lib/test/typinganndata/ann_module.py index 5081e6b58345a9..e1a1792cb4a867 100644 --- a/Lib/test/typinganndata/ann_module.py +++ b/Lib/test/typinganndata/ann_module.py @@ -8,8 +8,6 @@ from typing import Optional from functools import wraps -__annotations__[1] = 2 - class C: x = 5; y: Optional['C'] = None @@ -18,8 +16,6 @@ class C: x: int = 5; y: str = x; f: Tuple[int, int] class M(type): - - __annotations__['123'] = 123 o: type = object (pars): bool = True diff --git a/Lib/typing.py b/Lib/typing.py index be49aa63464f05..7a9149d3f3c2c1 100644 --- a/Lib/typing.py +++ b/Lib/typing.py @@ -2412,7 +2412,7 @@ def get_type_hints(obj, globalns=None, localns=None, include_extras=False): base_globals = getattr(sys.modules.get(base.__module__, None), '__dict__', {}) else: base_globals = globalns - ann = base.__dict__.get('__annotations__', {}) + ann = getattr(base, '__annotations__', {}) if isinstance(ann, types.GetSetDescriptorType): ann = {} base_locals = dict(vars(base)) if localns is None else localns @@ -2970,7 +2970,12 @@ def __new__(cls, typename, bases, ns): raise TypeError( 'can only inherit from a NamedTuple type and Generic') bases = tuple(tuple if base is _NamedTuple else base for base in bases) - types = ns.get('__annotations__', {}) + if "__annotations__" in ns: + types = ns["__annotations__"] + elif "__annotate__" in ns: + types = ns["__annotate__"](1) # VALUE + else: + types = {} default_names = [] for field_name in types: if field_name in ns: @@ -3131,7 +3136,12 @@ def __new__(cls, name, bases, ns, total=True): tp_dict.__orig_bases__ = bases annotations = {} - own_annotations = ns.get('__annotations__', {}) + if 
"__annotations__" in ns: + own_annotations = ns["__annotations__"] + elif "__annotate__" in ns: + own_annotations = ns["__annotate__"](1) # VALUE + else: + own_annotations = {} msg = "TypedDict('Name', {f0: t0, f1: t1, ...}); each t must be a type" own_annotations = { n: _type_check(tp, msg, module=tp_dict.__module__) @@ -3143,7 +3153,12 @@ def __new__(cls, name, bases, ns, total=True): mutable_keys = set() for base in bases: - annotations.update(base.__dict__.get('__annotations__', {})) + # TODO: Avoid eagerly evaluating annotations in VALUE format. + # Instead, evaluate in FORWARDREF format to figure out which + # keys have Required/NotRequired/ReadOnly qualifiers, and create + # a new __annotate__ function for the resulting TypedDict that + # combines the annotations from this class and its parents. + annotations.update(base.__annotations__) base_required = base.__dict__.get('__required_keys__', set()) required_keys |= base_required diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-22-06-22-47.gh-issue-119180.vZMiXm.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-22-06-22-47.gh-issue-119180.vZMiXm.rst new file mode 100644 index 00000000000000..265ffb32e6a1f9 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-22-06-22-47.gh-issue-119180.vZMiXm.rst @@ -0,0 +1 @@ +Evaluation of annotations is now deferred. See :pep:`649` for details. 
diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 413ad1105f9428..05c17ac334b69f 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3975,6 +3975,11 @@ dummy_func( assert(func_obj->func_defaults == NULL); func_obj->func_defaults = attr; break; + case MAKE_FUNCTION_ANNOTATE: + assert(PyCallable_Check(attr)); + assert(func_obj->func_annotate == NULL); + func_obj->func_annotate = attr; + break; default: Py_UNREACHABLE(); } diff --git a/Python/compile.c b/Python/compile.c index cb724154206b7e..c3372766d0bd50 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -132,7 +132,7 @@ enum { COMPILER_SCOPE_ASYNC_FUNCTION, COMPILER_SCOPE_LAMBDA, COMPILER_SCOPE_COMPREHENSION, - COMPILER_SCOPE_TYPEPARAMS, + COMPILER_SCOPE_ANNOTATIONS, }; @@ -142,6 +142,15 @@ typedef _PyInstructionSequence instr_sequence; #define INITIAL_INSTR_SEQUENCE_SIZE 100 #define INITIAL_INSTR_SEQUENCE_LABELS_MAP_SIZE 10 +static const int compare_masks[] = { + [Py_LT] = COMPARISON_LESS_THAN, + [Py_LE] = COMPARISON_LESS_THAN | COMPARISON_EQUALS, + [Py_EQ] = COMPARISON_EQUALS, + [Py_NE] = COMPARISON_NOT_EQUALS, + [Py_GT] = COMPARISON_GREATER_THAN, + [Py_GE] = COMPARISON_GREATER_THAN | COMPARISON_EQUALS, +}; + /* * Resize the array if index is out of range. 
* @@ -208,6 +217,7 @@ struct compiler_unit { PyObject *u_private; /* for private name mangling */ PyObject *u_static_attributes; /* for class: attributes accessed via self.X */ + PyObject *u_deferred_annotations; /* AnnAssign nodes deferred to the end of compilation */ instr_sequence *u_instr_sequence; /* codegen output */ @@ -330,6 +340,8 @@ static int compiler_pattern(struct compiler *, pattern_ty, pattern_context *); static int compiler_match(struct compiler *, stmt_ty); static int compiler_pattern_subpattern(struct compiler *, pattern_ty, pattern_context *); +static int compiler_make_closure(struct compiler *c, location loc, + PyCodeObject *co, Py_ssize_t flags); static PyCodeObject *optimize_and_assemble(struct compiler *, int addNone); @@ -545,6 +557,7 @@ compiler_unit_free(struct compiler_unit *u) Py_CLEAR(u->u_metadata.u_fasthidden); Py_CLEAR(u->u_private); Py_CLEAR(u->u_static_attributes); + Py_CLEAR(u->u_deferred_annotations); PyMem_Free(u); } @@ -582,8 +595,8 @@ compiler_set_qualname(struct compiler *c) capsule = PyList_GET_ITEM(c->c_stack, stack_size - 1); parent = (struct compiler_unit *)PyCapsule_GetPointer(capsule, CAPSULE_NAME); assert(parent); - if (parent->u_scope_type == COMPILER_SCOPE_TYPEPARAMS) { - /* The parent is a type parameter scope, so we need to + if (parent->u_scope_type == COMPILER_SCOPE_ANNOTATIONS) { + /* The parent is an annotation scope, so we need to look at the grandparent. */ if (stack_size == 2) { // If we're immediately within the module, we can skip @@ -1128,6 +1141,7 @@ compiler_enter_scope(struct compiler *c, identifier name, } u->u_private = NULL; + u->u_deferred_annotations = NULL; if (scope_type == COMPILER_SCOPE_CLASS) { u->u_static_attributes = PySet_New(0); if (!u->u_static_attributes) { @@ -1209,85 +1223,6 @@ compiler_exit_scope(struct compiler *c) PyErr_SetRaisedException(exc); } -/* Search if variable annotations are present statically in a block. 
*/ - -static bool -find_ann(asdl_stmt_seq *stmts) -{ - int i, j, res = 0; - stmt_ty st; - - for (i = 0; i < asdl_seq_LEN(stmts); i++) { - st = (stmt_ty)asdl_seq_GET(stmts, i); - switch (st->kind) { - case AnnAssign_kind: - return true; - case For_kind: - res = find_ann(st->v.For.body) || - find_ann(st->v.For.orelse); - break; - case AsyncFor_kind: - res = find_ann(st->v.AsyncFor.body) || - find_ann(st->v.AsyncFor.orelse); - break; - case While_kind: - res = find_ann(st->v.While.body) || - find_ann(st->v.While.orelse); - break; - case If_kind: - res = find_ann(st->v.If.body) || - find_ann(st->v.If.orelse); - break; - case With_kind: - res = find_ann(st->v.With.body); - break; - case AsyncWith_kind: - res = find_ann(st->v.AsyncWith.body); - break; - case Try_kind: - for (j = 0; j < asdl_seq_LEN(st->v.Try.handlers); j++) { - excepthandler_ty handler = (excepthandler_ty)asdl_seq_GET( - st->v.Try.handlers, j); - if (find_ann(handler->v.ExceptHandler.body)) { - return true; - } - } - res = find_ann(st->v.Try.body) || - find_ann(st->v.Try.finalbody) || - find_ann(st->v.Try.orelse); - break; - case TryStar_kind: - for (j = 0; j < asdl_seq_LEN(st->v.TryStar.handlers); j++) { - excepthandler_ty handler = (excepthandler_ty)asdl_seq_GET( - st->v.TryStar.handlers, j); - if (find_ann(handler->v.ExceptHandler.body)) { - return true; - } - } - res = find_ann(st->v.TryStar.body) || - find_ann(st->v.TryStar.finalbody) || - find_ann(st->v.TryStar.orelse); - break; - case Match_kind: - for (j = 0; j < asdl_seq_LEN(st->v.Match.cases); j++) { - match_case_ty match_case = (match_case_ty)asdl_seq_GET( - st->v.Match.cases, j); - if (find_ann(match_case->body)) { - return true; - } - } - break; - default: - res = false; - break; - } - if (res) { - break; - } - } - return res; -} - /* * Frame block handling functions */ @@ -1502,6 +1437,47 @@ compiler_unwind_fblock_stack(struct compiler *c, location *ploc, return SUCCESS; } +static int +compiler_setup_annotations_scope(struct compiler *c, 
location loc, + void *key, PyObject *name) +{ + if (compiler_enter_scope(c, name, COMPILER_SCOPE_ANNOTATIONS, + key, loc.lineno) == -1) { + return ERROR; + } + c->u->u_metadata.u_posonlyargcount = 1; + // if .format != 1: raise NotImplementedError + _Py_DECLARE_STR(format, ".format"); + ADDOP_I(c, loc, LOAD_FAST, 0); + ADDOP_LOAD_CONST(c, loc, _PyLong_GetOne()); + ADDOP_I(c, loc, COMPARE_OP, (Py_NE << 5) | compare_masks[Py_NE]); + NEW_JUMP_TARGET_LABEL(c, body); + ADDOP_JUMP(c, loc, POP_JUMP_IF_FALSE, body); + ADDOP_I(c, loc, LOAD_COMMON_CONSTANT, CONSTANT_NOTIMPLEMENTEDERROR); + ADDOP_I(c, loc, RAISE_VARARGS, 1); + USE_LABEL(c, body); + return 0; +} + +static int +compiler_leave_annotations_scope(struct compiler *c, location loc, + Py_ssize_t annotations_len) +{ + ADDOP_I(c, loc, BUILD_MAP, annotations_len); + ADDOP_IN_SCOPE(c, loc, RETURN_VALUE); + PyCodeObject *co = optimize_and_assemble(c, 1); + compiler_exit_scope(c); + if (co == NULL) { + return ERROR; + } + if (compiler_make_closure(c, loc, co, 0) < 0) { + Py_DECREF(co); + return ERROR; + } + Py_DECREF(co); + return 0; +} + /* Compile a sequence of statements, checking for a docstring and for annotations. */ @@ -1517,34 +1493,79 @@ compiler_body(struct compiler *c, location loc, asdl_stmt_seq *stmts) stmt_ty st = (stmt_ty)asdl_seq_GET(stmts, 0); loc = LOC(st); } - /* Every annotated class and module should have __annotations__. */ - if (find_ann(stmts)) { + /* If from __future__ import annotations is active, + * every annotated class and module should have __annotations__. + * Else __annotate__ is created when necessary. 
*/ + if ((c->c_future.ff_features & CO_FUTURE_ANNOTATIONS) && c->u->u_ste->ste_annotations_used) { ADDOP(c, loc, SETUP_ANNOTATIONS); } if (!asdl_seq_LEN(stmts)) { return SUCCESS; } Py_ssize_t first_instr = 0; - PyObject *docstring = _PyAST_GetDocString(stmts); - if (docstring) { - first_instr = 1; - /* if not -OO mode, set docstring */ - if (c->c_optimize < 2) { - PyObject *cleandoc = _PyCompile_CleanDoc(docstring); - if (cleandoc == NULL) { - return ERROR; + if (!c->c_interactive) { + PyObject *docstring = _PyAST_GetDocString(stmts); + if (docstring) { + first_instr = 1; + /* if not -OO mode, set docstring */ + if (c->c_optimize < 2) { + PyObject *cleandoc = _PyCompile_CleanDoc(docstring); + if (cleandoc == NULL) { + return ERROR; + } + stmt_ty st = (stmt_ty)asdl_seq_GET(stmts, 0); + assert(st->kind == Expr_kind); + location loc = LOC(st->v.Expr.value); + ADDOP_LOAD_CONST(c, loc, cleandoc); + Py_DECREF(cleandoc); + RETURN_IF_ERROR(compiler_nameop(c, NO_LOCATION, &_Py_ID(__doc__), Store)); } - stmt_ty st = (stmt_ty)asdl_seq_GET(stmts, 0); - assert(st->kind == Expr_kind); - location loc = LOC(st->v.Expr.value); - ADDOP_LOAD_CONST(c, loc, cleandoc); - Py_DECREF(cleandoc); - RETURN_IF_ERROR(compiler_nameop(c, NO_LOCATION, &_Py_ID(__doc__), Store)); } } for (Py_ssize_t i = first_instr; i < asdl_seq_LEN(stmts); i++) { VISIT(c, stmt, (stmt_ty)asdl_seq_GET(stmts, i)); } + // If there are annotations and the future import is not on, we + // collect the annotations in a separate pass and generate an + // __annotate__ function. See PEP 649. + if (!(c->c_future.ff_features & CO_FUTURE_ANNOTATIONS) && + c->u->u_deferred_annotations != NULL) { + + // It's possible that ste_annotations_block is set but + // u_deferred_annotations is not, because the former is still + // set if there are only non-simple annotations (i.e., annotations + // for attributes, subscripts, or parenthesized names). However, the + // reverse should not be possible. 
+ assert(c->u->u_ste->ste_annotation_block != NULL); + PyObject *deferred_anno = Py_NewRef(c->u->u_deferred_annotations); + void *key = (void *)((uintptr_t)c->u->u_ste->ste_id + 1); + if (compiler_setup_annotations_scope(c, loc, key, + c->u->u_ste->ste_annotation_block->ste_name) == -1) { + Py_DECREF(deferred_anno); + return ERROR; + } + Py_ssize_t annotations_len = PyList_Size(deferred_anno); + for (Py_ssize_t i = 0; i < annotations_len; i++) { + PyObject *ptr = PyList_GET_ITEM(deferred_anno, i); + stmt_ty st = (stmt_ty)PyLong_AsVoidPtr(ptr); + if (st == NULL) { + compiler_exit_scope(c); + Py_DECREF(deferred_anno); + return ERROR; + } + PyObject *mangled = _Py_Mangle(c->u->u_private, st->v.AnnAssign.target->v.Name.id); + ADDOP_LOAD_CONST_NEW(c, LOC(st), mangled); + VISIT(c, expr, st->v.AnnAssign.annotation); + } + Py_DECREF(deferred_anno); + + RETURN_IF_ERROR( + compiler_leave_annotations_scope(c, loc, annotations_len) + ); + RETURN_IF_ERROR( + compiler_nameop(c, loc, &_Py_ID(__annotate__), Store) + ); + } return SUCCESS; } @@ -1559,11 +1580,10 @@ compiler_codegen(struct compiler *c, mod_ty mod) } break; case Interactive_kind: - if (find_ann(mod->v.Interactive.body)) { - ADDOP(c, loc, SETUP_ANNOTATIONS); - } c->c_interactive = 1; - VISIT_SEQ(c, stmt, mod->v.Interactive.body); + if (compiler_body(c, loc, mod->v.Interactive.body) < 0) { + return ERROR; + } break; case Expression_kind: VISIT(c, expr, mod->v.Expression.body); @@ -1702,6 +1722,9 @@ compiler_make_closure(struct compiler *c, location loc, if (flags & MAKE_FUNCTION_ANNOTATIONS) { ADDOP_I(c, loc, SET_FUNCTION_ATTRIBUTE, MAKE_FUNCTION_ANNOTATIONS); } + if (flags & MAKE_FUNCTION_ANNOTATE) { + ADDOP_I(c, loc, SET_FUNCTION_ATTRIBUTE, MAKE_FUNCTION_ANNOTATE); + } if (flags & MAKE_FUNCTION_KWDEFAULTS) { ADDOP_I(c, loc, SET_FUNCTION_ATTRIBUTE, MAKE_FUNCTION_KWDEFAULTS); } @@ -1833,7 +1856,7 @@ compiler_visit_argannotation(struct compiler *c, identifier id, VISIT(c, expr, annotation); } } - *annotations_len += 2; 
+ *annotations_len += 1; return SUCCESS; } @@ -1856,43 +1879,76 @@ compiler_visit_argannotations(struct compiler *c, asdl_arg_seq* args, } static int -compiler_visit_annotations(struct compiler *c, location loc, - arguments_ty args, expr_ty returns) +compiler_visit_annotations_in_scope(struct compiler *c, location loc, + arguments_ty args, expr_ty returns, + Py_ssize_t *annotations_len) { - /* Push arg annotation names and values. - The expressions are evaluated out-of-order wrt the source code. - - Return -1 on error, 0 if no annotations pushed, 1 if a annotations is pushed. - */ - Py_ssize_t annotations_len = 0; - RETURN_IF_ERROR( - compiler_visit_argannotations(c, args->args, &annotations_len, loc)); + compiler_visit_argannotations(c, args->args, annotations_len, loc)); RETURN_IF_ERROR( - compiler_visit_argannotations(c, args->posonlyargs, &annotations_len, loc)); + compiler_visit_argannotations(c, args->posonlyargs, annotations_len, loc)); if (args->vararg && args->vararg->annotation) { RETURN_IF_ERROR( compiler_visit_argannotation(c, args->vararg->arg, - args->vararg->annotation, &annotations_len, loc)); + args->vararg->annotation, annotations_len, loc)); } RETURN_IF_ERROR( - compiler_visit_argannotations(c, args->kwonlyargs, &annotations_len, loc)); + compiler_visit_argannotations(c, args->kwonlyargs, annotations_len, loc)); if (args->kwarg && args->kwarg->annotation) { RETURN_IF_ERROR( compiler_visit_argannotation(c, args->kwarg->arg, - args->kwarg->annotation, &annotations_len, loc)); + args->kwarg->annotation, annotations_len, loc)); } RETURN_IF_ERROR( - compiler_visit_argannotation(c, &_Py_ID(return), returns, &annotations_len, loc)); + compiler_visit_argannotation(c, &_Py_ID(return), returns, annotations_len, loc)); - if (annotations_len) { - ADDOP_I(c, loc, BUILD_TUPLE, annotations_len); - return 1; + return 0; +} + +static int +compiler_visit_annotations(struct compiler *c, location loc, + arguments_ty args, expr_ty returns) +{ + /* Push arg annotation 
names and values. + The expressions are evaluated separately from the rest of the source code. + + Return -1 on error, or a combination of flags to add to the function. + */ + Py_ssize_t annotations_len = 0; + + PySTEntryObject *ste; + if (_PySymtable_LookupOptional(c->c_st, args, &ste) < 0) { + return ERROR; + } + assert(ste != NULL); + bool annotations_used = ste->ste_annotations_used; + + if (annotations_used) { + if (compiler_setup_annotations_scope(c, loc, (void *)args, + ste->ste_name) < 0) { + Py_DECREF(ste); + return ERROR; + } + } + Py_DECREF(ste); + + if (compiler_visit_annotations_in_scope(c, loc, args, returns, &annotations_len) < 0) { + if (annotations_used) { + compiler_exit_scope(c); + } + return ERROR; + } + + if (annotations_used) { + RETURN_IF_ERROR( + compiler_leave_annotations_scope(c, loc, annotations_len) + ); + return MAKE_FUNCTION_ANNOTATE; } return 0; @@ -2001,7 +2057,7 @@ compiler_type_param_bound_or_default(struct compiler *c, expr_ty e, identifier name, void *key, bool allow_starred) { - if (compiler_enter_scope(c, name, COMPILER_SCOPE_TYPEPARAMS, + if (compiler_enter_scope(c, name, COMPILER_SCOPE_ANNOTATIONS, key, e->lineno) == -1) { return ERROR; } @@ -2220,7 +2276,6 @@ compiler_function(struct compiler *c, stmt_ty s, int is_async) asdl_expr_seq *decos; asdl_type_param_seq *type_params; Py_ssize_t funcflags; - int annotations; int firstlineno; if (is_async) { @@ -2274,7 +2329,7 @@ compiler_function(struct compiler *c, stmt_ty s, int is_async) if (!type_params_name) { return ERROR; } - if (compiler_enter_scope(c, type_params_name, COMPILER_SCOPE_TYPEPARAMS, + if (compiler_enter_scope(c, type_params_name, COMPILER_SCOPE_ANNOTATIONS, (void *)type_params, firstlineno) == -1) { Py_DECREF(type_params_name); return ERROR; @@ -2286,16 +2341,14 @@ compiler_function(struct compiler *c, stmt_ty s, int is_async) } } - annotations = compiler_visit_annotations(c, loc, args, returns); - if (annotations < 0) { + int annotations_flag = 
compiler_visit_annotations(c, loc, args, returns); + if (annotations_flag < 0) { if (is_generic) { compiler_exit_scope(c); } return ERROR; } - if (annotations > 0) { - funcflags |= MAKE_FUNCTION_ANNOTATIONS; - } + funcflags |= annotations_flag; if (compiler_function_body(c, s, is_async, funcflags, firstlineno) < 0) { if (is_generic) { @@ -2510,7 +2563,7 @@ compiler_class(struct compiler *c, stmt_ty s) if (!type_params_name) { return ERROR; } - if (compiler_enter_scope(c, type_params_name, COMPILER_SCOPE_TYPEPARAMS, + if (compiler_enter_scope(c, type_params_name, COMPILER_SCOPE_ANNOTATIONS, (void *)type_params, firstlineno) == -1) { Py_DECREF(type_params_name); return ERROR; @@ -2630,7 +2683,7 @@ compiler_typealias(struct compiler *c, stmt_ty s) if (!type_params_name) { return ERROR; } - if (compiler_enter_scope(c, type_params_name, COMPILER_SCOPE_TYPEPARAMS, + if (compiler_enter_scope(c, type_params_name, COMPILER_SCOPE_ANNOTATIONS, (void *)type_params, loc.lineno) == -1) { Py_DECREF(type_params_name); return ERROR; @@ -2719,15 +2772,6 @@ check_compare(struct compiler *c, expr_ty e) return SUCCESS; } -static const int compare_masks[] = { - [Py_LT] = COMPARISON_LESS_THAN, - [Py_LE] = COMPARISON_LESS_THAN | COMPARISON_EQUALS, - [Py_EQ] = COMPARISON_EQUALS, - [Py_NE] = COMPARISON_NOT_EQUALS, - [Py_GT] = COMPARISON_GREATER_THAN, - [Py_GE] = COMPARISON_GREATER_THAN | COMPARISON_EQUALS, -}; - static int compiler_addcompare(struct compiler *c, location loc, cmpop_ty op) { @@ -6366,7 +6410,8 @@ compiler_annassign(struct compiler *c, stmt_ty s) { location loc = LOC(s); expr_ty targ = s->v.AnnAssign.target; - PyObject* mangled; + bool future_annotations = c->c_future.ff_features & CO_FUTURE_ANNOTATIONS; + PyObject *mangled; assert(s->kind == AnnAssign_kind); @@ -6384,16 +6429,30 @@ compiler_annassign(struct compiler *c, stmt_ty s) if (s->v.AnnAssign.simple && (c->u->u_scope_type == COMPILER_SCOPE_MODULE || c->u->u_scope_type == COMPILER_SCOPE_CLASS)) { - if 
(c->c_future.ff_features & CO_FUTURE_ANNOTATIONS) { - VISIT(c, annexpr, s->v.AnnAssign.annotation) + if (future_annotations) { + VISIT(c, annexpr, s->v.AnnAssign.annotation); + ADDOP_NAME(c, loc, LOAD_NAME, &_Py_ID(__annotations__), names); + mangled = _Py_MaybeMangle(c->u->u_private, c->u->u_ste, targ->v.Name.id); + ADDOP_LOAD_CONST_NEW(c, loc, mangled); + ADDOP(c, loc, STORE_SUBSCR); } else { - VISIT(c, expr, s->v.AnnAssign.annotation); + if (c->u->u_deferred_annotations == NULL) { + c->u->u_deferred_annotations = PyList_New(0); + if (c->u->u_deferred_annotations == NULL) { + return ERROR; + } + } + PyObject *ptr = PyLong_FromVoidPtr((void *)s); + if (ptr == NULL) { + return ERROR; + } + if (PyList_Append(c->u->u_deferred_annotations, ptr) < 0) { + Py_DECREF(ptr); + return ERROR; + } + Py_DECREF(ptr); } - ADDOP_NAME(c, loc, LOAD_NAME, &_Py_ID(__annotations__), names); - mangled = _Py_MaybeMangle(c->u->u_private, c->u->u_ste, targ->v.Name.id); - ADDOP_LOAD_CONST_NEW(c, loc, mangled); - ADDOP(c, loc, STORE_SUBSCR); } break; case Attribute_kind: @@ -6419,7 +6478,7 @@ compiler_annassign(struct compiler *c, stmt_ty s) return ERROR; } /* Annotation is evaluated last. 
*/ - if (!s->v.AnnAssign.simple && check_annotation(c, s) < 0) { + if (future_annotations && !s->v.AnnAssign.simple && check_annotation(c, s) < 0) { return ERROR; } return SUCCESS; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index bab629684c53f6..470c82d938ab7c 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4061,6 +4061,11 @@ assert(func_obj->func_defaults == NULL); func_obj->func_defaults = attr; break; + case MAKE_FUNCTION_ANNOTATE: + assert(PyCallable_Check(attr)); + assert(func_obj->func_annotate == NULL); + func_obj->func_annotate = attr; + break; default: Py_UNREACHABLE(); } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 355be966cbb84a..0274f8b7a48c3c 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5450,6 +5450,11 @@ assert(func_obj->func_defaults == NULL); func_obj->func_defaults = attr; break; + case MAKE_FUNCTION_ANNOTATE: + assert(PyCallable_Check(attr)); + assert(func_obj->func_annotate == NULL); + func_obj->func_annotate = attr; + break; default: Py_UNREACHABLE(); } diff --git a/Python/symtable.c b/Python/symtable.c index 7e452cdb13badf..287bc2bd58107d 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -112,6 +112,7 @@ ste_new(struct symtable *st, identifier name, _Py_block_ty block, ste->ste_varkeywords = 0; ste->ste_opt_lineno = 0; ste->ste_opt_col_offset = 0; + ste->ste_annotations_used = 0; ste->ste_lineno = lineno; ste->ste_col_offset = col_offset; ste->ste_end_lineno = end_lineno; @@ -132,6 +133,7 @@ ste_new(struct symtable *st, identifier name, _Py_block_ty block, ste->ste_can_see_class_scope = 0; ste->ste_comp_iter_expr = 0; ste->ste_needs_classdict = 0; + ste->ste_annotation_block = NULL; ste->ste_symbols = PyDict_New(); ste->ste_varnames = PyList_New(0); @@ -167,6 +169,7 @@ ste_dealloc(PySTEntryObject *ste) Py_XDECREF(ste->ste_varnames); Py_XDECREF(ste->ste_children); Py_XDECREF(ste->ste_directives); + 
Py_XDECREF(ste->ste_annotation_block); Py_XDECREF(ste->ste_mangled_names); PyObject_Free(ste); } @@ -245,10 +248,11 @@ static int symtable_visit_alias(struct symtable *st, alias_ty); static int symtable_visit_comprehension(struct symtable *st, comprehension_ty); static int symtable_visit_keyword(struct symtable *st, keyword_ty); static int symtable_visit_params(struct symtable *st, asdl_arg_seq *args); -static int symtable_visit_annotation(struct symtable *st, expr_ty annotation); +static int symtable_visit_annotation(struct symtable *st, expr_ty annotation, void *key); static int symtable_visit_argannotations(struct symtable *st, asdl_arg_seq *args); static int symtable_implicit_arg(struct symtable *st, int pos); -static int symtable_visit_annotations(struct symtable *st, stmt_ty, arguments_ty, expr_ty); +static int symtable_visit_annotations(struct symtable *st, stmt_ty, arguments_ty, expr_ty, + struct _symtable_entry *parent_ste); static int symtable_visit_withitem(struct symtable *st, withitem_ty item); static int symtable_visit_match_case(struct symtable *st, match_case_ty m); static int symtable_visit_pattern(struct symtable *st, pattern_ty s); @@ -504,6 +508,21 @@ _PySymtable_Lookup(struct symtable *st, void *key) return (PySTEntryObject *)v; } +int +_PySymtable_LookupOptional(struct symtable *st, void *key, + PySTEntryObject **out) +{ + PyObject *k = PyLong_FromVoidPtr(key); + if (k == NULL) { + *out = NULL; + return -1; + } + int result = PyDict_GetItemRef(st->st_blocks, k, (PyObject **)out); + Py_DECREF(k); + assert(*out == NULL || PySTEntry_Check(*out)); + return result; +} + long _PyST_GetSymbol(PySTEntryObject *ste, PyObject *name) { @@ -525,6 +544,7 @@ int _PyST_IsFunctionLike(PySTEntryObject *ste) { return ste->ste_type == FunctionBlock + || ste->ste_type == AnnotationBlock || ste->ste_type == TypeVarBoundBlock || ste->ste_type == TypeAliasBlock || ste->ste_type == TypeParamBlock; @@ -1317,20 +1337,12 @@ symtable_exit_block(struct symtable *st) } 
static int -symtable_enter_block(struct symtable *st, identifier name, _Py_block_ty block, - void *ast, int lineno, int col_offset, - int end_lineno, int end_col_offset) +symtable_enter_existing_block(struct symtable *st, PySTEntryObject* ste) { - PySTEntryObject *prev = NULL, *ste; - - ste = ste_new(st, name, block, ast, lineno, col_offset, end_lineno, end_col_offset); - if (ste == NULL) - return 0; if (PyList_Append(st->st_stack, (PyObject *)ste) < 0) { - Py_DECREF(ste); return 0; } - prev = st->st_cur; + PySTEntryObject *prev = st->st_cur; /* bpo-37757: For now, disallow *all* assignment expressions in the * outermost iterator expression of a comprehension, even those inside * a nested comprehension or a lambda expression. @@ -1340,21 +1352,20 @@ symtable_enter_block(struct symtable *st, identifier name, _Py_block_ty block, } /* No need to inherit ste_mangled_names in classes, where all names * are mangled. */ - if (prev && prev->ste_mangled_names != NULL && block != ClassBlock) { + if (prev && prev->ste_mangled_names != NULL && ste->ste_type != ClassBlock) { ste->ste_mangled_names = Py_NewRef(prev->ste_mangled_names); } /* The entry is owned by the stack. Borrow it for st_cur. */ - Py_DECREF(ste); st->st_cur = ste; - /* Annotation blocks shouldn't have any affect on the symbol table since in - * the compilation stage, they will all be transformed to strings. They are - * only created if future 'annotations' feature is activated. */ - if (block == AnnotationBlock) { + /* If "from __future__ import annotations" is active, + * annotation blocks shouldn't have any affect on the symbol table since in + * the compilation stage, they will all be transformed to strings. 
*/ + if (st->st_future->ff_features & CO_FUTURE_ANNOTATIONS && ste->ste_type == AnnotationBlock) { return 1; } - if (block == ModuleBlock) + if (ste->ste_type == ModuleBlock) st->st_global = st->st_cur->ste_symbols; if (prev) { @@ -1365,6 +1376,20 @@ symtable_enter_block(struct symtable *st, identifier name, _Py_block_ty block, return 1; } +static int +symtable_enter_block(struct symtable *st, identifier name, _Py_block_ty block, + void *ast, int lineno, int col_offset, + int end_lineno, int end_col_offset) +{ + PySTEntryObject *ste = ste_new(st, name, block, ast, + lineno, col_offset, end_lineno, end_col_offset); + if (ste == NULL) + return 0; + int result = symtable_enter_existing_block(st, ste); + Py_DECREF(ste); + return result; +} + static long symtable_lookup_entry(struct symtable *st, PySTEntryObject *ste, PyObject *name) { @@ -1643,7 +1668,7 @@ symtable_visit_stmt(struct symtable *st, stmt_ty s) VISIT_QUIT(st, 0); } switch (s->kind) { - case FunctionDef_kind: + case FunctionDef_kind: { if (!symtable_add_def(st, s->v.FunctionDef.name, DEF_LOCAL, LOCATION(s))) VISIT_QUIT(st, 0); if (s->v.FunctionDef.args->defaults) @@ -1665,13 +1690,22 @@ symtable_visit_stmt(struct symtable *st, stmt_ty s) } VISIT_SEQ(st, type_param, s->v.FunctionDef.type_params); } + PySTEntryObject *new_ste = ste_new(st, s->v.FunctionDef.name, FunctionBlock, (void *)s, + LOCATION(s)); + if (!new_ste) { + VISIT_QUIT(st, 0); + } + if (!symtable_visit_annotations(st, s, s->v.FunctionDef.args, - s->v.FunctionDef.returns)) + s->v.FunctionDef.returns, new_ste)) { + Py_DECREF(new_ste); VISIT_QUIT(st, 0); - if (!symtable_enter_block(st, s->v.FunctionDef.name, - FunctionBlock, (void *)s, - LOCATION(s))) + } + if (!symtable_enter_existing_block(st, new_ste)) { + Py_DECREF(new_ste); VISIT_QUIT(st, 0); + } + Py_DECREF(new_ste); VISIT(st, arguments, s->v.FunctionDef.args); VISIT_SEQ(st, stmt, s->v.FunctionDef.body); if (!symtable_exit_block(st)) @@ -1681,6 +1715,7 @@ symtable_visit_stmt(struct symtable 
*st, stmt_ty s) VISIT_QUIT(st, 0); } break; + } case ClassDef_kind: { PyObject *tmp; if (!symtable_add_def(st, s->v.ClassDef.name, DEF_LOCAL, LOCATION(s))) @@ -1776,6 +1811,7 @@ symtable_visit_stmt(struct symtable *st, stmt_ty s) VISIT(st, expr, s->v.Assign.value); break; case AnnAssign_kind: + st->st_cur->ste_annotations_used = 1; if (s->v.AnnAssign.target->kind == Name_kind) { expr_ty e_name = s->v.AnnAssign.target; long cur = symtable_lookup(st, e_name->v.Name.id); @@ -1810,7 +1846,8 @@ symtable_visit_stmt(struct symtable *st, stmt_ty s) else { VISIT(st, expr, s->v.AnnAssign.target); } - if (!symtable_visit_annotation(st, s->v.AnnAssign.annotation)) { + if (!symtable_visit_annotation(st, s->v.AnnAssign.annotation, + (void *)((uintptr_t)st->st_cur->ste_id + 1))) { VISIT_QUIT(st, 0); } @@ -1960,7 +1997,7 @@ symtable_visit_stmt(struct symtable *st, stmt_ty s) VISIT_SEQ(st, withitem, s->v.With.items); VISIT_SEQ(st, stmt, s->v.With.body); break; - case AsyncFunctionDef_kind: + case AsyncFunctionDef_kind: { if (!symtable_add_def(st, s->v.AsyncFunctionDef.name, DEF_LOCAL, LOCATION(s))) VISIT_QUIT(st, 0); if (s->v.AsyncFunctionDef.args->defaults) @@ -1983,14 +2020,21 @@ symtable_visit_stmt(struct symtable *st, stmt_ty s) } VISIT_SEQ(st, type_param, s->v.AsyncFunctionDef.type_params); } + PySTEntryObject *new_ste = ste_new(st, s->v.FunctionDef.name, FunctionBlock, (void *)s, + LOCATION(s)); + if (!new_ste) { + VISIT_QUIT(st, 0); + } + if (!symtable_visit_annotations(st, s, s->v.AsyncFunctionDef.args, - s->v.AsyncFunctionDef.returns)) + s->v.AsyncFunctionDef.returns, new_ste)) VISIT_QUIT(st, 0); - if (!symtable_enter_block(st, s->v.AsyncFunctionDef.name, - FunctionBlock, (void *)s, - s->lineno, s->col_offset, - s->end_lineno, s->end_col_offset)) + if (!symtable_enter_existing_block(st, new_ste)) { + Py_DECREF(new_ste); VISIT_QUIT(st, 0); + } + Py_DECREF(new_ste); + st->st_cur->ste_coroutine = 1; VISIT(st, arguments, s->v.AsyncFunctionDef.args); VISIT_SEQ(st, stmt, 
s->v.AsyncFunctionDef.body); @@ -2001,6 +2045,7 @@ symtable_visit_stmt(struct symtable *st, stmt_ty s) VISIT_QUIT(st, 0); } break; + } case AsyncWith_kind: VISIT_SEQ(st, withitem, s->v.AsyncWith.items); VISIT_SEQ(st, stmt, s->v.AsyncWith.body); @@ -2444,18 +2489,44 @@ symtable_visit_params(struct symtable *st, asdl_arg_seq *args) } static int -symtable_visit_annotation(struct symtable *st, expr_ty annotation) +symtable_visit_annotation(struct symtable *st, expr_ty annotation, void *key) { - int future_annotations = st->st_future->ff_features & CO_FUTURE_ANNOTATIONS; - if (future_annotations && - !symtable_enter_block(st, &_Py_ID(_annotation), AnnotationBlock, - (void *)annotation, annotation->lineno, - annotation->col_offset, annotation->end_lineno, - annotation->end_col_offset)) { - VISIT_QUIT(st, 0); + struct _symtable_entry *parent_ste = st->st_cur; + if (parent_ste->ste_annotation_block == NULL) { + _Py_block_ty current_type = parent_ste->ste_type; + if (!symtable_enter_block(st, &_Py_ID(__annotate__), AnnotationBlock, + key, LOCATION(annotation))) { + VISIT_QUIT(st, 0); + } + parent_ste->ste_annotation_block = + (struct _symtable_entry *)Py_NewRef(st->st_cur); + int future_annotations = st->st_future->ff_features & CO_FUTURE_ANNOTATIONS; + if (current_type == ClassBlock && !future_annotations) { + st->st_cur->ste_can_see_class_scope = 1; + if (!symtable_add_def(st, &_Py_ID(__classdict__), USE, LOCATION(annotation))) { + return 0; + } + } + + _Py_DECLARE_STR(format, ".format"); + // The generated __annotate__ function takes a single parameter with the + // internal name ".format". 
+ if (!symtable_add_def(st, &_Py_STR(format), DEF_PARAM, + LOCATION(annotation))) { + return 0; + } + if (!symtable_add_def(st, &_Py_STR(format), USE, + LOCATION(annotation))) { + return 0; + } + } + else { + if (!symtable_enter_existing_block(st, parent_ste->ste_annotation_block)) { + VISIT_QUIT(st, 0); + } } VISIT(st, expr, annotation); - if (future_annotations && !symtable_exit_block(st)) { + if (!symtable_exit_block(st)) { VISIT_QUIT(st, 0); } return 1; @@ -2471,37 +2542,58 @@ symtable_visit_argannotations(struct symtable *st, asdl_arg_seq *args) for (i = 0; i < asdl_seq_LEN(args); i++) { arg_ty arg = (arg_ty)asdl_seq_GET(args, i); - if (arg->annotation) + if (arg->annotation) { + st->st_cur->ste_annotations_used = 1; VISIT(st, expr, arg->annotation); + } } return 1; } static int -symtable_visit_annotations(struct symtable *st, stmt_ty o, arguments_ty a, expr_ty returns) +symtable_visit_annotations(struct symtable *st, stmt_ty o, arguments_ty a, expr_ty returns, + struct _symtable_entry *function_ste) { - int future_annotations = st->st_future->ff_features & CO_FUTURE_ANNOTATIONS; - if (future_annotations && - !symtable_enter_block(st, &_Py_ID(_annotation), AnnotationBlock, - (void *)o, o->lineno, o->col_offset, o->end_lineno, - o->end_col_offset)) { + int is_in_class = st->st_cur->ste_can_see_class_scope; + _Py_block_ty current_type = st->st_cur->ste_type; + if (!symtable_enter_block(st, &_Py_ID(__annotate__), AnnotationBlock, + (void *)a, LOCATION(o))) { VISIT_QUIT(st, 0); } + if (is_in_class || current_type == ClassBlock) { + st->st_cur->ste_can_see_class_scope = 1; + if (!symtable_add_def(st, &_Py_ID(__classdict__), USE, LOCATION(o))) { + return 0; + } + } + _Py_DECLARE_STR(format, ".format"); + // We need to insert code that reads this "parameter" to the function. 
+ if (!symtable_add_def(st, &_Py_STR(format), DEF_PARAM, LOCATION(o))) { + return 0; + } + if (!symtable_add_def(st, &_Py_STR(format), USE, LOCATION(o))) { + return 0; + } if (a->posonlyargs && !symtable_visit_argannotations(st, a->posonlyargs)) return 0; if (a->args && !symtable_visit_argannotations(st, a->args)) return 0; - if (a->vararg && a->vararg->annotation) + if (a->vararg && a->vararg->annotation) { + st->st_cur->ste_annotations_used = 1; VISIT(st, expr, a->vararg->annotation); - if (a->kwarg && a->kwarg->annotation) + } + if (a->kwarg && a->kwarg->annotation) { + st->st_cur->ste_annotations_used = 1; VISIT(st, expr, a->kwarg->annotation); + } if (a->kwonlyargs && !symtable_visit_argannotations(st, a->kwonlyargs)) return 0; - if (future_annotations && !symtable_exit_block(st)) { - VISIT_QUIT(st, 0); + if (returns) { + st->st_cur->ste_annotations_used = 1; + VISIT(st, expr, returns); } - if (returns && !symtable_visit_annotation(st, returns)) { + if (!symtable_exit_block(st)) { VISIT_QUIT(st, 0); } return 1; @@ -2733,7 +2825,7 @@ symtable_visit_dictcomp(struct symtable *st, expr_ty e) static int symtable_raise_if_annotation_block(struct symtable *st, const char *name, expr_ty e) { - enum _block_type type = st->st_cur->ste_type; + _Py_block_ty type = st->st_cur->ste_type; if (type == AnnotationBlock) PyErr_Format(PyExc_SyntaxError, ANNOTATION_NOT_ALLOWED, name); else if (type == TypeVarBoundBlock) From ec3af291fe2f680ab277edde7113e2762754f4aa Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 11 Jun 2024 17:15:01 +0100 Subject: [PATCH 153/373] gh-120346: Respect PYTHON_BASIC_REPL when running in interactive inspect mode (#120349) --- .../2024-06-11-12-47-54.gh-issue-120346.hhn_6X.rst | 2 ++ Modules/main.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-11-12-47-54.gh-issue-120346.hhn_6X.rst diff --git a/Misc/NEWS.d/next/Core and 
Builtins/2024-06-11-12-47-54.gh-issue-120346.hhn_6X.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-11-12-47-54.gh-issue-120346.hhn_6X.rst new file mode 100644 index 00000000000000..eb2d0f9a705caa --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-11-12-47-54.gh-issue-120346.hhn_6X.rst @@ -0,0 +1,2 @@ +Respect :envvar:`PYTHON_BASIC_REPL` when running in interactive inspect mode +(``python -i``). Patch by Pablo Galindo diff --git a/Modules/main.c b/Modules/main.c index 8eded2639ad90a..1a70b300b6ad17 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -542,7 +542,8 @@ pymain_repl(PyConfig *config, int *exitcode) return; } - if (!isatty(fileno(stdin))) { + if (!isatty(fileno(stdin)) + || _Py_GetEnv(config->use_environment, "PYTHON_BASIC_REPL")) { PyCompilerFlags cf = _PyCompilerFlags_INIT; int run = PyRun_AnyFileExFlags(stdin, "", 0, &cf); *exitcode = (run != 0); From 32a0faba439b239d7b0c242c1e3cd2025c52b8cf Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Tue, 11 Jun 2024 12:42:10 -0400 Subject: [PATCH 154/373] gh-119517: Fixes for pasting in pyrepl (#120253) * Remove pyrepl's optimization for self-insert This will be replaced by a less specialized optimization. * Use line-buffering when pyrepl echoes pastes Previously echoing was totally suppressed until the entire command had been pasted and the terminal ended paste mode, but this gives the user no feedback to indicate that an operation is in progress. Drawing something to the screen once per line strikes a balance between perceived responsiveness and performance. * Remove dead code from pyrepl `msg_at_bottom` is always true. * Speed up pyrepl's screen rendering computation The Reader in pyrepl doesn't hold a complete representation of the screen area being drawn as persistent state. Instead, it recomputes it, on each keypress. This is fast enough for a few hundred bytes, but incredibly slow as the input buffer grows into the kilobytes (likely because of pasting).
Rather than making some expensive and expansive changes to the repl's internal representation of the screen, add some caching: remember some data from one refresh to the next about what was drawn to the screen and, if we don't find anything that has invalidated the results that were computed last time around, reuse them. To keep this caching as simple as possible, all we'll do is look for lines in the buffer that were above the cursor the last time we were asked to update the screen, and that are still above the cursor now. We assume that nothing can affect a line that comes before both the old and new cursor location without us being informed. Based on this assumption, we can reuse old lines, which drastically speeds up the overwhelmingly common case where the user is typing near the end of the buffer. * Speed up pyrepl prompt drawing Cache the `can_colorize()` call rather than repeatedly recomputing it. This call looks up an environment variable, and is called once per character typed at the REPL. The environment variable lookup shows up as a hot spot when profiling, and we don't expect this to change while the REPL is running. * Speed up pasting multiple lines into the REPL Previously, we were checking whether the command should be accepted each time a line break was encountered, but that's not the expected behavior. In bracketed paste mode, we expect everything pasted to be part of a single block of code, and encountering a newline shouldn't behave like a user pressing Enter to execute a command. The user should always have a chance to review the pasted command before running it. * Use a read buffer for input in pyrepl Previously we were reading one byte at a time, which causes much slower IO than necessary. Instead, read in chunks, processing previously read data before asking for more.
* Optimize finding width of a single character `wlen` finds the width of a multi-character string by adding up the width of each character, and then subtracting the width of any escape sequences. It's often called for single character strings, however, which can't possibly contain escape sequences. Optimize for that case. * Optimize disp_str for ASCII characters Since every ASCII character is known to display as single width, we can avoid not only the Unicode data lookup in `disp_str` but also the one hidden in `str_width` for them. * Speed up cursor movements in long pyrepl commands When the current pyrepl command buffer contains many lines, scrolling up becomes slow. We have optimizations in place to reuse lines above the cursor position from one refresh to the next, but don't currently try to reuse lines below the cursor position in the same way, so we wind up with quadratic behavior where all lines of the buffer below the cursor are recomputed each time the cursor moves up another line. Optimize this by only computing one screen's worth of lines beyond the cursor position. Any lines beyond that can't possibly be shown by the console, and bounding this makes scrolling up have linear time complexity instead. 
--------- Signed-off-by: Matt Wozniski Co-authored-by: Pablo Galindo --- Lib/_pyrepl/commands.py | 3 - Lib/_pyrepl/completing_reader.py | 8 +- Lib/_pyrepl/reader.py | 154 ++++++++++++++++++++++--------- Lib/_pyrepl/readline.py | 4 + Lib/_pyrepl/unix_console.py | 20 +++- Lib/_pyrepl/utils.py | 2 + 6 files changed, 134 insertions(+), 57 deletions(-) diff --git a/Lib/_pyrepl/commands.py b/Lib/_pyrepl/commands.py index 6bffed1bfe9327..c3fce91013b001 100644 --- a/Lib/_pyrepl/commands.py +++ b/Lib/_pyrepl/commands.py @@ -368,8 +368,6 @@ def do(self) -> None: r = self.reader text = self.event * r.get_arg() r.insert(text) - if len(text) == 1 and r.pos == len(r.buffer): - r.calc_screen = r.append_to_screen class insert_nl(EditCommand): @@ -483,4 +481,3 @@ def do(self) -> None: self.reader.paste_mode = False self.reader.in_bracketed_paste = False self.reader.dirty = True - self.reader.calc_screen = self.reader.calc_complete_screen diff --git a/Lib/_pyrepl/completing_reader.py b/Lib/_pyrepl/completing_reader.py index 8df35ccb9117b1..05770aaf5060cc 100644 --- a/Lib/_pyrepl/completing_reader.py +++ b/Lib/_pyrepl/completing_reader.py @@ -209,10 +209,6 @@ def do(self) -> None: r = self.reader # type: ignore[assignment] commands.self_insert.do(self) - - if r.cmpltn_menu_visible or r.cmpltn_message_visible: - r.calc_screen = r.calc_complete_screen - if r.cmpltn_menu_visible: stem = r.get_stem() if len(stem) < 1: @@ -261,8 +257,8 @@ def after_command(self, cmd: Command) -> None: if not isinstance(cmd, (complete, self_insert)): self.cmpltn_reset() - def calc_complete_screen(self) -> list[str]: - screen = super().calc_complete_screen() + def calc_screen(self) -> list[str]: + screen = super().calc_screen() if self.cmpltn_menu_visible: ly = self.lxy[1] screen[ly:ly] = self.cmpltn_menu diff --git a/Lib/_pyrepl/reader.py b/Lib/_pyrepl/reader.py index beee7764e0eb84..63ae661968408e 100644 --- a/Lib/_pyrepl/reader.py +++ b/Lib/_pyrepl/reader.py @@ -35,15 +35,13 @@ # types Command = 
commands.Command if False: - from typing import Callable from .types import Callback, SimpleContextManager, KeySpec, CommandName - CalcScreen = Callable[[], list[str]] def disp_str(buffer: str) -> tuple[str, list[int]]: """disp_str(buffer:string) -> (string, [int]) - Return the string that should be the printed represenation of + Return the string that should be the printed representation of |buffer| and a list detailing where the characters of |buffer| get used up. E.g.: @@ -54,11 +52,17 @@ def disp_str(buffer: str) -> tuple[str, list[int]]: b: list[int] = [] s: list[str] = [] for c in buffer: - if ord(c) > 128 and unicodedata.category(c).startswith("C"): + if ord(c) < 128: + s.append(c) + b.append(1) + elif unicodedata.category(c).startswith("C"): c = r"\u%04x" % ord(c) - s.append(c) - b.append(wlen(c)) - b.extend([0] * (len(c) - 1)) + s.append(c) + b.append(str_width(c)) + b.extend([0] * (len(c) - 1)) + else: + s.append(c) + b.append(str_width(c)) return "".join(s), b @@ -230,7 +234,6 @@ class Reader: commands: dict[str, type[Command]] = field(default_factory=make_default_commands) last_command: type[Command] | None = None syntax_table: dict[str, int] = field(default_factory=make_default_syntax_table) - msg_at_bottom: bool = True keymap: tuple[tuple[str, str], ...] 
= () input_trans: input.KeymapTranslator = field(init=False) input_trans_stack: list[input.KeymapTranslator] = field(default_factory=list) @@ -238,8 +241,52 @@ class Reader: screeninfo: list[tuple[int, list[int]]] = field(init=False) cxy: tuple[int, int] = field(init=False) lxy: tuple[int, int] = field(init=False) - calc_screen: CalcScreen = field(init=False) scheduled_commands: list[str] = field(default_factory=list) + can_colorize: bool = False + + ## cached metadata to speed up screen refreshes + @dataclass + class RefreshCache: + in_bracketed_paste: bool = False + screen: list[str] = field(default_factory=list) + screeninfo: list[tuple[int, list[int]]] = field(init=False) + line_end_offsets: list[int] = field(default_factory=list) + pos: int = field(init=False) + cxy: tuple[int, int] = field(init=False) + dimensions: tuple[int, int] = field(init=False) + + def update_cache(self, + reader: Reader, + screen: list[str], + screeninfo: list[tuple[int, list[int]]], + ) -> None: + self.in_bracketed_paste = reader.in_bracketed_paste + self.screen = screen.copy() + self.screeninfo = screeninfo.copy() + self.pos = reader.pos + self.cxy = reader.cxy + self.dimensions = reader.console.width, reader.console.height + + def valid(self, reader: Reader) -> bool: + dimensions = reader.console.width, reader.console.height + dimensions_changed = dimensions != self.dimensions + paste_changed = reader.in_bracketed_paste != self.in_bracketed_paste + return not (dimensions_changed or paste_changed) + + def get_cached_location(self, reader: Reader) -> tuple[int, int]: + offset = 0 + earliest_common_pos = min(reader.pos, self.pos) + num_common_lines = len(self.line_end_offsets) + while num_common_lines > 0: + offset = self.line_end_offsets[num_common_lines - 1] + if earliest_common_pos > offset: + break + num_common_lines -= 1 + else: + offset = 0 + return offset, num_common_lines + + last_refresh_cache: RefreshCache = field(default_factory=RefreshCache) def __post_init__(self) -> None: 
# Enable the use of `insert` without a `prepare` call - necessary to @@ -252,53 +299,60 @@ def __post_init__(self) -> None: self.screeninfo = [(0, [])] self.cxy = self.pos2xy() self.lxy = (self.pos, 0) - self.calc_screen = self.calc_complete_screen + self.can_colorize = can_colorize() + + self.last_refresh_cache.screeninfo = self.screeninfo + self.last_refresh_cache.pos = self.pos + self.last_refresh_cache.cxy = self.cxy + self.last_refresh_cache.dimensions = (0, 0) def collect_keymap(self) -> tuple[tuple[KeySpec, CommandName], ...]: return default_keymap - def append_to_screen(self) -> list[str]: - new_screen = self.screen.copy() or [''] + def calc_screen(self) -> list[str]: + """Translate changes in self.buffer into changes in self.console.screen.""" + # Since the last call to calc_screen: + # screen and screeninfo may differ due to a completion menu being shown + # pos and cxy may differ due to edits, cursor movements, or completion menus - new_character = self.buffer[-1] - new_character_len = wlen(new_character) + # Lines that are above both the old and new cursor position can't have changed, + # unless the terminal has been resized (which might cause reflowing) or we've + # entered or left paste mode (which changes prompts, causing reflowing). 
+ num_common_lines = 0 + offset = 0 + if self.last_refresh_cache.valid(self): + offset, num_common_lines = self.last_refresh_cache.get_cached_location(self) - last_line_len = wlen(new_screen[-1]) - if last_line_len + new_character_len >= self.console.width: # We need to wrap here - new_screen[-1] += '\\' - self.screeninfo[-1][1].append(1) - new_screen.append(self.buffer[-1]) - self.screeninfo.append((0, [new_character_len])) - else: - new_screen[-1] += self.buffer[-1] - self.screeninfo[-1][1].append(new_character_len) - self.cxy = self.pos2xy() + screen = self.last_refresh_cache.screen + del screen[num_common_lines:] - # Reset the function that is used for completing the screen - self.calc_screen = self.calc_complete_screen - return new_screen + screeninfo = self.last_refresh_cache.screeninfo + del screeninfo[num_common_lines:] + + last_refresh_line_end_offsets = self.last_refresh_cache.line_end_offsets + del last_refresh_line_end_offsets[num_common_lines:] - def calc_complete_screen(self) -> list[str]: - """The purpose of this method is to translate changes in - self.buffer into changes in self.screen. Currently it rips - everything down and starts from scratch, which whilst not - especially efficient is certainly simple(r). - """ - lines = self.get_unicode().split("\n") - screen: list[str] = [] - screeninfo: list[tuple[int, list[int]]] = [] pos = self.pos - for ln, line in enumerate(lines): + pos -= offset + + lines = "".join(self.buffer[offset:]).split("\n") + cursor_found = False + lines_beyond_cursor = 0 + for ln, line in enumerate(lines, num_common_lines): ll = len(line) if 0 <= pos <= ll: - if self.msg and not self.msg_at_bottom: - for mline in self.msg.split("\n"): - screen.append(mline) - screeninfo.append((0, [])) self.lxy = pos, ln + cursor_found = True + elif cursor_found: + lines_beyond_cursor += 1 + if lines_beyond_cursor > self.console.height: + # No need to keep formatting lines. + # The console can't show them. 
+ break prompt = self.get_prompt(ln, ll >= pos >= 0) while "\n" in prompt: pre_prompt, _, prompt = prompt.partition("\n") + last_refresh_line_end_offsets.append(offset) screen.append(pre_prompt) screeninfo.append((0, [])) pos -= ll + 1 @@ -306,6 +360,8 @@ def calc_complete_screen(self) -> list[str]: l, l2 = disp_str(line) wrapcount = (wlen(l) + lp) // self.console.width if wrapcount == 0: + offset += ll + 1 # Takes all of the line plus the newline + last_refresh_line_end_offsets.append(offset) screen.append(prompt + l) screeninfo.append((lp, l2)) else: @@ -321,11 +377,14 @@ def calc_complete_screen(self) -> list[str]: column += character_width pre = prompt if i == 0 else "" if len(l) > index_to_wrap_before: + offset += index_to_wrap_before post = "\\" after = [1] else: + offset += index_to_wrap_before + 1 # Takes the newline post = "" after = [] + last_refresh_line_end_offsets.append(offset) screen.append(pre + l[:index_to_wrap_before] + post) screeninfo.append((prelen, l2[:index_to_wrap_before] + after)) l = l[index_to_wrap_before:] @@ -333,10 +392,12 @@ def calc_complete_screen(self) -> list[str]: i += 1 self.screeninfo = screeninfo self.cxy = self.pos2xy() - if self.msg and self.msg_at_bottom: + if self.msg: for mline in self.msg.split("\n"): screen.append(mline) screeninfo.append((0, [])) + + self.last_refresh_cache.update_cache(self, screen, screeninfo) return screen @staticmethod @@ -456,7 +517,7 @@ def get_prompt(self, lineno: int, cursor_on_line: bool) -> str: 'lineno'.""" if self.arg is not None and cursor_on_line: prompt = f"(arg: {self.arg}) " - elif self.paste_mode: + elif self.paste_mode and not self.in_bracketed_paste: prompt = "(paste) " elif "\n" in self.buffer: if lineno == 0: @@ -468,7 +529,7 @@ def get_prompt(self, lineno: int, cursor_on_line: bool) -> str: else: prompt = self.ps1 - if can_colorize(): + if self.can_colorize: prompt = f"{ANSIColors.BOLD_MAGENTA}{prompt}{ANSIColors.RESET}" return prompt @@ -604,6 +665,9 @@ def update_screen(self) 
-> None: def refresh(self) -> None: """Recalculate and refresh the screen.""" + if self.in_bracketed_paste and self.buffer and not self.buffer[-1] == "\n": + return + # this call sets up self.cxy, so call it first. self.screen = self.calc_screen() self.console.refresh(self.screen, self.cxy) @@ -627,7 +691,7 @@ def do_cmd(self, cmd: tuple[str, list[str]]) -> None: self.after_command(command) - if self.dirty and not self.in_bracketed_paste: + if self.dirty: self.refresh() else: self.update_cursor() diff --git a/Lib/_pyrepl/readline.py b/Lib/_pyrepl/readline.py index 7d811bf41773fe..b10d0c66e4f813 100644 --- a/Lib/_pyrepl/readline.py +++ b/Lib/_pyrepl/readline.py @@ -263,6 +263,10 @@ def do(self) -> None: r = self.reader # type: ignore[assignment] r.dirty = True # this is needed to hide the completion menu, if visible + if self.reader.in_bracketed_paste: + r.insert("\n") + return + # if there are already several lines and the cursor # is not on the last one, always insert a new \n. text = r.get_unicode() diff --git a/Lib/_pyrepl/unix_console.py b/Lib/_pyrepl/unix_console.py index 2f73a59dd1fced..f1a6b84adfb671 100644 --- a/Lib/_pyrepl/unix_console.py +++ b/Lib/_pyrepl/unix_console.py @@ -150,6 +150,8 @@ def __init__( self.pollob = poll() self.pollob.register(self.input_fd, select.POLLIN) + self.input_buffer = b"" + self.input_buffer_pos = 0 curses.setupterm(term or None, self.output_fd) self.term = term @@ -197,6 +199,18 @@ def _my_getstr(cap: str, optional: bool = False) -> bytes | None: self.event_queue = EventQueue(self.input_fd, self.encoding) self.cursor_visible = 1 + def __read(self, n: int) -> bytes: + if not self.input_buffer or self.input_buffer_pos >= len(self.input_buffer): + self.input_buffer = os.read(self.input_fd, 10000) + + ret = self.input_buffer[self.input_buffer_pos : self.input_buffer_pos + n] + self.input_buffer_pos += len(ret) + if self.input_buffer_pos >= len(self.input_buffer): + self.input_buffer = b"" + self.input_buffer_pos = 0 + return ret 
+ + def change_encoding(self, encoding: str) -> None: """ Change the encoding used for I/O operations. @@ -373,7 +387,7 @@ def get_event(self, block: bool = True) -> Event | None: while self.event_queue.empty(): while True: try: - self.push_char(os.read(self.input_fd, 1)) + self.push_char(self.__read(1)) except OSError as err: if err.errno == errno.EINTR: if not self.event_queue.empty(): @@ -491,7 +505,7 @@ def getpending(self): e.raw += e.raw amount = struct.unpack("i", ioctl(self.input_fd, FIONREAD, b"\0\0\0\0"))[0] - raw = os.read(self.input_fd, amount) + raw = self.__read(amount) data = str(raw, self.encoding, "replace") e.data += data e.raw += raw @@ -514,7 +528,7 @@ def getpending(self): e.raw += e.raw amount = 10000 - raw = os.read(self.input_fd, amount) + raw = self.__read(amount) data = str(raw, self.encoding, "replace") e.data += data e.raw += raw diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 96e917e487d91a..20dbb1f7e17229 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -16,6 +16,8 @@ def str_width(c: str) -> int: def wlen(s: str) -> int: + if len(s) == 1: + return str_width(s) length = sum(str_width(i) for i in s) # remove lengths of any escape sequences sequence = ANSI_ESCAPE_SEQUENCE.findall(s) From 1b62bcee941e54244b3ce6476aef8913604987c9 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Tue, 11 Jun 2024 19:00:53 +0200 Subject: [PATCH 155/373] gh-120343: Do not reset byte_col_offset_diff after multiline tokens (#120352) Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> --- Lib/test/test_tokenize.py | 11 +++++++++++ .../2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst | 1 + Python/Python-tokenize.c | 7 ++++++- 3 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 4428e8cea1964c..36dba71766cc20 100644 --- 
a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1199,6 +1199,17 @@ def test_closing_parenthesis_from_different_line(self): NAME 'x' (1, 3) (1, 4) """) + def test_multiline_non_ascii_fstring(self): + self.check_tokenize("""\ +a = f''' + Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli'''""", """\ + NAME 'a' (1, 0) (1, 1) + OP '=' (1, 2) (1, 3) + FSTRING_START "f\'\'\'" (1, 4) (1, 8) + FSTRING_MIDDLE '\\n Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli' (1, 8) (2, 68) + FSTRING_END "\'\'\'" (2, 68) (2, 71) + """) + class GenerateTokensTest(TokenizeTest): def check_tokenize(self, s, expected): # Format the tokens in s in a table format. diff --git a/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst b/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst new file mode 100644 index 00000000000000..76714b0c394eef --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst @@ -0,0 +1 @@ +Fix column offset reporting for tokens that come after multiline f-strings in the :mod:`tokenize` module. 
diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c index 09fad18b5b4df7..2591dae35736ba 100644 --- a/Python/Python-tokenize.c +++ b/Python/Python-tokenize.c @@ -36,6 +36,7 @@ typedef struct /* Needed to cache line for performance */ PyObject *last_line; Py_ssize_t last_lineno; + Py_ssize_t last_end_lineno; Py_ssize_t byte_col_offset_diff; } tokenizeriterobject; @@ -77,6 +78,7 @@ tokenizeriter_new_impl(PyTypeObject *type, PyObject *readline, self->last_line = NULL; self->byte_col_offset_diff = 0; self->last_lineno = 0; + self->last_end_lineno = 0; return (PyObject *)self; } @@ -227,7 +229,9 @@ tokenizeriter_next(tokenizeriterobject *it) Py_XDECREF(it->last_line); line = PyUnicode_DecodeUTF8(line_start, size, "replace"); it->last_line = line; - it->byte_col_offset_diff = 0; + if (it->tok->lineno != it->last_end_lineno) { + it->byte_col_offset_diff = 0; + } } else { // Line hasn't changed so we reuse the cached one. line = it->last_line; @@ -241,6 +245,7 @@ tokenizeriter_next(tokenizeriterobject *it) Py_ssize_t lineno = ISSTRINGLIT(type) ? it->tok->first_lineno : it->tok->lineno; Py_ssize_t end_lineno = it->tok->lineno; it->last_lineno = lineno; + it->last_end_lineno = end_lineno; Py_ssize_t col_offset = -1; Py_ssize_t end_col_offset = -1; From 0335662fe1f663fe96e3e4acf0f34c5959d06b00 Mon Sep 17 00:00:00 2001 From: naglis <827324+naglis@users.noreply.github.com> Date: Tue, 11 Jun 2024 20:01:48 +0300 Subject: [PATCH 156/373] Fix typo in ElementTree docs (#120342) --- Doc/library/xml.etree.elementtree.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index e5919029c62c93..4c1e7bd7e6734a 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -508,7 +508,7 @@ Functions `C14N 2.0 `_ transformation function. 
Canonicalization is a way to normalise XML output in a way that allows - byte-by-byte comparisons and digital signatures. It reduced the freedom + byte-by-byte comparisons and digital signatures. It reduces the freedom that XML serializers have and instead generates a more constrained XML representation. The main restrictions regard the placement of namespace declarations, the ordering of attributes, and ignorable whitespace. From 86a8a1c57a386fb3330bee0fa44fc3fd6c3042a3 Mon Sep 17 00:00:00 2001 From: Eugene Triguba Date: Tue, 11 Jun 2024 13:40:31 -0400 Subject: [PATCH 157/373] gh-118908: Limit exposed globals from internal imports and definitions on new REPL startup (#119547) --- Lib/_pyrepl/simple_interact.py | 21 ++++++- Lib/test/test_pyrepl/test_pyrepl.py | 63 ++++++++++++++++++- Lib/test/test_repl.py | 5 +- ...-05-25-10-40-38.gh-issue-118908.XcZiq4.rst | 2 + 4 files changed, 83 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-25-10-40-38.gh-issue-118908.XcZiq4.rst diff --git a/Lib/_pyrepl/simple_interact.py b/Lib/_pyrepl/simple_interact.py index 2e5698eb131684..620f87b4867073 100644 --- a/Lib/_pyrepl/simple_interact.py +++ b/Lib/_pyrepl/simple_interact.py @@ -27,6 +27,7 @@ import _sitebuiltins import linecache +import builtins import sys import code from types import ModuleType @@ -34,6 +35,12 @@ from .console import InteractiveColoredConsole from .readline import _get_reader, multiline_input +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import Any + + _error: tuple[type[Exception], ...] 
| type[Exception] try: from .unix_console import _error @@ -73,20 +80,28 @@ def _clear_screen(): "clear": _clear_screen, } +DEFAULT_NAMESPACE: dict[str, Any] = { + '__name__': '__main__', + '__doc__': None, + '__package__': None, + '__loader__': None, + '__spec__': None, + '__annotations__': {}, + '__builtins__': builtins, +} def run_multiline_interactive_console( mainmodule: ModuleType | None = None, future_flags: int = 0, console: code.InteractiveConsole | None = None, ) -> None: - import __main__ from .readline import _setup _setup() - mainmodule = mainmodule or __main__ + namespace = mainmodule.__dict__ if mainmodule else DEFAULT_NAMESPACE if console is None: console = InteractiveColoredConsole( - mainmodule.__dict__, filename="" + namespace, filename="" ) if future_flags: console.compile.compiler.flags |= future_flags diff --git a/Lib/test/test_pyrepl/test_pyrepl.py b/Lib/test/test_pyrepl/test_pyrepl.py index 45114e7315749f..3167b8473bfe20 100644 --- a/Lib/test/test_pyrepl/test_pyrepl.py +++ b/Lib/test/test_pyrepl/test_pyrepl.py @@ -1,9 +1,13 @@ -import itertools import io +import itertools import os import rlcompleter -from unittest import TestCase +import select +import subprocess +import sys +from unittest import TestCase, skipUnless from unittest.mock import patch +from test.support import force_not_colorized from .support import ( FakeConsole, @@ -17,6 +21,10 @@ from _pyrepl.readline import ReadlineAlikeReader, ReadlineConfig from _pyrepl.readline import multiline_input as readline_multiline_input +try: + import pty +except ImportError: + pty = None class TestCursorPosition(TestCase): def prepare_reader(self, events): @@ -828,3 +836,54 @@ def test_bracketed_paste_single_line(self): reader = self.prepare_reader(events) output = multiline_input(reader) self.assertEqual(output, input_code) + + +@skipUnless(pty, "requires pty") +class TestMain(TestCase): + @force_not_colorized + def test_exposed_globals_in_repl(self): + expected_output = ( + 
"[\'__annotations__\', \'__builtins__\', \'__doc__\', \'__loader__\', " + "\'__name__\', \'__package__\', \'__spec__\']" + ) + output, exit_code = self.run_repl(["sorted(dir())", "exit"]) + if "can\'t use pyrepl" in output: + self.skipTest("pyrepl not available") + self.assertEqual(exit_code, 0) + self.assertIn(expected_output, output) + + def test_dumb_terminal_exits_cleanly(self): + env = os.environ.copy() + env.update({"TERM": "dumb"}) + output, exit_code = self.run_repl("exit()\n", env=env) + self.assertEqual(exit_code, 0) + self.assertIn("warning: can\'t use pyrepl", output) + self.assertNotIn("Exception", output) + self.assertNotIn("Traceback", output) + + def run_repl(self, repl_input: str | list[str], env: dict | None = None) -> tuple[str, int]: + master_fd, slave_fd = pty.openpty() + process = subprocess.Popen( + [sys.executable, "-i", "-u"], + stdin=slave_fd, + stdout=slave_fd, + stderr=slave_fd, + text=True, + close_fds=True, + env=env if env else os.environ, + ) + if isinstance(repl_input, list): + repl_input = "\n".join(repl_input) + "\n" + os.write(master_fd, repl_input.encode("utf-8")) + + output = [] + while select.select([master_fd], [], [], 0.5)[0]: + data = os.read(master_fd, 1024).decode("utf-8") + if not data: + break + output.append(data) + + os.close(master_fd) + os.close(slave_fd) + exit_code = process.wait() + return "\n".join(output), exit_code diff --git a/Lib/test/test_repl.py b/Lib/test/test_repl.py index 340178366fc13a..1caf09ceaf10fc 100644 --- a/Lib/test/test_repl.py +++ b/Lib/test/test_repl.py @@ -1,9 +1,9 @@ """Test the interactive interpreter.""" -import sys import os -import unittest import subprocess +import sys +import unittest from textwrap import dedent from test import support from test.support import cpython_only, has_subprocess_support, SuppressCrashReport @@ -199,7 +199,6 @@ def test_asyncio_repl_is_ok(self): assert_python_ok("-m", "asyncio") - class TestInteractiveModeSyntaxErrors(unittest.TestCase): def 
test_interactive_syntax_error_correct_line(self): diff --git a/Misc/NEWS.d/next/Library/2024-05-25-10-40-38.gh-issue-118908.XcZiq4.rst b/Misc/NEWS.d/next/Library/2024-05-25-10-40-38.gh-issue-118908.XcZiq4.rst new file mode 100644 index 00000000000000..bf58d7277fcd51 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-25-10-40-38.gh-issue-118908.XcZiq4.rst @@ -0,0 +1,2 @@ +Limit exposed globals from internal imports and definitions on new REPL +startup. Patch by Eugene Triguba and Pablo Galindo. From 939c201e00943c6dc2d515185168c30606ae522c Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Tue, 11 Jun 2024 20:50:21 +0300 Subject: [PATCH 158/373] gh-120326: Include <intrin.h> on Windows with Free Threading (#120329) --- Include/Python.h | 4 ++++ .../next/Build/2024-06-11-00-38-05.gh-issue-120326.JHSDF1.rst | 2 ++ 2 files changed, 6 insertions(+) create mode 100644 Misc/NEWS.d/next/Build/2024-06-11-00-38-05.gh-issue-120326.JHSDF1.rst diff --git a/Include/Python.h b/Include/Python.h index 502c5ec5aeaa3c..a1b33f6d3c42b2 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -51,6 +51,10 @@ # error "The limited API is not currently supported in the free-threaded build" #endif +#if defined(Py_GIL_DISABLED) && defined(_MSC_VER) +# include <intrin.h> // __readgsqword() +#endif + // Include Python header files #include "pyport.h" #include "pymacro.h" diff --git a/Misc/NEWS.d/next/Build/2024-06-11-00-38-05.gh-issue-120326.JHSDF1.rst b/Misc/NEWS.d/next/Build/2024-06-11-00-38-05.gh-issue-120326.JHSDF1.rst new file mode 100644 index 00000000000000..25cbdf6ba50ab8 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2024-06-11-00-38-05.gh-issue-120326.JHSDF1.rst @@ -0,0 +1,2 @@ +On Windows, fix build error when ``--disable-gil`` and ``--experimental-jit`` +options are combined.
From 203565b2f9c74656ba519780049b46d4e5afcba1 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Wed, 12 Jun 2024 03:10:23 +0800 Subject: [PATCH 159/373] gh-120198: Fix race condition when editing __class__ with an audit hook active (GH-120195) --- Lib/test/test_free_threading/test_type.py | 1 + Lib/test/test_super.py | 35 ++++++++++++++++++- ...-06-10-15-07-16.gh-issue-120198.WW_pjO.rst | 1 + Objects/typeobject.c | 3 +- 4 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-10-15-07-16.gh-issue-120198.WW_pjO.rst diff --git a/Lib/test/test_free_threading/test_type.py b/Lib/test/test_free_threading/test_type.py index 6eead198deed46..786336fa0cddce 100644 --- a/Lib/test/test_free_threading/test_type.py +++ b/Lib/test/test_free_threading/test_type.py @@ -1,3 +1,4 @@ +import threading import unittest from concurrent.futures import ThreadPoolExecutor diff --git a/Lib/test/test_super.py b/Lib/test/test_super.py index 256b416caaa584..3ffbe03f0c2f11 100644 --- a/Lib/test/test_super.py +++ b/Lib/test/test_super.py @@ -1,9 +1,10 @@ """Unit tests for zero-argument super() & related machinery.""" import textwrap +import threading import unittest from unittest.mock import patch -from test.support import import_helper +from test.support import import_helper, threading_helper ADAPTIVE_WARMUP_DELAY = 2 @@ -505,6 +506,38 @@ def some(cls): for _ in range(ADAPTIVE_WARMUP_DELAY): C.some(C) + @threading_helper.requires_working_threading() + def test___class___modification_multithreaded(self): + """ Note: this test isn't actually testing anything on its own. + It requires a sys audithook to be set to crash on older Python. + This should be the case anyways as our test suite sets + an audit hook. 
+ """ + class Foo: + pass + + class Bar: + pass + + thing = Foo() + def work(): + foo = thing + for _ in range(5000): + foo.__class__ = Bar + type(foo) + foo.__class__ = Foo + type(foo) + + + threads = [] + for _ in range(6): + thread = threading.Thread(target=work) + thread.start() + threads.append(thread) + + for thread in threads: + thread.join() + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-10-15-07-16.gh-issue-120198.WW_pjO.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-10-15-07-16.gh-issue-120198.WW_pjO.rst new file mode 100644 index 00000000000000..8dc8aec44d80c4 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-10-15-07-16.gh-issue-120198.WW_pjO.rst @@ -0,0 +1 @@ +Fix a crash when multiple threads read and write to the same ``__class__`` of an object concurrently. diff --git a/Objects/typeobject.c b/Objects/typeobject.c index cd16bebd1e1cb8..070e3d2f7bf2b4 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -6522,7 +6522,6 @@ compatible_for_assignment(PyTypeObject* oldto, PyTypeObject* newto, const char* static int object_set_class(PyObject *self, PyObject *value, void *closure) { - PyTypeObject *oldto = Py_TYPE(self); if (value == NULL) { PyErr_SetString(PyExc_TypeError, @@ -6542,6 +6541,8 @@ object_set_class(PyObject *self, PyObject *value, void *closure) return -1; } + PyTypeObject *oldto = Py_TYPE(self); + /* In versions of CPython prior to 3.5, the code in compatible_for_assignment was not set up to correctly check for memory layout / slot / etc. 
compatibility for non-HEAPTYPE classes, so we just From 34e4d3287e724c065cc07b04a1ee8715817db284 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 11 Jun 2024 20:20:25 +0100 Subject: [PATCH 160/373] gh-120221: Deliver real signals on Ctrl-C and Ctrl-Z in the new REPL (#120354) --- Lib/_pyrepl/unix_console.py | 8 ++++---- .../2024-06-11-17-56-12.gh-issue-120221.si9hM9.rst | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-11-17-56-12.gh-issue-120221.si9hM9.rst diff --git a/Lib/_pyrepl/unix_console.py b/Lib/_pyrepl/unix_console.py index f1a6b84adfb671..af9290819c2c78 100644 --- a/Lib/_pyrepl/unix_console.py +++ b/Lib/_pyrepl/unix_console.py @@ -324,13 +324,13 @@ def prepare(self): """ self.__svtermstate = tcgetattr(self.input_fd) raw = self.__svtermstate.copy() - raw.iflag &= ~(termios.BRKINT | termios.INPCK | termios.ISTRIP | termios.IXON) + raw.iflag &= ~(termios.INPCK | termios.ISTRIP | termios.IXON) raw.oflag &= ~(termios.OPOST) raw.cflag &= ~(termios.CSIZE | termios.PARENB) raw.cflag |= termios.CS8 - raw.lflag &= ~( - termios.ICANON | termios.ECHO | termios.IEXTEN | (termios.ISIG * 1) - ) + raw.iflag |= termios.BRKINT + raw.lflag &= ~(termios.ICANON | termios.ECHO | termios.IEXTEN) + raw.lflag |= termios.ISIG raw.cc[termios.VMIN] = 1 raw.cc[termios.VTIME] = 0 tcsetattr(self.input_fd, termios.TCSADRAIN, raw) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-11-17-56-12.gh-issue-120221.si9hM9.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-11-17-56-12.gh-issue-120221.si9hM9.rst new file mode 100644 index 00000000000000..3781576bc5a257 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-11-17-56-12.gh-issue-120221.si9hM9.rst @@ -0,0 +1,2 @@ +Deliver real signals on Ctrl-C and Ctrl-Z in the new REPL.
Patch by Pablo +Galindo From f5a9c34f38886c5cf9c2f8d860eee3473447e030 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Wed, 12 Jun 2024 04:00:56 +0300 Subject: [PATCH 161/373] gh-120056: Add `IP_RECVERR`, `IP_RECVORIGDSTADDR`, `IP_RECVTTL` to `socket` module (#120058) * gh-120056: Add `IP_RECVERR` and `IP_RECVTTL` to `socket` module * Fix news * Address review * Update NEWS --- Doc/library/socket.rst | 4 ++++ .../2024-06-04-19-49-16.gh-issue-120056.5aqozw.rst | 3 +++ Modules/socketmodule.c | 9 +++++++++ 3 files changed, 16 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-06-04-19-49-16.gh-issue-120056.5aqozw.rst diff --git a/Doc/library/socket.rst b/Doc/library/socket.rst index 2df0257d1f24f0..782fb9b27ae1ba 100644 --- a/Doc/library/socket.rst +++ b/Doc/library/socket.rst @@ -450,6 +450,10 @@ Constants same way that ``SO_BINDTODEVICE`` is used, but with the index of a network interface instead of its name. + .. versionchanged:: 3.14 + Added missing ``IP_RECVERR``, ``IP_RECVTTL``, and ``IP_RECVORIGDSTADDR`` + on Linux. + .. data:: AF_CAN PF_CAN SOL_CAN_* diff --git a/Misc/NEWS.d/next/Library/2024-06-04-19-49-16.gh-issue-120056.5aqozw.rst b/Misc/NEWS.d/next/Library/2024-06-04-19-49-16.gh-issue-120056.5aqozw.rst new file mode 100644 index 00000000000000..0adb70f51e8a0c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-04-19-49-16.gh-issue-120056.5aqozw.rst @@ -0,0 +1,3 @@ +Add :data:`!socket.IP_RECVERR` and :data:`!socket.IP_RECVTTL` constants +(both available since Linux 2.2). +And :data:`!socket.IP_RECVORIGDSTADDR` constant (available since Linux 2.6.29). 
diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c index cb7dc25e23fb3d..0626d7934983db 100644 --- a/Modules/socketmodule.c +++ b/Modules/socketmodule.c @@ -8412,15 +8412,24 @@ socket_exec(PyObject *m) #ifdef IP_TTL ADD_INT_MACRO(m, IP_TTL); #endif +#ifdef IP_RECVERR + ADD_INT_MACRO(m, IP_RECVERR); +#endif #ifdef IP_RECVOPTS ADD_INT_MACRO(m, IP_RECVOPTS); #endif +#ifdef IP_RECVORIGDSTADDR + ADD_INT_MACRO(m, IP_RECVORIGDSTADDR); +#endif #ifdef IP_RECVRETOPTS ADD_INT_MACRO(m, IP_RECVRETOPTS); #endif #ifdef IP_RECVTOS ADD_INT_MACRO(m, IP_RECVTOS); #endif +#ifdef IP_RECVTTL + ADD_INT_MACRO(m, IP_RECVTTL); +#endif #ifdef IP_RECVDSTADDR ADD_INT_MACRO(m, IP_RECVDSTADDR); #endif From 19435d299a1fae9ad9a6bbe6609e41ddfd7f6cbe Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Wed, 12 Jun 2024 10:37:14 +0300 Subject: [PATCH 162/373] gh-120385: Fix reference leak in symtable (#120386) Decref 'new_ste' if symtable_visit_annotations() fails. --- Python/symtable.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Python/symtable.c b/Python/symtable.c index 287bc2bd58107d..627184da9ef4ed 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -2027,8 +2027,10 @@ symtable_visit_stmt(struct symtable *st, stmt_ty s) } if (!symtable_visit_annotations(st, s, s->v.AsyncFunctionDef.args, - s->v.AsyncFunctionDef.returns, new_ste)) + s->v.AsyncFunctionDef.returns, new_ste)) { + Py_DECREF(new_ste); VISIT_QUIT(st, 0); + } if (!symtable_enter_existing_block(st, new_ste)) { Py_DECREF(new_ste); VISIT_QUIT(st, 0); From 02e74c356223feb0771759286d24d1dbac01d4ca Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Wed, 12 Jun 2024 10:21:53 +0200 Subject: [PATCH 163/373] gh-118908: Fix completions after namespace change in REPL (#120370) --- Lib/_pyrepl/readline.py | 13 ++++++++++--- Lib/_pyrepl/simple_interact.py | 4 ++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/Lib/_pyrepl/readline.py b/Lib/_pyrepl/readline.py index 
b10d0c66e4f813..28f592d80b1b03 100644 --- a/Lib/_pyrepl/readline.py +++ b/Lib/_pyrepl/readline.py @@ -55,6 +55,11 @@ from collections.abc import Callable, Collection from .types import Callback, Completer, KeySpec, CommandName +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import Any + MoreLinesCallable = Callable[[str], bool] @@ -92,7 +97,7 @@ @dataclass class ReadlineConfig: - readline_completer: Completer | None = RLCompleter().complete + readline_completer: Completer | None = None completer_delims: frozenset[str] = frozenset(" \t\n`~!@#$%^&*()-=+[{]}\\|;:'\",<>/?") @@ -554,7 +559,7 @@ def stub(*args: object, **kwds: object) -> None: # ____________________________________________________________ -def _setup() -> None: +def _setup(namespace: dict[str, Any]) -> None: global raw_input if raw_input is not None: return # don't run _setup twice @@ -570,9 +575,11 @@ def _setup() -> None: _wrapper.f_in = f_in _wrapper.f_out = f_out + # set up namespace in rlcompleter + _wrapper.config.readline_completer = RLCompleter(namespace).complete + # this is not really what readline.c does. 
Better than nothing I guess import builtins - raw_input = builtins.input builtins.input = _wrapper.input diff --git a/Lib/_pyrepl/simple_interact.py b/Lib/_pyrepl/simple_interact.py index 620f87b4867073..2de3b38c37a9da 100644 --- a/Lib/_pyrepl/simple_interact.py +++ b/Lib/_pyrepl/simple_interact.py @@ -96,9 +96,9 @@ def run_multiline_interactive_console( console: code.InteractiveConsole | None = None, ) -> None: from .readline import _setup - _setup() - namespace = mainmodule.__dict__ if mainmodule else DEFAULT_NAMESPACE + _setup(namespace) + if console is None: console = InteractiveColoredConsole( namespace, filename="" From 7dd8c37a067f9fcb6a2a658d6a93b294cc2e6fb4 Mon Sep 17 00:00:00 2001 From: Owain Davies <116417456+OTheDev@users.noreply.github.com> Date: Wed, 12 Jun 2024 17:07:25 +0700 Subject: [PATCH 164/373] gh-101575: document Decimal.__round__() (GH-101737) --- Doc/library/decimal.rst | 42 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/Doc/library/decimal.rst b/Doc/library/decimal.rst index 3e33581f96f16a..db323802a6f68c 100644 --- a/Doc/library/decimal.rst +++ b/Doc/library/decimal.rst @@ -897,6 +897,48 @@ Decimal objects :const:`Rounded`. If given, applies *rounding*; otherwise, uses the rounding method in either the supplied *context* or the current context. + Decimal numbers can be rounded using the :func:`.round` function: + + .. describe:: round(number) + .. describe:: round(number, ndigits) + + If *ndigits* is not given or ``None``, + returns the nearest :class:`int` to *number*, + rounding ties to even, and ignoring the rounding mode of the + :class:`Decimal` context. Raises :exc:`OverflowError` if *number* is an + infinity or :exc:`ValueError` if it is a (quiet or signaling) NaN. 
+ + If *ndigits* is an :class:`int`, the context's rounding mode is respected + and a :class:`Decimal` representing *number* rounded to the nearest + multiple of ``Decimal('1E-ndigits')`` is returned; in this case, + ``round(number, ndigits)`` is equivalent to + ``self.quantize(Decimal('1E-ndigits'))``. Returns ``Decimal('NaN')`` if + *number* is a quiet NaN. Raises :class:`InvalidOperation` if *number* + is an infinity, a signaling NaN, or if the length of the coefficient after + the quantize operation would be greater than the current context's + precision. In other words, for the non-corner cases: + + * if *ndigits* is positive, return *number* rounded to *ndigits* decimal + places; + * if *ndigits* is zero, return *number* rounded to the nearest integer; + * if *ndigits* is negative, return *number* rounded to the nearest + multiple of ``10**abs(ndigits)``. + + For example:: + + >>> from decimal import Decimal, getcontext, ROUND_DOWN + >>> getcontext().rounding = ROUND_DOWN + >>> round(Decimal('3.75')) # context rounding ignored + 4 + >>> round(Decimal('3.5')) # round-ties-to-even + 4 + >>> round(Decimal('3.75'), 0) # uses the context rounding + Decimal('3') + >>> round(Decimal('3.75'), 1) + Decimal('3.7') + >>> round(Decimal('3.75'), -1) + Decimal('0E+1') + .. _logical_operands_label: From 755dab719dfc924dd8aef46f67512dabb8f25071 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 12 Jun 2024 13:14:50 +0200 Subject: [PATCH 165/373] gh-120029: make `symtable.Symbol.__repr__` correctly reflect the compiler's flags, add methods (#120099) Expose :class:`symtable.Symbol` methods :meth:`~symtable.Symbol.is_free_class`, :meth:`~symtable.Symbol.is_comp_iter` and :meth:`~symtable.Symbol.is_comp_cell`. 
--------- Co-authored-by: Carl Meyer --- Doc/library/symtable.rst | 34 +++++++++++++++++++ Doc/whatsnew/3.14.rst | 11 ++++++ Include/internal/pycore_symtable.h | 2 +- Lib/symtable.py | 32 ++++++++++++++--- Lib/test/test_symtable.py | 21 ++++++++++++ ...-06-05-11-03-10.gh-issue-120029.QBsw47.rst | 4 +++ Modules/symtablemodule.c | 2 ++ 7 files changed, 100 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-05-11-03-10.gh-issue-120029.QBsw47.rst diff --git a/Doc/library/symtable.rst b/Doc/library/symtable.rst index e17a33f7feb1ab..050a941d9d0516 100644 --- a/Doc/library/symtable.rst +++ b/Doc/library/symtable.rst @@ -155,6 +155,8 @@ Examining Symbol Tables Return ``True`` if the symbol is a type parameter. + .. versionadded:: 3.14 + .. method:: is_global() Return ``True`` if the symbol is global. @@ -182,10 +184,42 @@ Examining Symbol Tables Return ``True`` if the symbol is referenced in its block, but not assigned to. + .. method:: is_free_class() + + Return *True* if a class-scoped symbol is free from + the perspective of a method. + + Consider the following example:: + + def f(): + x = 1 # function-scoped + class C: + x = 2 # class-scoped + def method(self): + return x + + In this example, the class-scoped symbol ``x`` is considered to + be free from the perspective of ``C.method``, thereby allowing + the latter to return *1* at runtime and not *2*. + + .. versionadded:: 3.14 + .. method:: is_assigned() Return ``True`` if the symbol is assigned to in its block. + .. method:: is_comp_iter() + + Return ``True`` if the symbol is a comprehension iteration variable. + + .. versionadded:: 3.14 + + .. method:: is_comp_cell() + + Return ``True`` if the symbol is a cell in an inlined comprehension. + + .. versionadded:: 3.14 + .. method:: is_namespace() Return ``True`` if name binding introduces new namespace. 
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index b77ff30a8fbbee..b357553735e8bb 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -100,6 +100,17 @@ os by :func:`os.unsetenv`, or made outside Python in the same process. (Contributed by Victor Stinner in :gh:`120057`.) +symtable +-------- + +* Expose the following :class:`symtable.Symbol` methods: + + * :meth:`~symtable.Symbol.is_free_class` + * :meth:`~symtable.Symbol.is_comp_iter` + * :meth:`~symtable.Symbol.is_comp_cell` + + (Contributed by Bénédikt Tran in :gh:`120029`.) + Optimizations ============= diff --git a/Include/internal/pycore_symtable.h b/Include/internal/pycore_symtable.h index 5d544765237df5..1be48edc80c830 100644 --- a/Include/internal/pycore_symtable.h +++ b/Include/internal/pycore_symtable.h @@ -154,7 +154,7 @@ extern PyObject* _Py_Mangle(PyObject *p, PyObject *name); #define DEF_BOUND (DEF_LOCAL | DEF_PARAM | DEF_IMPORT) /* GLOBAL_EXPLICIT and GLOBAL_IMPLICIT are used internally by the symbol - table. GLOBAL is returned from PyST_GetScope() for either of them. + table. GLOBAL is returned from _PyST_GetScope() for either of them. It is stored in ste_symbols at bits 13-16. 
*/ #define SCOPE_OFFSET 12 diff --git a/Lib/symtable.py b/Lib/symtable.py index af65e93e68eda4..d6ac1f527ba8ba 100644 --- a/Lib/symtable.py +++ b/Lib/symtable.py @@ -4,7 +4,10 @@ from _symtable import ( USE, DEF_GLOBAL, DEF_NONLOCAL, DEF_LOCAL, - DEF_PARAM, DEF_TYPE_PARAM, DEF_IMPORT, DEF_BOUND, DEF_ANNOT, + DEF_PARAM, DEF_TYPE_PARAM, + DEF_FREE_CLASS, + DEF_IMPORT, DEF_BOUND, DEF_ANNOT, + DEF_COMP_ITER, DEF_COMP_CELL, SCOPE_OFF, SCOPE_MASK, FREE, LOCAL, GLOBAL_IMPLICIT, GLOBAL_EXPLICIT, CELL ) @@ -158,6 +161,10 @@ def get_children(self): for st in self._table.children] +def _get_scope(flags): # like _PyST_GetScope() + return (flags >> SCOPE_OFF) & SCOPE_MASK + + class Function(SymbolTable): # Default values for instance variables @@ -183,7 +190,7 @@ def get_locals(self): """ if self.__locals is None: locs = (LOCAL, CELL) - test = lambda x: ((x >> SCOPE_OFF) & SCOPE_MASK) in locs + test = lambda x: _get_scope(x) in locs self.__locals = self.__idents_matching(test) return self.__locals @@ -192,7 +199,7 @@ def get_globals(self): """ if self.__globals is None: glob = (GLOBAL_IMPLICIT, GLOBAL_EXPLICIT) - test = lambda x:((x >> SCOPE_OFF) & SCOPE_MASK) in glob + test = lambda x: _get_scope(x) in glob self.__globals = self.__idents_matching(test) return self.__globals @@ -207,7 +214,7 @@ def get_frees(self): """Return a tuple of free variables in the function. 
""" if self.__frees is None: - is_free = lambda x:((x >> SCOPE_OFF) & SCOPE_MASK) == FREE + is_free = lambda x: _get_scope(x) == FREE self.__frees = self.__idents_matching(is_free) return self.__frees @@ -234,7 +241,7 @@ class Symbol: def __init__(self, name, flags, namespaces=None, *, module_scope=False): self.__name = name self.__flags = flags - self.__scope = (flags >> SCOPE_OFF) & SCOPE_MASK # like PyST_GetScope() + self.__scope = _get_scope(flags) self.__namespaces = namespaces or () self.__module_scope = module_scope @@ -303,6 +310,11 @@ def is_free(self): """ return bool(self.__scope == FREE) + def is_free_class(self): + """Return *True* if a class-scoped symbol is free from + the perspective of a method.""" + return bool(self.__flags & DEF_FREE_CLASS) + def is_imported(self): """Return *True* if the symbol is created from an import statement. @@ -313,6 +325,16 @@ def is_assigned(self): """Return *True* if a symbol is assigned to.""" return bool(self.__flags & DEF_LOCAL) + def is_comp_iter(self): + """Return *True* if the symbol is a comprehension iteration variable. + """ + return bool(self.__flags & DEF_COMP_ITER) + + def is_comp_cell(self): + """Return *True* if the symbol is a cell in an inlined comprehension. + """ + return bool(self.__flags & DEF_COMP_CELL) + def is_namespace(self): """Returns *True* if name binding introduces new namespace. 
diff --git a/Lib/test/test_symtable.py b/Lib/test/test_symtable.py index a4b111e865c86e..903c6d66f50964 100644 --- a/Lib/test/test_symtable.py +++ b/Lib/test/test_symtable.py @@ -304,6 +304,27 @@ def test_symbol_repr(self): self.assertEqual(repr(self.GenericMine.lookup("T")), "") + st1 = symtable.symtable("[x for x in [1]]", "?", "exec") + self.assertEqual(repr(st1.lookup("x")), + "") + + st2 = symtable.symtable("[(lambda: x) for x in [1]]", "?", "exec") + self.assertEqual(repr(st2.lookup("x")), + "") + + st3 = symtable.symtable("def f():\n" + " x = 1\n" + " class A:\n" + " x = 2\n" + " def method():\n" + " return x\n", + "?", "exec") + # child 0 is for __annotate__ + func_f = st3.get_children()[1] + class_A = func_f.get_children()[0] + self.assertEqual(repr(class_A.lookup('x')), + "") + def test_symtable_entry_repr(self): expected = f"" self.assertEqual(repr(self.top._table), expected) diff --git a/Misc/NEWS.d/next/Library/2024-06-05-11-03-10.gh-issue-120029.QBsw47.rst b/Misc/NEWS.d/next/Library/2024-06-05-11-03-10.gh-issue-120029.QBsw47.rst new file mode 100644 index 00000000000000..d1b2c592a113ce --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-05-11-03-10.gh-issue-120029.QBsw47.rst @@ -0,0 +1,4 @@ +Expose :class:`symtable.Symbol` methods :meth:`~symtable.Symbol.is_free_class`, +:meth:`~symtable.Symbol.is_comp_iter` and :meth:`~symtable.Symbol.is_comp_cell`. +Patch by Bénédikt Tran. 
+ diff --git a/Modules/symtablemodule.c b/Modules/symtablemodule.c index 63c4dd4225298d..b39b59bf7b06bf 100644 --- a/Modules/symtablemodule.c +++ b/Modules/symtablemodule.c @@ -81,6 +81,8 @@ symtable_init_constants(PyObject *m) if (PyModule_AddIntMacro(m, DEF_IMPORT) < 0) return -1; if (PyModule_AddIntMacro(m, DEF_BOUND) < 0) return -1; if (PyModule_AddIntMacro(m, DEF_ANNOT) < 0) return -1; + if (PyModule_AddIntMacro(m, DEF_COMP_ITER) < 0) return -1; + if (PyModule_AddIntMacro(m, DEF_COMP_CELL) < 0) return -1; if (PyModule_AddIntConstant(m, "TYPE_FUNCTION", FunctionBlock) < 0) return -1; From 97b69db167be28a33688db436551a6c3c3ea4662 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Wed, 12 Jun 2024 12:53:19 +0100 Subject: [PATCH 166/373] gh-93691: fix too broad source locations of for statement iterators (#120330) --- Lib/test/test_compiler_codegen.py | 1 + Lib/test/test_iter.py | 46 +++++++++++++++++++ Lib/test/test_sys_settrace.py | 6 +-- ...4-06-10-22-30-26.gh-issue-93691.68WOTS.rst | 2 + Programs/test_frozenmain.h | 9 ++-- Python/compile.c | 7 +++ 6 files changed, 63 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-10-22-30-26.gh-issue-93691.68WOTS.rst diff --git a/Lib/test/test_compiler_codegen.py b/Lib/test/test_compiler_codegen.py index 1088b4aa9e624d..d82fb85ed259ab 100644 --- a/Lib/test/test_compiler_codegen.py +++ b/Lib/test/test_compiler_codegen.py @@ -49,6 +49,7 @@ def test_for_loop(self): ('GET_ITER', None, 1), loop_lbl := self.Label(), ('FOR_ITER', exit_lbl := self.Label(), 1), + ('NOP', None, 1, 1), ('STORE_NAME', 1, 1), ('LOAD_NAME', 2, 2), ('PUSH_NULL', None, 2), diff --git a/Lib/test/test_iter.py b/Lib/test/test_iter.py index 9606d5beab71cb..ec2b68acb90785 100644 --- a/Lib/test/test_iter.py +++ b/Lib/test/test_iter.py @@ -10,6 +10,7 @@ import functools import contextlib import builtins +import traceback # Test result of triple loop (too big to inline) 
TRIPLETS = [(0, 0, 0), (0, 0, 1), (0, 0, 2), @@ -1143,6 +1144,51 @@ def test_error_iter(self): self.assertRaises(TypeError, iter, typ()) self.assertRaises(ZeroDivisionError, iter, BadIterableClass()) + def test_exception_locations(self): + # The location of an exception raised from __init__ or + # __next__ should be the iterator expression + + class Iter: + def __init__(self, init_raises=False, next_raises=False): + if init_raises: + 1/0 + self.next_raises = next_raises + + def __next__(self): + if self.next_raises: + 1/0 + + def __iter__(self): + return self + + def init_raises(): + try: + for x in Iter(init_raises=True): + pass + except Exception as e: + return e + + def next_raises(): + try: + for x in Iter(next_raises=True): + pass + except Exception as e: + return e + + for func, expected in [(init_raises, "Iter(init_raises=True)"), + (next_raises, "Iter(next_raises=True)"), + ]: + with self.subTest(func): + exc = func() + f = traceback.extract_tb(exc.__traceback__)[0] + indent = 16 + co = func.__code__ + self.assertEqual(f.lineno, co.co_firstlineno + 2) + self.assertEqual(f.end_lineno, co.co_firstlineno + 2) + self.assertEqual(f.line[f.colno - indent : f.end_colno - indent], + expected) + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_sys_settrace.py b/Lib/test/test_sys_settrace.py index ded1d9224d82d9..c622fd9ce7c466 100644 --- a/Lib/test/test_sys_settrace.py +++ b/Lib/test/test_sys_settrace.py @@ -1650,15 +1650,15 @@ def func(): EXPECTED_EVENTS = [ (0, 'call'), (2, 'line'), - (1, 'line'), (-3, 'call'), (-2, 'line'), (-2, 'return'), - (4, 'line'), (1, 'line'), + (4, 'line'), + (2, 'line'), (-2, 'call'), (-2, 'return'), - (1, 'return'), + (2, 'return'), ] # C level events should be the same as expected and the same as Python level.
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-10-22-30-26.gh-issue-93691.68WOTS.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-10-22-30-26.gh-issue-93691.68WOTS.rst new file mode 100644 index 00000000000000..294f8d892b459b --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-10-22-30-26.gh-issue-93691.68WOTS.rst @@ -0,0 +1,2 @@ +Fix source locations of instructions generated for the iterator of a for +statement. diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index cdc417e48ebec6..f34d7ea0789310 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -27,12 +27,11 @@ unsigned char M_test_frozenmain[] = { 3,0,0,0,218,3,107,101,121,169,0,243,0,0,0,0, 218,18,116,101,115,116,95,102,114,111,122,101,110,109,97,105, 110,46,112,121,218,8,60,109,111,100,117,108,101,62,114,18, - 0,0,0,1,0,0,0,115,99,0,0,0,240,3,1,1, + 0,0,0,1,0,0,0,115,94,0,0,0,240,3,1,1, 1,243,8,0,1,11,219,0,24,225,0,5,208,6,26,212, 0,27,217,0,5,128,106,144,35,151,40,145,40,212,0,27, 216,9,26,215,9,38,210,9,38,211,9,40,168,24,209,9, - 50,128,6,240,2,6,12,2,242,0,7,1,42,128,67,241, - 14,0,5,10,136,71,144,67,144,53,152,2,152,54,160,35, - 153,59,152,45,208,10,40,214,4,41,242,15,7,1,42,114, - 16,0,0,0, + 50,128,6,243,2,6,12,2,128,67,241,14,0,5,10,136, + 71,144,67,144,53,152,2,152,54,160,35,153,59,152,45,208, + 10,40,214,4,41,242,15,6,12,2,114,16,0,0,0, }; diff --git a/Python/compile.c b/Python/compile.c index c3372766d0bd50..749b69f5911386 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -3025,11 +3025,18 @@ compiler_for(struct compiler *c, stmt_ty s) RETURN_IF_ERROR(compiler_push_fblock(c, loc, FOR_LOOP, start, end, NULL)); VISIT(c, expr, s->v.For.iter); + + loc = LOC(s->v.For.iter); ADDOP(c, loc, GET_ITER); USE_LABEL(c, start); ADDOP_JUMP(c, loc, FOR_ITER, cleanup); + /* Add NOP to ensure correct line tracing of multiline for statements. + * It will be removed later if redundant. 
+ */ + ADDOP(c, LOC(s->v.For.target), NOP); + USE_LABEL(c, body); VISIT(c, expr, s->v.For.target); VISIT_SEQ(c, stmt, s->v.For.body); From ce3879bd45e068f8e2a5a214acd234ca44cad53b Mon Sep 17 00:00:00 2001 From: Xie Yanbo Date: Wed, 12 Jun 2024 20:24:43 +0800 Subject: [PATCH 167/373] Fix typos in documentation (#120338) --- InternalDocs/compiler.md | 2 +- Lib/idlelib/HISTORY.txt | 2 +- Lib/idlelib/News3.txt | 6 +++--- Lib/idlelib/TODO.txt | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/InternalDocs/compiler.md b/InternalDocs/compiler.md index 0abc10da6e05c6..17fe0df6e1db10 100644 --- a/InternalDocs/compiler.md +++ b/InternalDocs/compiler.md @@ -625,7 +625,7 @@ Objects * [Objects/locations.md](https://github.com/python/cpython/blob/main/Objects/locations.md): Describes the location table * [Objects/frame_layout.md](https://github.com/python/cpython/blob/main/Objects/frame_layout.md): Describes the frame stack -* [Objects/object_layout.md](https://github.com/python/cpython/blob/main/Objects/object_layout.md): Descibes object layout for 3.11 and later +* [Objects/object_layout.md](https://github.com/python/cpython/blob/main/Objects/object_layout.md): Describes object layout for 3.11 and later * [Exception Handling](exception_handling.md): Describes the exception table diff --git a/Lib/idlelib/HISTORY.txt b/Lib/idlelib/HISTORY.txt index 731fabd185fbbf..a601b25b5f838f 100644 --- a/Lib/idlelib/HISTORY.txt +++ b/Lib/idlelib/HISTORY.txt @@ -277,7 +277,7 @@ Command to format a paragraph. Debug menu: JIT (Just-In-Time) stack viewer toggle -- if set, the stack viewer -automaticall pops up when you get a traceback. +automatically pops up when you get a traceback. Windows menu: diff --git a/Lib/idlelib/News3.txt b/Lib/idlelib/News3.txt index fb07d7b3b3fad8..b1b652dc562c8e 100644 --- a/Lib/idlelib/News3.txt +++ b/Lib/idlelib/News3.txt @@ -568,14 +568,14 @@ bpo-33679: Enable theme-specific color configuration for Code Context. 
color setting, default or custom, on the extensions tab, that applied to all themes.) For built-in themes, the foreground is the same as normal text and the background is a contrasting gray. Context colors for -custom themes are set on the Hightlights tab along with other colors. +custom themes are set on the Highlights tab along with other colors. When one starts IDLE from a console and loads a custom theme without definitions for 'context', one will see a warning message on the console. bpo-33642: Display up to maxlines non-blank lines for Code Context. If there is no current context, show a single blank line. (Previously, -the Code Contex had numlines lines, usually with some blank.) The use +the Code Context had numlines lines, usually with some blank.) The use of a new option, 'maxlines' (default 15), avoids possible interference with user settings of the old option, 'numlines' (default 3). @@ -729,7 +729,7 @@ not affect their keyset-specific customization after 3.6.3. and vice versa. Initial patch by Charles Wohlganger, revised by Terry Jan Reedy. -bpo-31051: Rearrange condigdialog General tab. +bpo-31051: Rearrange configdialog General tab. Sort non-Help options into Window (Shell+Editor) and Editor (only). Leave room for the addition of new options. Patch by Terry Jan Reedy. diff --git a/Lib/idlelib/TODO.txt b/Lib/idlelib/TODO.txt index e2f1ac0f274001..41b86b0c6d5bbd 100644 --- a/Lib/idlelib/TODO.txt +++ b/Lib/idlelib/TODO.txt @@ -179,7 +179,7 @@ it -- i.e. you can only edit the current command, and the cursor can't escape from the command area. (Albert Brandl) - Set X11 class to "idle/Idle", set icon and title to something -beginning with "idle" -- for window manangers. (Randall Hopper) +beginning with "idle" -- for window managers. (Randall Hopper) - Config files editable through a preferences dialog. 
(me) DONE From e16aed63f64b18a26859eff3de976ded373e66b8 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Wed, 12 Jun 2024 20:41:07 +0800 Subject: [PATCH 168/373] gh-117657: Make Py_TYPE and Py_SET_TYPE thread safe (GH-120165) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Nadeshiko Manju --- Include/internal/pycore_interp.h | 5 ++++- Include/object.h | 8 +++++++ Lib/test/test_free_threading/test_type.py | 26 ++++++++++++++++++++++ Objects/typeobject.c | 8 ++++++- Tools/tsan/suppressions_free_threading.txt | 2 -- 5 files changed, 45 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 86dada5061e7b5..6b5f50b88f7b85 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -401,7 +401,10 @@ PyAPI_FUNC(PyStatus) _PyInterpreterState_New( #define RARE_EVENT_INTERP_INC(interp, name) \ do { \ /* saturating add */ \ - if (interp->rare_events.name < UINT8_MAX) interp->rare_events.name++; \ + int val = FT_ATOMIC_LOAD_UINT8_RELAXED(interp->rare_events.name); \ + if (val < UINT8_MAX) { \ + FT_ATOMIC_STORE_UINT8(interp->rare_events.name, val + 1); \ + } \ RARE_EVENT_STAT_INC(name); \ } while (0); \ diff --git a/Include/object.h b/Include/object.h index c8c63b9b2b1450..4a39ada8c7daa4 100644 --- a/Include/object.h +++ b/Include/object.h @@ -246,7 +246,11 @@ _Py_IsOwnedByCurrentThread(PyObject *ob) // bpo-39573: The Py_SET_TYPE() function must be used to set an object type. 
static inline PyTypeObject* Py_TYPE(PyObject *ob) { +#ifdef Py_GIL_DISABLED + return (PyTypeObject *)_Py_atomic_load_ptr_relaxed(&ob->ob_type); +#else return ob->ob_type; +#endif } #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 # define Py_TYPE(ob) Py_TYPE(_PyObject_CAST(ob)) @@ -274,7 +278,11 @@ static inline int Py_IS_TYPE(PyObject *ob, PyTypeObject *type) { static inline void Py_SET_TYPE(PyObject *ob, PyTypeObject *type) { +#ifdef Py_GIL_DISABLED + _Py_atomic_store_ptr(&ob->ob_type, type); +#else ob->ob_type = type; +#endif } #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 # define Py_SET_TYPE(ob, type) Py_SET_TYPE(_PyObject_CAST(ob), type) diff --git a/Lib/test/test_free_threading/test_type.py b/Lib/test/test_free_threading/test_type.py index 786336fa0cddce..1e84b2db2d4882 100644 --- a/Lib/test/test_free_threading/test_type.py +++ b/Lib/test/test_free_threading/test_type.py @@ -96,6 +96,32 @@ def reader_func(): self.run_one(writer_func, reader_func) + def test___class___modification(self): + class Foo: + pass + + class Bar: + pass + + thing = Foo() + def work(): + foo = thing + for _ in range(10000): + foo.__class__ = Bar + type(foo) + foo.__class__ = Foo + type(foo) + + + threads = [] + for i in range(NTHREADS): + thread = threading.Thread(target=work) + thread.start() + threads.append(thread) + + for thread in threads: + thread.join() + def run_one(self, writer_func, reader_func): writer = Thread(target=writer_func) readers = [] diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 070e3d2f7bf2b4..8ecab555454cdc 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -6633,9 +6633,15 @@ object_set_class(PyObject *self, PyObject *value, void *closure) if (newto->tp_flags & Py_TPFLAGS_HEAPTYPE) { Py_INCREF(newto); } + Py_BEGIN_CRITICAL_SECTION(self); + // The real Py_TYPE(self) (`oldto`) may have changed from + // underneath us in another thread, so we re-fetch it here. 
+ oldto = Py_TYPE(self); Py_SET_TYPE(self, newto); - if (oldto->tp_flags & Py_TPFLAGS_HEAPTYPE) + Py_END_CRITICAL_SECTION(); + if (oldto->tp_flags & Py_TPFLAGS_HEAPTYPE) { Py_DECREF(oldto); + } RARE_EVENT_INC(set_class); return 0; diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index cb48a30751ac7b..b10b297f50da81 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -37,7 +37,6 @@ race_top:set_contains_key # https://gist.github.com/colesbury/d13d033f413b4ad07929d044bed86c35 race_top:set_discard_entry race_top:set_inheritable -race_top:Py_SET_TYPE race_top:_PyDict_CheckConsistency race_top:_Py_dict_lookup_threadsafe race_top:_multiprocessing_SemLock_acquire_impl @@ -58,7 +57,6 @@ race_top:_PyFrame_Initialize race_top:PyInterpreterState_ThreadHead race_top:_PyObject_TryGetInstanceAttribute race_top:PyThreadState_Next -race_top:Py_TYPE race_top:PyUnstable_InterpreterFrame_GetLine race_top:sock_close race_top:tstate_delete_common From 32d3e05fe67d43f7285e582a87e65374cf7c2972 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 12 Jun 2024 15:23:45 +0200 Subject: [PATCH 169/373] gh-120029: remove unused macros in ``symtable.c`` (#120222) Co-authored-by: Carl Meyer Co-authored-by: Jelle Zijlstra --- Include/internal/pycore_symtable.h | 4 ---- Modules/symtablemodule.c | 1 - Python/symtable.c | 1 - 3 files changed, 6 deletions(-) diff --git a/Include/internal/pycore_symtable.h b/Include/internal/pycore_symtable.h index 1be48edc80c830..519505c0edf52a 100644 --- a/Include/internal/pycore_symtable.h +++ b/Include/internal/pycore_symtable.h @@ -143,7 +143,6 @@ extern PyObject* _Py_Mangle(PyObject *p, PyObject *name); #define DEF_PARAM (2<<1) /* formal parameter */ #define DEF_NONLOCAL (2<<2) /* nonlocal stmt */ #define USE (2<<3) /* name is used */ -#define DEF_FREE (2<<4) /* name used but not defined 
in nested block */ #define DEF_FREE_CLASS (2<<5) /* free variable from class's method */ #define DEF_IMPORT (2<<6) /* assignment occurred via import */ #define DEF_ANNOT (2<<7) /* this name is annotated */ @@ -166,9 +165,6 @@ extern PyObject* _Py_Mangle(PyObject *p, PyObject *name); #define FREE 4 #define CELL 5 -#define GENERATOR 1 -#define GENERATOR_EXPRESSION 2 - // Used by symtablemodule.c extern struct symtable* _Py_SymtableStringObjectFlags( const char *str, diff --git a/Modules/symtablemodule.c b/Modules/symtablemodule.c index b39b59bf7b06bf..618465536e7851 100644 --- a/Modules/symtablemodule.c +++ b/Modules/symtablemodule.c @@ -76,7 +76,6 @@ symtable_init_constants(PyObject *m) if (PyModule_AddIntMacro(m, DEF_LOCAL) < 0) return -1; if (PyModule_AddIntMacro(m, DEF_PARAM) < 0) return -1; if (PyModule_AddIntMacro(m, DEF_TYPE_PARAM) < 0) return -1; - if (PyModule_AddIntMacro(m, DEF_FREE) < 0) return -1; if (PyModule_AddIntMacro(m, DEF_FREE_CLASS) < 0) return -1; if (PyModule_AddIntMacro(m, DEF_IMPORT) < 0) return -1; if (PyModule_AddIntMacro(m, DEF_BOUND) < 0) return -1; diff --git a/Python/symtable.c b/Python/symtable.c index 627184da9ef4ed..0490014166e65c 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -327,7 +327,6 @@ static void _dump_symtable(PySTEntryObject* ste, PyObject* prefix) if (flags & DEF_PARAM) printf(" DEF_PARAM"); if (flags & DEF_NONLOCAL) printf(" DEF_NONLOCAL"); if (flags & USE) printf(" USE"); - if (flags & DEF_FREE) printf(" DEF_FREE"); if (flags & DEF_FREE_CLASS) printf(" DEF_FREE_CLASS"); if (flags & DEF_IMPORT) printf(" DEF_IMPORT"); if (flags & DEF_ANNOT) printf(" DEF_ANNOT"); From 4b1e85bafc5bcb8cb70bb17164e07aebf7ad7e8e Mon Sep 17 00:00:00 2001 From: ixgbe00 Date: Wed, 12 Jun 2024 21:24:46 +0800 Subject: [PATCH 170/373] =?UTF-8?q?gh-120400=20=EF=BC=9ASupport=20Linux=20?= =?UTF-8?q?perf=20profile=20to=20see=20Python=20calls=20on=20RISC-V=20arch?= =?UTF-8?q?itecture=20(#120089)?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Pablo Galindo Salgado --- .../next/Core and Builtins/2024-06-05-06-26-04.gh-issue- | 1 + .../2024-06-12-12-29-45.gh-issue-120400.lZYHVS.rst | 1 + Python/asm_trampoline.S | 8 ++++++++ configure | 2 ++ configure.ac | 1 + 5 files changed, 13 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-05-06-26-04.gh-issue- create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-12-12-29-45.gh-issue-120400.lZYHVS.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-05-06-26-04.gh-issue- b/Misc/NEWS.d/next/Core and Builtins/2024-06-05-06-26-04.gh-issue- new file mode 100644 index 00000000000000..29f06d43c3598c --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-05-06-26-04.gh-issue- @@ -0,0 +1 @@ +Support Linux perf profiler to see Python calls on RISC-V architecture diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-12-12-29-45.gh-issue-120400.lZYHVS.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-12-12-29-45.gh-issue-120400.lZYHVS.rst new file mode 100644 index 00000000000000..8c86d4750e39a8 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-12-12-29-45.gh-issue-120400.lZYHVS.rst @@ -0,0 +1 @@ +Support Linux perf profiler to see Python calls on RISC-V architecture. 
diff --git a/Python/asm_trampoline.S b/Python/asm_trampoline.S index 460707717df003..0a3265dfeee204 100644 --- a/Python/asm_trampoline.S +++ b/Python/asm_trampoline.S @@ -22,6 +22,14 @@ _Py_trampoline_func_start: blr x3 ldp x29, x30, [sp], 16 ret +#endif +#ifdef __riscv + addi sp,sp,-16 + sd ra,8(sp) + jalr a3 + ld ra,8(sp) + addi sp,sp,16 + jr ra #endif .globl _Py_trampoline_func_end _Py_trampoline_func_end: diff --git a/configure b/configure index 8e605d31bb5eca..4174633b51c30a 100755 --- a/configure +++ b/configure @@ -13133,6 +13133,8 @@ case $PLATFORM_TRIPLET in #( perf_trampoline=yes ;; #( aarch64-linux-gnu) : perf_trampoline=yes ;; #( + riscv64-linux-gnu) : + perf_trampoline=yes ;; #( *) : perf_trampoline=no ;; diff --git a/configure.ac b/configure.ac index 41023ab92bad81..d34ade389cf40c 100644 --- a/configure.ac +++ b/configure.ac @@ -3641,6 +3641,7 @@ AC_MSG_CHECKING([perf trampoline]) AS_CASE([$PLATFORM_TRIPLET], [x86_64-linux-gnu], [perf_trampoline=yes], [aarch64-linux-gnu], [perf_trampoline=yes], + [riscv64-linux-gnu], [perf_trampoline=yes], [perf_trampoline=no] ) AC_MSG_RESULT([$perf_trampoline]) From 42b25dd61ff3593795c4cc2ffe876ab766098b24 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 12 Jun 2024 15:27:07 +0200 Subject: [PATCH 171/373] gh-120155: Add assertion to sre.c match_getindex() (#120402) Add an assertion to help static analyzers to detect that i*2 cannot overflow. 
--- Modules/_sre/sre.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index e33034086481c2..0c656b47991c2f 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -2217,6 +2217,8 @@ match_getindex(MatchObject* self, PyObject* index) return -1; } + // Check that i*2 cannot overflow to make static analyzers happy + assert(i <= SRE_MAXGROUPS); return i; } From 92c9c6ae147e1e658bbc8d454f8c7b2c4dea31d1 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 12 Jun 2024 17:23:03 +0300 Subject: [PATCH 172/373] gh-120345: Fix incorrect use of the :class: role with the "()" suffix (GH-120347) * Remove "()" when refer to a class as a type. * Use :func: when refer to a callable. * Fix reference to the datetime.astimezone() method. --- Doc/howto/descriptor.rst | 2 +- Doc/library/collections.rst | 2 +- Doc/library/datetime.rst | 2 +- Doc/library/fileinput.rst | 2 +- Doc/tutorial/stdlib2.rst | 4 ++-- Doc/whatsnew/2.5.rst | 4 ++-- Doc/whatsnew/3.12.rst | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Doc/howto/descriptor.rst b/Doc/howto/descriptor.rst index 51f9f4a6556e57..b29488be39a0a3 100644 --- a/Doc/howto/descriptor.rst +++ b/Doc/howto/descriptor.rst @@ -787,7 +787,7 @@ Invocation from super --------------------- The logic for super's dotted lookup is in the :meth:`__getattribute__` method for -object returned by :class:`super()`. +object returned by :func:`super`. A dotted lookup such as ``super(A, obj).m`` searches ``obj.__class__.__mro__`` for the base class ``B`` immediately following ``A`` and then returns diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index 2a269712f1814d..ce89101d6b667c 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -99,7 +99,7 @@ The class can be used to simulate nested scopes and is useful in templating. :func:`super` function. A reference to ``d.parents`` is equivalent to: ``ChainMap(*d.maps[1:])``. 
- Note, the iteration order of a :class:`ChainMap()` is determined by + Note, the iteration order of a :class:`ChainMap` is determined by scanning the mappings last to first:: >>> baseline = {'music': 'bach', 'art': 'rembrandt'} diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index 0723d0fe2fceb0..b6d8e6e6df07fa 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -2153,7 +2153,7 @@ There is one more :class:`tzinfo` method that a subclass may wish to override: .. method:: tzinfo.fromutc(dt) - This is called from the default :class:`datetime.astimezone()` + This is called from the default :meth:`datetime.astimezone` implementation. When called from that, ``dt.tzinfo`` is *self*, and *dt*'s date and time data are to be viewed as expressing a UTC time. The purpose of :meth:`fromutc` is to adjust the date and time data, returning an diff --git a/Doc/library/fileinput.rst b/Doc/library/fileinput.rst index 94a4139f64c2e4..8f32b11e565365 100644 --- a/Doc/library/fileinput.rst +++ b/Doc/library/fileinput.rst @@ -47,7 +47,7 @@ Lines are returned with any newlines intact, which means that the last line in a file may not have one. You can control how files are opened by providing an opening hook via the -*openhook* parameter to :func:`fileinput.input` or :class:`FileInput()`. The +*openhook* parameter to :func:`fileinput.input` or :func:`FileInput`. The hook must be a function that takes two arguments, *filename* and *mode*, and returns an accordingly opened file-like object. If *encoding* and/or *errors* are specified, they will be passed to the hook as additional keyword arguments. diff --git a/Doc/tutorial/stdlib2.rst b/Doc/tutorial/stdlib2.rst index 4bc810ce36c71b..719f772e687008 100644 --- a/Doc/tutorial/stdlib2.rst +++ b/Doc/tutorial/stdlib2.rst @@ -293,7 +293,7 @@ Many data structure needs can be met with the built-in list type. However, sometimes there is a need for alternative implementations with different performance trade-offs. 
-The :mod:`array` module provides an :class:`~array.array()` object that is like +The :mod:`array` module provides an :class:`~array.array` object that is like a list that stores only homogeneous data and stores it more compactly. The following example shows an array of numbers stored as two byte unsigned binary numbers (typecode ``"H"``) rather than the usual 16 bytes per entry for regular @@ -306,7 +306,7 @@ lists of Python int objects:: >>> a[1:3] array('H', [10, 700]) -The :mod:`collections` module provides a :class:`~collections.deque()` object +The :mod:`collections` module provides a :class:`~collections.deque` object that is like a list with faster appends and pops from the left side but slower lookups in the middle. These objects are well suited for implementing queues and breadth first tree searches:: diff --git a/Doc/whatsnew/2.5.rst b/Doc/whatsnew/2.5.rst index 2ae26e7a106a0b..3430ac8668e280 100644 --- a/Doc/whatsnew/2.5.rst +++ b/Doc/whatsnew/2.5.rst @@ -1724,7 +1724,7 @@ attribute of the function object to change this:: :mod:`ctypes` also provides a wrapper for Python's C API as the ``ctypes.pythonapi`` object. This object does *not* release the global interpreter lock before calling a function, because the lock must be held when -calling into the interpreter's code. There's a :class:`py_object()` type +calling into the interpreter's code. There's a :class:`~ctypes.py_object` type constructor that will create a :c:expr:`PyObject *` pointer. A simple usage:: import ctypes @@ -1734,7 +1734,7 @@ constructor that will create a :c:expr:`PyObject *` pointer. A simple usage:: ctypes.py_object("abc"), ctypes.py_object(1)) # d is now {'abc', 1}. -Don't forget to use :class:`py_object()`; if it's omitted you end up with a +Don't forget to use :func:`~ctypes.py_object`; if it's omitted you end up with a segmentation fault. 
:mod:`ctypes` has been around for a while, but people still write and diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 28b28e9ce50e11..93d18ffc76d07c 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -739,7 +739,7 @@ inspect itertools --------- -* Add :class:`itertools.batched()` for collecting into even-sized +* Add :func:`itertools.batched` for collecting into even-sized tuples where the last batch may be shorter than the rest. (Contributed by Raymond Hettinger in :gh:`98363`.) From fabcf6bc8f89f008319442dea614d5cbeb959544 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Wed, 12 Jun 2024 17:50:58 +0300 Subject: [PATCH 173/373] gh-120388: Improve deprecation warning message, when test returns non-None (#120401) Co-authored-by: Alex Waygood Co-authored-by: Serhiy Storchaka --- Lib/test/test_unittest/test_async_case.py | 3 +++ Lib/test/test_unittest/test_case.py | 19 +++++++++++++++++++ Lib/unittest/async_case.py | 10 +++++++--- Lib/unittest/case.py | 15 ++++++++++++--- ...-06-12-15-07-58.gh-issue-120388.VuTQMT.rst | 3 +++ 5 files changed, 44 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-12-15-07-58.gh-issue-120388.VuTQMT.rst diff --git a/Lib/test/test_unittest/test_async_case.py b/Lib/test/test_unittest/test_async_case.py index ba1ab838cd4a22..00ef55bdf9bc83 100644 --- a/Lib/test/test_unittest/test_async_case.py +++ b/Lib/test/test_unittest/test_async_case.py @@ -312,18 +312,21 @@ async def test3(self): self.assertIn('It is deprecated to return a value that is not None', str(w.warning)) self.assertIn('test1', str(w.warning)) self.assertEqual(w.filename, __file__) + self.assertIn("returned 'int'", str(w.warning)) with self.assertWarns(DeprecationWarning) as w: Test('test2').run() self.assertIn('It is deprecated to return a value that is not None', str(w.warning)) self.assertIn('test2', str(w.warning)) self.assertEqual(w.filename, __file__) + self.assertIn("returned 'async_generator'", 
str(w.warning)) with self.assertWarns(DeprecationWarning) as w: Test('test3').run() self.assertIn('It is deprecated to return a value that is not None', str(w.warning)) self.assertIn('test3', str(w.warning)) self.assertEqual(w.filename, __file__) + self.assertIn(f'returned {Nothing.__name__!r}', str(w.warning)) def test_cleanups_interleave_order(self): events = [] diff --git a/Lib/test/test_unittest/test_case.py b/Lib/test/test_unittest/test_case.py index ed5eb5609a5dd1..17420909402107 100644 --- a/Lib/test/test_unittest/test_case.py +++ b/Lib/test/test_unittest/test_case.py @@ -325,18 +325,37 @@ def test3(self): self.assertIn('It is deprecated to return a value that is not None', str(w.warning)) self.assertIn('test1', str(w.warning)) self.assertEqual(w.filename, __file__) + self.assertIn("returned 'int'", str(w.warning)) with self.assertWarns(DeprecationWarning) as w: Foo('test2').run() self.assertIn('It is deprecated to return a value that is not None', str(w.warning)) self.assertIn('test2', str(w.warning)) self.assertEqual(w.filename, __file__) + self.assertIn("returned 'generator'", str(w.warning)) with self.assertWarns(DeprecationWarning) as w: Foo('test3').run() self.assertIn('It is deprecated to return a value that is not None', str(w.warning)) self.assertIn('test3', str(w.warning)) self.assertEqual(w.filename, __file__) + self.assertIn(f'returned {Nothing.__name__!r}', str(w.warning)) + + def test_deprecation_of_return_val_from_test_async_method(self): + class Foo(unittest.TestCase): + async def test1(self): + return 1 + + with self.assertWarns(DeprecationWarning) as w: + Foo('test1').run() + self.assertIn('It is deprecated to return a value that is not None', str(w.warning)) + self.assertIn('test1', str(w.warning)) + self.assertEqual(w.filename, __file__) + self.assertIn("returned 'coroutine'", str(w.warning)) + self.assertIn( + 'Maybe you forgot to use IsolatedAsyncioTestCase as the base class?', + str(w.warning), + ) def _check_call_order__subtests(self, 
result, events, expected_events): class Foo(Test.LoggingTestCase): diff --git a/Lib/unittest/async_case.py b/Lib/unittest/async_case.py index 63ff6a5d1f8b61..bd06eb3207697a 100644 --- a/Lib/unittest/async_case.py +++ b/Lib/unittest/async_case.py @@ -90,9 +90,13 @@ def _callSetUp(self): self._callAsync(self.asyncSetUp) def _callTestMethod(self, method): - if self._callMaybeAsync(method) is not None: - warnings.warn(f'It is deprecated to return a value that is not None from a ' - f'test case ({method})', DeprecationWarning, stacklevel=4) + result = self._callMaybeAsync(method) + if result is not None: + msg = ( + f'It is deprecated to return a value that is not None ' + f'from a test case ({method} returned {type(result).__name__!r})', + ) + warnings.warn(msg, DeprecationWarning, stacklevel=4) def _callTearDown(self): self._callAsync(self.asyncTearDown) diff --git a/Lib/unittest/case.py b/Lib/unittest/case.py index 36daa61fa31adb..55c79d353539ca 100644 --- a/Lib/unittest/case.py +++ b/Lib/unittest/case.py @@ -603,9 +603,18 @@ def _callSetUp(self): self.setUp() def _callTestMethod(self, method): - if method() is not None: - warnings.warn(f'It is deprecated to return a value that is not None from a ' - f'test case ({method})', DeprecationWarning, stacklevel=3) + result = method() + if result is not None: + import inspect + msg = ( + f'It is deprecated to return a value that is not None ' + f'from a test case ({method} returned {type(result).__name__!r})' + ) + if inspect.iscoroutine(result): + msg += ( + '. Maybe you forgot to use IsolatedAsyncioTestCase as the base class?' 
+ ) + warnings.warn(msg, DeprecationWarning, stacklevel=3) def _callTearDown(self): self.tearDown() diff --git a/Misc/NEWS.d/next/Library/2024-06-12-15-07-58.gh-issue-120388.VuTQMT.rst b/Misc/NEWS.d/next/Library/2024-06-12-15-07-58.gh-issue-120388.VuTQMT.rst new file mode 100644 index 00000000000000..d13df7d88b776c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-12-15-07-58.gh-issue-120388.VuTQMT.rst @@ -0,0 +1,3 @@ +Improve a warning message when a test method in :mod:`unittest` returns +something other than ``None``. Now we show the returned object type and +optional asyncio-related tip. From 127c1d2771749853e287632c086b6054212bf12a Mon Sep 17 00:00:00 2001 From: neonene <53406459+neonene@users.noreply.github.com> Date: Thu, 13 Jun 2024 01:46:39 +0900 Subject: [PATCH 174/373] gh-71587: Drop local reference cache to `_strptime` module in `_datetime` (gh-120224) The _strptime module object was cached in a static local variable (in the datetime.strptime() implementation). That's a problem when it crosses isolation boundaries, such as reinitializing the runtime or between interpreters. This change fixes the problem by dropping the static variable, instead always relying on the normal sys.modules cache (via PyImport_Import()). 
--- .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + Include/internal/pycore_runtime_init_generated.h | 1 + Include/internal/pycore_unicodeobject_generated.h | 3 +++ Lib/test/test_embed.py | 9 +++++++++ .../2024-06-07-11-23-31.gh-issue-71587.IjFajE.rst | 2 ++ Modules/_datetimemodule.c | 14 +++++++------- Tools/c-analyzer/cpython/globals-to-fix.tsv | 1 - 8 files changed, 24 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-07-11-23-31.gh-issue-71587.IjFajE.rst diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 30851dc2dbec44..bc94930b85f098 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -777,6 +777,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_showwarnmsg)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_shutdown)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_slotnames)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_strptime)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_strptime_datetime)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_swappedbytes_)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_type_)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 009802c441685c..998be2ec490dd9 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -266,6 +266,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(_showwarnmsg) STRUCT_FOR_ID(_shutdown) STRUCT_FOR_ID(_slotnames) + STRUCT_FOR_ID(_strptime) STRUCT_FOR_ID(_strptime_datetime) STRUCT_FOR_ID(_swappedbytes_) STRUCT_FOR_ID(_type_) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index ff5b6ee8e0f006..bd79a7dff42f89 100644 --- 
a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -775,6 +775,7 @@ extern "C" { INIT_ID(_showwarnmsg), \ INIT_ID(_shutdown), \ INIT_ID(_slotnames), \ + INIT_ID(_strptime), \ INIT_ID(_strptime_datetime), \ INIT_ID(_swappedbytes_), \ INIT_ID(_type_), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 69d93a9610a2e5..7284aeb592d7ec 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -636,6 +636,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(_slotnames); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(_strptime); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(_strptime_datetime); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index d94c63a13b8ea4..634513ec7a5812 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -404,6 +404,15 @@ def test_ucnhash_capi_reset(self): out, err = self.run_embedded_interpreter("test_repeated_init_exec", code) self.assertEqual(out, '9\n' * INIT_LOOPS) + def test_datetime_reset_strptime(self): + code = ( + "import datetime;" + "d = datetime.datetime.strptime('2000-01-01', '%Y-%m-%d');" + "print(d.strftime('%Y%m%d'))" + ) + out, err = self.run_embedded_interpreter("test_repeated_init_exec", code) + self.assertEqual(out, '20000101\n' * INIT_LOOPS) + @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi") class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2024-06-07-11-23-31.gh-issue-71587.IjFajE.rst b/Misc/NEWS.d/next/Library/2024-06-07-11-23-31.gh-issue-71587.IjFajE.rst new file mode 100644 index 
00000000000000..50a662977993f5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-07-11-23-31.gh-issue-71587.IjFajE.rst @@ -0,0 +1,2 @@ +Fix crash in C version of :meth:`datetime.datetime.strptime` when called again +on the restarted interpreter. diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index bea6e9411a75ed..cb4622893375d7 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -5514,19 +5514,19 @@ datetime_utcfromtimestamp(PyObject *cls, PyObject *args) static PyObject * datetime_strptime(PyObject *cls, PyObject *args) { - static PyObject *module = NULL; - PyObject *string, *format; + PyObject *string, *format, *result; if (!PyArg_ParseTuple(args, "UU:strptime", &string, &format)) return NULL; + PyObject *module = PyImport_Import(&_Py_ID(_strptime)); if (module == NULL) { - module = PyImport_ImportModule("_strptime"); - if (module == NULL) - return NULL; + return NULL; } - return PyObject_CallMethodObjArgs(module, &_Py_ID(_strptime_datetime), - cls, string, format, NULL); + result = PyObject_CallMethodObjArgs(module, &_Py_ID(_strptime_datetime), + cls, string, format, NULL); + Py_DECREF(module); + return result; } /* Return new datetime from date/datetime and time arguments. 
*/ diff --git a/Tools/c-analyzer/cpython/globals-to-fix.tsv b/Tools/c-analyzer/cpython/globals-to-fix.tsv index 4586a59f6ac2ef..cb9750a69a632b 100644 --- a/Tools/c-analyzer/cpython/globals-to-fix.tsv +++ b/Tools/c-analyzer/cpython/globals-to-fix.tsv @@ -393,7 +393,6 @@ Modules/xxmodule.c - ErrorObject - ## initialized once Modules/_cursesmodule.c - ModDict - -Modules/_datetimemodule.c datetime_strptime module - ## state Modules/_datetimemodule.c - _datetime_global_state - From 4b5d3e0e721a952f4ac9d17bee331e6dfe543dcd Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Wed, 12 Jun 2024 20:52:55 +0200 Subject: [PATCH 175/373] gh-120343: Fix column offsets of multiline tokens in tokenize (#120391) --- Lib/test/test_tokenize.py | 14 ++++++++++++++ Python/Python-tokenize.c | 14 ++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 36dba71766cc20..51aeb35f01065a 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1210,6 +1210,20 @@ def test_multiline_non_ascii_fstring(self): FSTRING_END "\'\'\'" (2, 68) (2, 71) """) + def test_multiline_non_ascii_fstring_with_expr(self): + self.check_tokenize("""\ +f''' + 🔗 This is a test {test_arg1}🔗 +🔗'''""", """\ + FSTRING_START "f\'\'\'" (1, 0) (1, 4) + FSTRING_MIDDLE '\\n 🔗 This is a test ' (1, 4) (2, 21) + OP '{' (2, 21) (2, 22) + NAME 'test_arg1' (2, 22) (2, 31) + OP '}' (2, 31) (2, 32) + FSTRING_MIDDLE '🔗\\n🔗' (2, 32) (3, 1) + FSTRING_END "\'\'\'" (3, 1) (3, 4) + """) + class GenerateTokensTest(TokenizeTest): def check_tokenize(self, s, expected): # Format the tokens in s in a table format. diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c index 2591dae35736ba..55c821754c2031 100644 --- a/Python/Python-tokenize.c +++ b/Python/Python-tokenize.c @@ -215,6 +215,7 @@ tokenizeriter_next(tokenizeriterobject *it) const char *line_start = ISSTRINGLIT(type) ? 
it->tok->multi_line_start : it->tok->line_start; PyObject* line = NULL; + int line_changed = 1; if (it->tok->tok_extra_tokens && is_trailing_token) { line = PyUnicode_FromString(""); } else { @@ -229,12 +230,11 @@ tokenizeriter_next(tokenizeriterobject *it) Py_XDECREF(it->last_line); line = PyUnicode_DecodeUTF8(line_start, size, "replace"); it->last_line = line; - if (it->tok->lineno != it->last_end_lineno) { - it->byte_col_offset_diff = 0; - } + it->byte_col_offset_diff = 0; } else { // Line hasn't changed so we reuse the cached one. line = it->last_line; + line_changed = 0; } } if (line == NULL) { @@ -252,7 +252,13 @@ tokenizeriter_next(tokenizeriterobject *it) Py_ssize_t byte_offset = -1; if (token.start != NULL && token.start >= line_start) { byte_offset = token.start - line_start; - col_offset = byte_offset - it->byte_col_offset_diff; + if (line_changed) { + col_offset = _PyPegen_byte_offset_to_character_offset_line(line, 0, byte_offset); + it->byte_col_offset_diff = byte_offset - col_offset; + } + else { + col_offset = byte_offset - it->byte_col_offset_diff; + } } if (token.end != NULL && token.end >= it->tok->line_start) { Py_ssize_t end_byte_offset = token.end - it->tok->line_start; From 4c6d4f5cb33e48519922d635894eef356faddba2 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 12 Jun 2024 20:56:42 +0200 Subject: [PATCH 176/373] gh-120417: Remove unused imports in the stdlib (#120420) --- Lib/_pyrepl/historical_reader.py | 2 +- Lib/_pyrepl/pager.py | 2 +- Lib/_pyrepl/unix_console.py | 1 - Lib/_pyrepl/windows_console.py | 3 --- Lib/dataclasses.py | 3 +-- Lib/dbm/sqlite3.py | 1 - Lib/idlelib/grep.py | 2 +- Lib/importlib/abc.py | 1 - Lib/ntpath.py | 1 - Lib/pydoc.py | 2 +- Lib/stat.py | 1 - 11 files changed, 5 insertions(+), 14 deletions(-) diff --git a/Lib/_pyrepl/historical_reader.py b/Lib/_pyrepl/historical_reader.py index 121de33da5052f..dd90912d1d67f8 100644 --- a/Lib/_pyrepl/historical_reader.py +++ b/Lib/_pyrepl/historical_reader.py @@ -27,7 +27,7 
@@ if False: - from .types import Callback, SimpleContextManager, KeySpec, CommandName + from .types import SimpleContextManager, KeySpec, CommandName isearch_keymap: tuple[tuple[KeySpec, CommandName], ...] = tuple( diff --git a/Lib/_pyrepl/pager.py b/Lib/_pyrepl/pager.py index 1ac733ed3573a4..66dcd99111adfc 100644 --- a/Lib/_pyrepl/pager.py +++ b/Lib/_pyrepl/pager.py @@ -8,7 +8,7 @@ # types if False: - from typing import Protocol, Any + from typing import Protocol class Pager(Protocol): def __call__(self, text: str, title: str = "") -> None: ... diff --git a/Lib/_pyrepl/unix_console.py b/Lib/_pyrepl/unix_console.py index af9290819c2c78..c4dedd97d1e13d 100644 --- a/Lib/_pyrepl/unix_console.py +++ b/Lib/_pyrepl/unix_console.py @@ -27,7 +27,6 @@ import select import signal import struct -import sys import termios import time from fcntl import ioctl diff --git a/Lib/_pyrepl/windows_console.py b/Lib/_pyrepl/windows_console.py index f691ca3fbb07b8..9e97b1524e29a0 100644 --- a/Lib/_pyrepl/windows_console.py +++ b/Lib/_pyrepl/windows_console.py @@ -20,15 +20,12 @@ from __future__ import annotations import io -from multiprocessing import Value import os import sys import time import msvcrt -from abc import ABC, abstractmethod from collections import deque -from dataclasses import dataclass, field import ctypes from ctypes.wintypes import ( _COORD, diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index aeafbfbbe6e9c4..74011b7e28b9f3 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -7,7 +7,6 @@ import itertools import abc from reprlib import recursive_repr -from types import FunctionType, GenericAlias __all__ = ['dataclass', @@ -333,7 +332,7 @@ def __set_name__(self, owner, name): # it. 
func(self.default, owner, name) - __class_getitem__ = classmethod(GenericAlias) + __class_getitem__ = classmethod(types.GenericAlias) class _DataclassParams: diff --git a/Lib/dbm/sqlite3.py b/Lib/dbm/sqlite3.py index 74c9d9b7e2f1d8..7e0ae2a29e3a64 100644 --- a/Lib/dbm/sqlite3.py +++ b/Lib/dbm/sqlite3.py @@ -1,6 +1,5 @@ import os import sqlite3 -import sys from pathlib import Path from contextlib import suppress, closing from collections.abc import MutableMapping diff --git a/Lib/idlelib/grep.py b/Lib/idlelib/grep.py index ef14349960bfa2..42048ff2395fe1 100644 --- a/Lib/idlelib/grep.py +++ b/Lib/idlelib/grep.py @@ -190,7 +190,7 @@ def grep_it(self, prog, path): def _grep_dialog(parent): # htest # - from tkinter import Toplevel, Text, SEL, END + from tkinter import Toplevel, Text, SEL from tkinter.ttk import Frame, Button from idlelib.pyshell import PyShellFileList diff --git a/Lib/importlib/abc.py b/Lib/importlib/abc.py index b6b2c791a3b03f..eea6b38af6fa13 100644 --- a/Lib/importlib/abc.py +++ b/Lib/importlib/abc.py @@ -13,7 +13,6 @@ _frozen_importlib_external = _bootstrap_external from ._abc import Loader import abc -import warnings __all__ = [ diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 83e2d3b865757c..1b1873f08b608b 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -19,7 +19,6 @@ import os import sys -import stat import genericpath from genericpath import * diff --git a/Lib/pydoc.py b/Lib/pydoc.py index d7579c1cc3dcd1..278e4846ebb71f 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -75,7 +75,7 @@ class or function within a module or module in a package. 
If the from reprlib import Repr from traceback import format_exception_only -from _pyrepl.pager import (get_pager, plain, escape_less, pipe_pager, +from _pyrepl.pager import (get_pager, plain, pipe_pager, plain_pager, tempfile_pager, tty_pager) diff --git a/Lib/stat.py b/Lib/stat.py index 9167ab185944fb..1b4ed1ebc940ef 100644 --- a/Lib/stat.py +++ b/Lib/stat.py @@ -2,7 +2,6 @@ Suggested usage: from stat import * """ -import sys # Indices for stat struct members in the tuple returned by os.stat() From 3453362183f083e37ea866a7ae1b34147ffaf81d Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 12 Jun 2024 20:09:25 +0100 Subject: [PATCH 177/373] gh-118908: Protect the REPL subprocess with a timeout in tests (#120408) --- Lib/test/test_pyrepl/test_pyrepl.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_pyrepl/test_pyrepl.py b/Lib/test/test_pyrepl/test_pyrepl.py index 3167b8473bfe20..41ba5959a1ec34 100644 --- a/Lib/test/test_pyrepl/test_pyrepl.py +++ b/Lib/test/test_pyrepl/test_pyrepl.py @@ -8,6 +8,7 @@ from unittest import TestCase, skipUnless from unittest.mock import patch from test.support import force_not_colorized +from test.support import SHORT_TIMEOUT from .support import ( FakeConsole, @@ -885,5 +886,9 @@ def run_repl(self, repl_input: str | list[str], env: dict | None = None) -> tupl os.close(master_fd) os.close(slave_fd) - exit_code = process.wait() + try: + exit_code = process.wait(timeout=SHORT_TIMEOUT) + except subprocess.TimeoutExpired: + process.kill() + exit_code = process.returncode return "\n".join(output), exit_code From 030b452e34bbb0096acacb70a31915b9590c8186 Mon Sep 17 00:00:00 2001 From: Stefano Rivera Date: Wed, 12 Jun 2024 12:19:36 -0700 Subject: [PATCH 178/373] gh-120418: Don't assume wheeldata is deleted if `WHEEL_PKG_DIR` is set (#120419) Remove wheeldata from both sides of the `assertEqual`, so that we're *actually* ignoring it from the test set. 
This test is only making assertions about the source tree, no code is being executed that would do anything different based on the value of `WHEEL_PKG_DIR`. --- Lib/test/test_tools/test_makefile.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_tools/test_makefile.py b/Lib/test/test_tools/test_makefile.py index 48a7c1a773bb83..df95e6d0068516 100644 --- a/Lib/test/test_tools/test_makefile.py +++ b/Lib/test/test_tools/test_makefile.py @@ -41,7 +41,7 @@ def test_makefile_test_folders(self): idle_test = 'idlelib/idle_test' self.assertIn(idle_test, test_dirs) - used = [idle_test] + used = set([idle_test]) for dirpath, dirs, files in os.walk(support.TEST_HOME_DIR): dirname = os.path.basename(dirpath) # Skip temporary dirs: @@ -65,13 +65,14 @@ def test_makefile_test_folders(self): "of test directories to install" ) ) - used.append(relpath) + used.add(relpath) # Don't check the wheel dir when Python is built --with-wheel-pkg-dir if sysconfig.get_config_var('WHEEL_PKG_DIR'): test_dirs.remove('test/wheeldata') + used.discard('test/wheeldata') # Check that there are no extra entries: unique_test_dirs = set(test_dirs) - self.assertSetEqual(unique_test_dirs, set(used)) + self.assertSetEqual(unique_test_dirs, used) self.assertEqual(len(test_dirs), len(unique_test_dirs)) From eebae2c460dabdc70dc0d9b6e189368eb1abb716 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Thu, 13 Jun 2024 17:29:19 +0800 Subject: [PATCH 179/373] gh-117657: Make PyType_HasFeature atomic (GH-120210) Make PyType_HasFeature atomic --- Include/internal/pycore_object.h | 2 +- Include/internal/pycore_pyatomic_ft_wrappers.h | 3 +++ Tools/tsan/suppressions_free_threading.txt | 1 - 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 6f133014ce06e2..d1e2773a2473b0 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -262,7 +262,7 @@ extern int 
_PyTraceMalloc_TraceRef(PyObject *op, PyRefTracerEvent event, void*); // Fast inlined version of PyType_HasFeature() static inline int _PyType_HasFeature(PyTypeObject *type, unsigned long feature) { - return ((type->tp_flags & feature) != 0); + return ((FT_ATOMIC_LOAD_ULONG_RELAXED(type->tp_flags) & feature) != 0); } extern void _PyType_InitCache(PyInterpreterState *interp); diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index bc6aba56cf9fc7..a1bb383bcd22e9 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -45,6 +45,8 @@ extern "C" { _Py_atomic_load_uint16_relaxed(&value) #define FT_ATOMIC_LOAD_UINT32_RELAXED(value) \ _Py_atomic_load_uint32_relaxed(&value) +#define FT_ATOMIC_LOAD_ULONG_RELAXED(value) \ + _Py_atomic_load_ulong_relaxed(&value) #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) \ _Py_atomic_store_ptr_relaxed(&value, new_value) #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) \ @@ -75,6 +77,7 @@ extern "C" { #define FT_ATOMIC_LOAD_UINT8_RELAXED(value) value #define FT_ATOMIC_LOAD_UINT16_RELAXED(value) value #define FT_ATOMIC_LOAD_UINT32_RELAXED(value) value +#define FT_ATOMIC_LOAD_ULONG_RELAXED(value) value #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) value = new_value #define FT_ATOMIC_STORE_UINTPTR_RELEASE(value, new_value) value = new_value diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index b10b297f50da81..05ceaf438b6353 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -26,7 +26,6 @@ race:free_threadstate race_top:_add_to_weak_set race_top:_in_weak_set race_top:_PyEval_EvalFrameDefault -race_top:_PyType_HasFeature race_top:assign_version_tag race_top:insertdict race_top:lookup_tp_dict From 
b1b61dc4cee43920ef2b08d5ac94ddf08119c507 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Thu, 13 Jun 2024 17:31:21 +0800 Subject: [PATCH 180/373] gh-117657: Fix some simple races in instrumentation.c (GH-120118) * stop the world when setting local events --- Python/instrumentation.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/instrumentation.c b/Python/instrumentation.c index a5211ee5428cf8..ae790a1441b933 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -1977,7 +1977,7 @@ _PyMonitoring_SetLocalEvents(PyCodeObject *code, int tool_id, _PyMonitoringEvent } int res; - LOCK_CODE(code); + _PyEval_StopTheWorld(interp); if (allocate_instrumentation_data(code)) { res = -1; goto done; @@ -1994,7 +1994,7 @@ _PyMonitoring_SetLocalEvents(PyCodeObject *code, int tool_id, _PyMonitoringEvent res = force_instrument_lock_held(code, interp); done: - UNLOCK_CODE(); + _PyEval_StartTheWorld(interp); return res; } From 87cedaa5c857dc615b9f618b020414187fc1c966 Mon Sep 17 00:00:00 2001 From: Xie Yanbo Date: Thu, 13 Jun 2024 17:37:21 +0800 Subject: [PATCH 181/373] Fix typos in documentation (GH-120440) --- .../2024-06-10-10-42-48.gh-issue-120298.napREA.rst | 2 +- .../2024-06-11-12-47-54.gh-issue-120346.hhn_6X.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-10-10-42-48.gh-issue-120298.napREA.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-10-10-42-48.gh-issue-120298.napREA.rst index 531d39517ac423..2872006ee34b8b 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2024-06-10-10-42-48.gh-issue-120298.napREA.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-10-10-42-48.gh-issue-120298.napREA.rst @@ -1,2 +1,2 @@ Fix use-after free in ``list_richcompare_impl`` which can be invoked via -some specificly tailored evil input. +some specifically tailored evil input. 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-11-12-47-54.gh-issue-120346.hhn_6X.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-11-12-47-54.gh-issue-120346.hhn_6X.rst index eb2d0f9a705caa..757a21625cfb83 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2024-06-11-12-47-54.gh-issue-120346.hhn_6X.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-11-12-47-54.gh-issue-120346.hhn_6X.rst @@ -1,2 +1,2 @@ -Respect :envvar:`PYTHON_BASIC_REPL` when running in interative inspect mode +Respect :envvar:`PYTHON_BASIC_REPL` when running in interactive inspect mode (``python -i``). Patch by Pablo Galindo From ca5108a46d5da3978d4bd29717ea3fbdee772e66 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Thu, 13 Jun 2024 14:38:31 +0300 Subject: [PATCH 182/373] gh-119146: Update ``regexp`` in `build.yml` to not trigger the jobs on `*.md` and `*.ini` files. (#120435) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index eb325ac2f9ee1b..750aa1ed87bca1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -54,7 +54,7 @@ jobs: # into the PR branch anyway. # # https://github.com/python/core-workflow/issues/373 - git diff --name-only origin/$GITHUB_BASE_REF.. | grep -qvE '(\.rst$|^Doc|^Misc|^\.pre-commit-config\.yaml$|\.ruff\.toml$)' && echo "run_tests=true" >> $GITHUB_OUTPUT || true + git diff --name-only origin/$GITHUB_BASE_REF.. 
| grep -qvE '(\.rst$|^Doc|^Misc|^\.pre-commit-config\.yaml$|\.ruff\.toml$|\.md$|mypy\.ini$)' && echo "run_tests=true" >> $GITHUB_OUTPUT || true fi # Check if we should run hypothesis tests From 6ae254aaa0a5a3985a52d1ab387a2b68c001bd96 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 13 Jun 2024 16:14:50 +0200 Subject: [PATCH 183/373] gh-120417: Add #noqa to used imports in the stdlib (#120421) Tools such as ruff can ignore "imported but unused" warnings if a line ends with "# noqa: F401". It avoids the temptation to remove an import which is used effectively. --- Lib/_pyio.py | 2 +- Lib/code.py | 2 +- Lib/codecs.py | 2 +- Lib/collections/__init__.py | 3 ++- Lib/concurrent/futures/process.py | 2 +- Lib/curses/__init__.py | 2 +- Lib/datetime.py | 4 ++-- Lib/decimal.py | 8 ++++---- Lib/hashlib.py | 2 +- Lib/lzma.py | 2 +- Lib/multiprocessing/context.py | 2 +- Lib/multiprocessing/util.py | 2 +- Lib/opcode.py | 4 ++-- Lib/operator.py | 2 +- Lib/platform.py | 2 +- Lib/pstats.py | 2 +- Lib/pydoc.py | 5 ++++- Lib/re/_constants.py | 2 +- Lib/site.py | 6 +++--- Lib/sqlite3/__main__.py | 2 +- Lib/struct.py | 4 ++-- Lib/symtable.py | 6 +++--- Lib/unittest/__init__.py | 4 ++-- Lib/urllib/request.py | 2 +- Lib/xml/dom/__init__.py | 2 +- 25 files changed, 40 insertions(+), 36 deletions(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index a3fede699218a1..7d298e1674b49a 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -16,7 +16,7 @@ _setmode = None import io -from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END) +from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END) # noqa: F401 valid_seek_flags = {0, 1, 2} # Hardwired values if hasattr(os, 'SEEK_HOLE') : diff --git a/Lib/code.py b/Lib/code.py index b93902ccf545b3..a55fced0704b1d 100644 --- a/Lib/code.py +++ b/Lib/code.py @@ -355,7 +355,7 @@ def interact(banner=None, readfunc=None, local=None, exitmsg=None, local_exit=Fa console.raw_input = readfunc else: try: - import readline + import readline # noqa: F401 except 
ImportError: pass console.interact(banner, exitmsg) diff --git a/Lib/codecs.py b/Lib/codecs.py index 9b35b6127dd01c..a887e5d4c94a38 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -1129,4 +1129,4 @@ def make_encoding_map(decoding_map): # package _false = 0 if _false: - import encodings + import encodings # noqa: F401 diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index a17100e6c02a0e..b47e728484c8ac 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -46,7 +46,8 @@ _collections_abc.MutableSequence.register(deque) try: - from _collections import _deque_iterator + # Expose _deque_iterator to support pickling deque iterators + from _collections import _deque_iterator # noqa: F401 except ImportError: pass diff --git a/Lib/concurrent/futures/process.py b/Lib/concurrent/futures/process.py index bb4892ebdfedf5..7092b4757b5429 100644 --- a/Lib/concurrent/futures/process.py +++ b/Lib/concurrent/futures/process.py @@ -589,7 +589,7 @@ def _check_system_limits(): raise NotImplementedError(_system_limited) _system_limits_checked = True try: - import multiprocessing.synchronize + import multiprocessing.synchronize # noqa: F401 except ImportError: _system_limited = ( "This Python build lacks multiprocessing.synchronize, usually due " diff --git a/Lib/curses/__init__.py b/Lib/curses/__init__.py index 69270bfcd2b205..6165fe6c9875c0 100644 --- a/Lib/curses/__init__.py +++ b/Lib/curses/__init__.py @@ -53,7 +53,7 @@ def start_color(): try: has_key except NameError: - from .has_key import has_key + from .has_key import has_key # noqa: F401 # Wrapper for the entire curses-based application. Runs a function which # should be the rest of your curses-based application. 
If the application diff --git a/Lib/datetime.py b/Lib/datetime.py index a33d2d724cb33d..b4f7bd045c7b68 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -1,9 +1,9 @@ try: from _datetime import * - from _datetime import __doc__ + from _datetime import __doc__ # noqa: F401 except ImportError: from _pydatetime import * - from _pydatetime import __doc__ + from _pydatetime import __doc__ # noqa: F401 __all__ = ("date", "datetime", "time", "timedelta", "timezone", "tzinfo", "MINYEAR", "MAXYEAR", "UTC") diff --git a/Lib/decimal.py b/Lib/decimal.py index d61e374b9f9998..13a0dcb77f1267 100644 --- a/Lib/decimal.py +++ b/Lib/decimal.py @@ -100,9 +100,9 @@ try: from _decimal import * - from _decimal import __version__ - from _decimal import __libmpdec_version__ + from _decimal import __version__ # noqa: F401 + from _decimal import __libmpdec_version__ # noqa: F401 except ImportError: from _pydecimal import * - from _pydecimal import __version__ - from _pydecimal import __libmpdec_version__ + from _pydecimal import __version__ # noqa: F401 + from _pydecimal import __libmpdec_version__ # noqa: F401 diff --git a/Lib/hashlib.py b/Lib/hashlib.py index 1b16441cb60ba7..da0577023cf47d 100644 --- a/Lib/hashlib.py +++ b/Lib/hashlib.py @@ -187,7 +187,7 @@ def __hash_new(name, data=b'', **kwargs): try: # OpenSSL's scrypt requires OpenSSL 1.1+ - from _hashlib import scrypt + from _hashlib import scrypt # noqa: F401 except ImportError: pass diff --git a/Lib/lzma.py b/Lib/lzma.py index c1e3d33deb69a1..946066aa0fba56 100644 --- a/Lib/lzma.py +++ b/Lib/lzma.py @@ -25,7 +25,7 @@ import io import os from _lzma import * -from _lzma import _encode_filter_properties, _decode_filter_properties +from _lzma import _encode_filter_properties, _decode_filter_properties # noqa: F401 import _compression diff --git a/Lib/multiprocessing/context.py b/Lib/multiprocessing/context.py index de8a264829dff3..ddcc7e7900999e 100644 --- a/Lib/multiprocessing/context.py +++ b/Lib/multiprocessing/context.py @@ 
-167,7 +167,7 @@ def allow_connection_pickling(self): ''' # This is undocumented. In previous versions of multiprocessing # its only effect was to make socket objects inheritable on Windows. - from . import connection + from . import connection # noqa: F401 def set_executable(self, executable): '''Sets the path to a python.exe or pythonw.exe binary used to run diff --git a/Lib/multiprocessing/util.py b/Lib/multiprocessing/util.py index 75dde02d88c533..4f471fbde71ace 100644 --- a/Lib/multiprocessing/util.py +++ b/Lib/multiprocessing/util.py @@ -14,7 +14,7 @@ import atexit import threading # we want threading to install it's # cleanup function before multiprocessing does -from subprocess import _args_from_interpreter_flags +from subprocess import _args_from_interpreter_flags # noqa: F401 from . import process diff --git a/Lib/opcode.py b/Lib/opcode.py index 85e37ff53e577f..85c0834c698ba2 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -12,8 +12,8 @@ import _opcode from _opcode import stack_effect -from _opcode_metadata import (_specializations, _specialized_opmap, opmap, - HAVE_ARGUMENT, MIN_INSTRUMENTED_OPCODE) +from _opcode_metadata import (_specializations, _specialized_opmap, opmap, # noqa: F401 + HAVE_ARGUMENT, MIN_INSTRUMENTED_OPCODE) # noqa: F401 EXTENDED_ARG = opmap['EXTENDED_ARG'] opname = ['<%r>' % (op,) for op in range(max(opmap.values()) + 1)] diff --git a/Lib/operator.py b/Lib/operator.py index 02ccdaa13ddb31..6d2a762bc95b6d 100644 --- a/Lib/operator.py +++ b/Lib/operator.py @@ -415,7 +415,7 @@ def ixor(a, b): except ImportError: pass else: - from _operator import __doc__ + from _operator import __doc__ # noqa: F401 # All of these "__func__ = func" assignments have to happen after importing # from _operator to make sure they're set to the right function diff --git a/Lib/platform.py b/Lib/platform.py index a4fd2463f15a6c..d6322c9d99d2f3 100644 --- a/Lib/platform.py +++ b/Lib/platform.py @@ -546,7 +546,7 @@ def java_ver(release='', vendor='', vminfo=('', 
'', ''), osinfo=('', '', '')): warnings._deprecated('java_ver', remove=(3, 15)) # Import the needed APIs try: - import java.lang + import java.lang # noqa: F401 except ImportError: return release, vendor, vminfo, osinfo diff --git a/Lib/pstats.py b/Lib/pstats.py index 2f054bb4011e7f..a174a545456e1a 100644 --- a/Lib/pstats.py +++ b/Lib/pstats.py @@ -611,7 +611,7 @@ def f8(x): if __name__ == '__main__': import cmd try: - import readline + import readline # noqa: F401 except ImportError: pass diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 278e4846ebb71f..be5cd9a80db710 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -75,9 +75,12 @@ class or function within a module or module in a package. If the from reprlib import Repr from traceback import format_exception_only -from _pyrepl.pager import (get_pager, plain, pipe_pager, +from _pyrepl.pager import (get_pager, pipe_pager, plain_pager, tempfile_pager, tty_pager) +# Expose plain() as pydoc.plain() +from _pyrepl.pager import plain # noqa: F401 + # --------------------------------------------------------- old names diff --git a/Lib/re/_constants.py b/Lib/re/_constants.py index 9c3c294ba448b4..4cb88c96d92715 100644 --- a/Lib/re/_constants.py +++ b/Lib/re/_constants.py @@ -15,7 +15,7 @@ MAGIC = 20230612 -from _sre import MAXREPEAT, MAXGROUPS +from _sre import MAXREPEAT, MAXGROUPS # noqa: F401 # SRE standard exception (access as sre.error) # should this really be here? 
diff --git a/Lib/site.py b/Lib/site.py index 7eace190f5ab21..9381f6f510eb46 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -486,7 +486,7 @@ def register_readline(): import atexit try: import readline - import rlcompleter + import rlcompleter # noqa: F401 import _pyrepl.readline import _pyrepl.unix_console except ImportError: @@ -603,7 +603,7 @@ def execsitecustomize(): """Run custom site specific code, if available.""" try: try: - import sitecustomize + import sitecustomize # noqa: F401 except ImportError as exc: if exc.name == 'sitecustomize': pass @@ -623,7 +623,7 @@ def execusercustomize(): """Run custom user specific code, if available.""" try: try: - import usercustomize + import usercustomize # noqa: F401 except ImportError as exc: if exc.name == 'usercustomize': pass diff --git a/Lib/sqlite3/__main__.py b/Lib/sqlite3/__main__.py index b93b84384a0925..d9423c25e34135 100644 --- a/Lib/sqlite3/__main__.py +++ b/Lib/sqlite3/__main__.py @@ -117,7 +117,7 @@ def main(*args): # No SQL provided; start the REPL. 
console = SqliteInteractiveConsole(con) try: - import readline + import readline # noqa: F401 except ImportError: pass console.interact(banner, exitmsg="") diff --git a/Lib/struct.py b/Lib/struct.py index d6bba588636498..ff98e8c4cb3f1d 100644 --- a/Lib/struct.py +++ b/Lib/struct.py @@ -11,5 +11,5 @@ ] from _struct import * -from _struct import _clearcache -from _struct import __doc__ +from _struct import _clearcache # noqa: F401 +from _struct import __doc__ # noqa: F401 diff --git a/Lib/symtable.py b/Lib/symtable.py index d6ac1f527ba8ba..f8ba3496439535 100644 --- a/Lib/symtable.py +++ b/Lib/symtable.py @@ -3,9 +3,9 @@ import _symtable from _symtable import ( USE, - DEF_GLOBAL, DEF_NONLOCAL, DEF_LOCAL, - DEF_PARAM, DEF_TYPE_PARAM, - DEF_FREE_CLASS, + DEF_GLOBAL, # noqa: F401 + DEF_NONLOCAL, DEF_LOCAL, + DEF_PARAM, DEF_TYPE_PARAM, DEF_FREE_CLASS, DEF_IMPORT, DEF_BOUND, DEF_ANNOT, DEF_COMP_ITER, DEF_COMP_CELL, SCOPE_OFF, SCOPE_MASK, diff --git a/Lib/unittest/__init__.py b/Lib/unittest/__init__.py index f1f6c911ef17d9..324e5d038aef03 100644 --- a/Lib/unittest/__init__.py +++ b/Lib/unittest/__init__.py @@ -57,9 +57,9 @@ def testMultiply(self): from .case import (addModuleCleanup, TestCase, FunctionTestCase, SkipTest, skip, skipIf, skipUnless, expectedFailure, doModuleCleanups, enterModuleContext) -from .suite import BaseTestSuite, TestSuite +from .suite import BaseTestSuite, TestSuite # noqa: F401 from .loader import TestLoader, defaultTestLoader -from .main import TestProgram, main +from .main import TestProgram, main # noqa: F401 from .runner import TextTestRunner, TextTestResult from .signals import installHandler, registerResult, removeResult, removeHandler # IsolatedAsyncioTestCase will be imported lazily. 
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index ac6719ce854182..58b0cb574a764a 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -108,7 +108,7 @@ # check for SSL try: - import ssl + import ssl # noqa: F401 except ImportError: _have_ssl = False else: diff --git a/Lib/xml/dom/__init__.py b/Lib/xml/dom/__init__.py index 97cf9a6429993d..dd7fb996afd616 100644 --- a/Lib/xml/dom/__init__.py +++ b/Lib/xml/dom/__init__.py @@ -137,4 +137,4 @@ class UserDataHandler: EMPTY_NAMESPACE = None EMPTY_PREFIX = None -from .domreg import getDOMImplementation, registerDOMImplementation +from .domreg import getDOMImplementation, registerDOMImplementation # noqa: F401 From 2078eb45ca0db495972a20fcaf96df8fcf48451d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 13 Jun 2024 16:28:59 +0200 Subject: [PATCH 184/373] gh-120397: Optimize str.count() for single characters (#120398) --- ...-06-12-13-47-25.gh-issue-120397.n-I_cc.rst | 2 ++ Objects/stringlib/fastsearch.h | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-12-13-47-25.gh-issue-120397.n-I_cc.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-12-13-47-25.gh-issue-120397.n-I_cc.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-12-13-47-25.gh-issue-120397.n-I_cc.rst new file mode 100644 index 00000000000000..05c55e8a45eb12 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-12-13-47-25.gh-issue-120397.n-I_cc.rst @@ -0,0 +1,2 @@ +Improve the througput by up to two times for the :meth:`str.count`, :meth:`bytes.count` and :meth:`bytearray.count` +methods for counting single characters. 
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h index 309ed1554f4699..05e700b06258f0 100644 --- a/Objects/stringlib/fastsearch.h +++ b/Objects/stringlib/fastsearch.h @@ -753,6 +753,22 @@ STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n, } +static inline Py_ssize_t +STRINGLIB(count_char_no_maxcount)(const STRINGLIB_CHAR *s, Py_ssize_t n, + const STRINGLIB_CHAR p0) +/* A specialized function of count_char that does not cut off at a maximum. + As a result, the compiler is able to vectorize the loop. */ +{ + Py_ssize_t count = 0; + for (Py_ssize_t i = 0; i < n; i++) { + if (s[i] == p0) { + count++; + } + } + return count; +} + + Py_LOCAL_INLINE(Py_ssize_t) FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, const STRINGLIB_CHAR* p, Py_ssize_t m, @@ -773,6 +789,9 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, else if (mode == FAST_RSEARCH) return STRINGLIB(rfind_char)(s, n, p[0]); else { + if (maxcount == PY_SSIZE_T_MAX) { + return STRINGLIB(count_char_no_maxcount)(s, n, p[0]); + } return STRINGLIB(count_char)(s, n, p[0], maxcount); } } From 6af190f8d0c5dcb1a875072a30caee2eaf448483 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Thu, 13 Jun 2024 19:53:45 +0300 Subject: [PATCH 185/373] gh-120397: Fix typo in NEWS entry (#120455) --- .../2024-06-12-13-47-25.gh-issue-120397.n-I_cc.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-12-13-47-25.gh-issue-120397.n-I_cc.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-12-13-47-25.gh-issue-120397.n-I_cc.rst index 05c55e8a45eb12..24f046d9d89d51 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2024-06-12-13-47-25.gh-issue-120397.n-I_cc.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-12-13-47-25.gh-issue-120397.n-I_cc.rst @@ -1,2 +1,2 @@ -Improve the througput by up to two times for the :meth:`str.count`, :meth:`bytes.count` and :meth:`bytearray.count` +Improve the throughput by up to two times for the 
:meth:`str.count`, :meth:`bytes.count` and :meth:`bytearray.count` methods for counting single characters. From c2d810b6d4deeea530648a8d0983e3a2adf6c942 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Thu, 13 Jun 2024 18:58:46 +0100 Subject: [PATCH 186/373] GH-119054: Add "Creating files and directories" section to pathlib docs. (#120186) Add dedicated subsection for `pathlib.Path.touch()`, `mkdir()`, `symlink_to()` and `hardlink_to()`. Also note that `open()`, `write_text()` and `write_bytes()` are often used to create files. Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/library/pathlib.rst | 165 +++++++++++++++++++++------------------- 1 file changed, 86 insertions(+), 79 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index b7ab44706a0160..138e41404dec9c 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1343,6 +1343,92 @@ Reading directories .. versionadded:: 3.12 +Creating files and directories +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. method:: Path.touch(mode=0o666, exist_ok=True) + + Create a file at this given path. If *mode* is given, it is combined + with the process's ``umask`` value to determine the file mode and access + flags. If the file already exists, the function succeeds when *exist_ok* + is true (and its modification time is updated to the current time), + otherwise :exc:`FileExistsError` is raised. + + .. seealso:: + The :meth:`~Path.open`, :meth:`~Path.write_text` and + :meth:`~Path.write_bytes` methods are often used to create files. + + +.. method:: Path.mkdir(mode=0o777, parents=False, exist_ok=False) + + Create a new directory at this given path. If *mode* is given, it is + combined with the process's ``umask`` value to determine the file mode + and access flags. If the path already exists, :exc:`FileExistsError` + is raised. 
+ + If *parents* is true, any missing parents of this path are created + as needed; they are created with the default permissions without taking + *mode* into account (mimicking the POSIX ``mkdir -p`` command). + + If *parents* is false (the default), a missing parent raises + :exc:`FileNotFoundError`. + + If *exist_ok* is false (the default), :exc:`FileExistsError` is + raised if the target directory already exists. + + If *exist_ok* is true, :exc:`FileExistsError` will not be raised unless the given + path already exists in the file system and is not a directory (same + behavior as the POSIX ``mkdir -p`` command). + + .. versionchanged:: 3.5 + The *exist_ok* parameter was added. + + +.. method:: Path.symlink_to(target, target_is_directory=False) + + Make this path a symbolic link pointing to *target*. + + On Windows, a symlink represents either a file or a directory, and does not + morph to the target dynamically. If the target is present, the type of the + symlink will be created to match. Otherwise, the symlink will be created + as a directory if *target_is_directory* is true or a file symlink (the + default) otherwise. On non-Windows platforms, *target_is_directory* is ignored. + + :: + + >>> p = Path('mylink') + >>> p.symlink_to('setup.py') + >>> p.resolve() + PosixPath('/home/antoine/pathlib/setup.py') + >>> p.stat().st_size + 956 + >>> p.lstat().st_size + 8 + + .. note:: + The order of arguments (link, target) is the reverse + of :func:`os.symlink`'s. + + .. versionchanged:: 3.13 + Raises :exc:`UnsupportedOperation` if :func:`os.symlink` is not + available. In previous versions, :exc:`NotImplementedError` was raised. + + +.. method:: Path.hardlink_to(target) + + Make this path a hard link to the same file as *target*. + + .. note:: + The order of arguments (link, target) is the reverse + of :func:`os.link`'s. + + .. versionadded:: 3.10 + + .. versionchanged:: 3.13 + Raises :exc:`UnsupportedOperation` if :func:`os.link` is not + available. 
In previous versions, :exc:`NotImplementedError` was raised. + + Other methods ^^^^^^^^^^^^^ @@ -1426,31 +1512,6 @@ Other methods symbolic link's mode is changed rather than its target's. -.. method:: Path.mkdir(mode=0o777, parents=False, exist_ok=False) - - Create a new directory at this given path. If *mode* is given, it is - combined with the process' ``umask`` value to determine the file mode - and access flags. If the path already exists, :exc:`FileExistsError` - is raised. - - If *parents* is true, any missing parents of this path are created - as needed; they are created with the default permissions without taking - *mode* into account (mimicking the POSIX ``mkdir -p`` command). - - If *parents* is false (the default), a missing parent raises - :exc:`FileNotFoundError`. - - If *exist_ok* is false (the default), :exc:`FileExistsError` is - raised if the target directory already exists. - - If *exist_ok* is true, :exc:`FileExistsError` will not be raised unless the given - path already exists in the file system and is not a directory (same - behavior as the POSIX ``mkdir -p`` command). - - .. versionchanged:: 3.5 - The *exist_ok* parameter was added. - - .. method:: Path.owner(*, follow_symlinks=True) Return the name of the user owning the file. :exc:`KeyError` is raised @@ -1572,60 +1633,6 @@ Other methods Remove this directory. The directory must be empty. -.. method:: Path.symlink_to(target, target_is_directory=False) - - Make this path a symbolic link pointing to *target*. - - On Windows, a symlink represents either a file or a directory, and does not - morph to the target dynamically. If the target is present, the type of the - symlink will be created to match. Otherwise, the symlink will be created - as a directory if *target_is_directory* is ``True`` or a file symlink (the - default) otherwise. On non-Windows platforms, *target_is_directory* is ignored. 
- - :: - - >>> p = Path('mylink') - >>> p.symlink_to('setup.py') - >>> p.resolve() - PosixPath('/home/antoine/pathlib/setup.py') - >>> p.stat().st_size - 956 - >>> p.lstat().st_size - 8 - - .. note:: - The order of arguments (link, target) is the reverse - of :func:`os.symlink`'s. - - .. versionchanged:: 3.13 - Raises :exc:`UnsupportedOperation` if :func:`os.symlink` is not - available. In previous versions, :exc:`NotImplementedError` was raised. - - -.. method:: Path.hardlink_to(target) - - Make this path a hard link to the same file as *target*. - - .. note:: - The order of arguments (link, target) is the reverse - of :func:`os.link`'s. - - .. versionadded:: 3.10 - - .. versionchanged:: 3.13 - Raises :exc:`UnsupportedOperation` if :func:`os.link` is not - available. In previous versions, :exc:`NotImplementedError` was raised. - - -.. method:: Path.touch(mode=0o666, exist_ok=True) - - Create a file at this given path. If *mode* is given, it is combined - with the process' ``umask`` value to determine the file mode and access - flags. If the file already exists, the function succeeds if *exist_ok* - is true (and its modification time is updated to the current time), - otherwise :exc:`FileExistsError` is raised. - - .. method:: Path.unlink(missing_ok=False) Remove this file or symbolic link. If the path points to a directory, From 50a389565aa0b480792ed06a2ab56fb5a72fc2d8 Mon Sep 17 00:00:00 2001 From: neonene <53406459+neonene@users.noreply.github.com> Date: Fri, 14 Jun 2024 03:05:03 +0900 Subject: [PATCH 187/373] gh-117398: Add datetime C-API type check test for subinterpreters (gh-119604) Check if the DateTime C-API type matches the datetime.date type on main and shared/isolated subinterpreters. 
--- Lib/test/datetimetester.py | 41 ++++++++++++++++++++++++++++++ Lib/test/support/__init__.py | 2 +- Modules/_testcapi/datetime.c | 48 +++++++++++++++++++++++++++++++++--- 3 files changed, 87 insertions(+), 4 deletions(-) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 28f75a803b4e04..45188731eed688 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -13,6 +13,7 @@ import re import struct import sys +import textwrap import unittest import warnings @@ -38,6 +39,10 @@ import _testcapi except ImportError: _testcapi = None +try: + import _interpreters +except ModuleNotFoundError: + _interpreters = None # Needed by test_datetime import _strptime @@ -6780,6 +6785,42 @@ def test_datetime_from_timestamp(self): self.assertEqual(dt_orig, dt_rt) + def test_type_check_in_subinterp(self): + script = textwrap.dedent(f""" + if {_interpreters is None}: + import _testcapi as module + module.test_datetime_capi() + else: + import importlib.machinery + import importlib.util + fullname = '_testcapi_datetime' + origin = importlib.util.find_spec('_testcapi').origin + loader = importlib.machinery.ExtensionFileLoader(fullname, origin) + spec = importlib.util.spec_from_loader(fullname, loader) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + + def run(type_checker, obj): + if not type_checker(obj, True): + raise TypeError(f'{{type(obj)}} is not C API type') + + import _datetime + run(module.datetime_check_date, _datetime.date.today()) + run(module.datetime_check_datetime, _datetime.datetime.now()) + run(module.datetime_check_time, _datetime.time(12, 30)) + run(module.datetime_check_delta, _datetime.timedelta(1)) + run(module.datetime_check_tzinfo, _datetime.tzinfo()) + """) + if _interpreters is None: + ret = support.run_in_subinterp(script) + self.assertEqual(ret, 0) + else: + for name in ('isolated', 'legacy'): + with self.subTest(name): + config = _interpreters.new_config(name).__dict__ + ret = 
support.run_in_subinterp_with_config(script, **config) + self.assertEqual(ret, 0) + def load_tests(loader, standard_tests, pattern): standard_tests.addTest(ZoneInfoCompleteTest()) diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 9e6100d2b89d6e..adc6362e20df00 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -1808,7 +1808,7 @@ def run_in_subinterp_with_config(code, *, own_gil=None, **config): config['gil'] = 'shared' elif gil == 2: config['gil'] = 'own' - else: + elif not isinstance(gil, str): raise NotImplementedError(gil) config = types.SimpleNamespace(**config) return _testinternalcapi.run_in_subinterp_with_config(code, config) diff --git a/Modules/_testcapi/datetime.c b/Modules/_testcapi/datetime.c index b1796039f0d83a..f3d54215e04232 100644 --- a/Modules/_testcapi/datetime.c +++ b/Modules/_testcapi/datetime.c @@ -22,10 +22,17 @@ test_datetime_capi(PyObject *self, PyObject *args) test_run_counter++; PyDateTime_IMPORT; - if (PyDateTimeAPI) { - Py_RETURN_NONE; + if (PyDateTimeAPI == NULL) { + return NULL; } - return NULL; + // The following C API types need to outlive interpreters, since the + // borrowed references to them can be held by users without being updated. + assert(!PyType_HasFeature(PyDateTimeAPI->DateType, Py_TPFLAGS_HEAPTYPE)); + assert(!PyType_HasFeature(PyDateTimeAPI->TimeType, Py_TPFLAGS_HEAPTYPE)); + assert(!PyType_HasFeature(PyDateTimeAPI->DateTimeType, Py_TPFLAGS_HEAPTYPE)); + assert(!PyType_HasFeature(PyDateTimeAPI->DeltaType, Py_TPFLAGS_HEAPTYPE)); + assert(!PyType_HasFeature(PyDateTimeAPI->TZInfoType, Py_TPFLAGS_HEAPTYPE)); + Py_RETURN_NONE; } /* Functions exposing the C API type checking for testing */ @@ -479,3 +486,38 @@ _PyTestCapi_Init_DateTime(PyObject *mod) } return 0; } + + +/* --------------------------------------------------------------------------- + * Test module for subinterpreters. 
+ */ + +static int +_testcapi_datetime_exec(PyObject *mod) +{ + if (test_datetime_capi(NULL, NULL) == NULL) { + return -1; + } + return 0; +} + +static PyModuleDef_Slot _testcapi_datetime_slots[] = { + {Py_mod_exec, _testcapi_datetime_exec}, + {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, + {Py_mod_gil, Py_MOD_GIL_NOT_USED}, + {0, NULL}, +}; + +static struct PyModuleDef _testcapi_datetime_module = { + PyModuleDef_HEAD_INIT, + .m_name = "_testcapi_datetime", + .m_size = 0, + .m_methods = test_methods, + .m_slots = _testcapi_datetime_slots, +}; + +PyMODINIT_FUNC +PyInit__testcapi_datetime(void) +{ + return PyModuleDef_Init(&_testcapi_datetime_module); +} From 6674c63dc7bb175acc997ddcb799e8dbbafd2968 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 13 Jun 2024 21:01:05 +0200 Subject: [PATCH 188/373] Add codeowner for Makefile.pre.in and Modules/Setup* (#120468) --- .github/CODEOWNERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 8bc40fcb9e8999..1f9047ab97e934 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -13,6 +13,8 @@ # Build system configure* @erlend-aasland @corona10 +Makefile.pre.in @erlend-aasland +Modules/Setup* @erlend-aasland # asyncio **/*asyncio* @1st1 @asvetlov @gvanrossum @kumaraditya303 @willingc From a3711afefa7a520b3de01be3b2367cb830d1fc84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 13 Jun 2024 21:03:01 +0200 Subject: [PATCH 189/373] gh-120012: clarify the behaviour of `multiprocessing.Queue.empty` on closed queues. (GH-120102) * improve doc for `multiprocessing.Queue.empty` * add tests for checking emptiness of queues Co-authored-by: Gregory P. 
Smith --- Doc/library/multiprocessing.rst | 4 +++ Lib/test/_test_multiprocessing.py | 26 +++++++++++++++++++ ...-06-05-12-36-18.gh-issue-120012.f14DbQ.rst | 3 +++ 3 files changed, 33 insertions(+) create mode 100644 Misc/NEWS.d/next/Documentation/2024-06-05-12-36-18.gh-issue-120012.f14DbQ.rst diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst index 49762491bae5f4..426291c5f0743d 100644 --- a/Doc/library/multiprocessing.rst +++ b/Doc/library/multiprocessing.rst @@ -837,6 +837,8 @@ For an example of the usage of queues for interprocess communication see Return ``True`` if the queue is empty, ``False`` otherwise. Because of multithreading/multiprocessing semantics, this is not reliable. + May raise an :exc:`OSError` on closed queues (not guaranteed). + .. method:: full() Return ``True`` if the queue is full, ``False`` otherwise. Because of @@ -940,6 +942,8 @@ For an example of the usage of queues for interprocess communication see Return ``True`` if the queue is empty, ``False`` otherwise. + Always raises an :exc:`OSError` if the SimpleQueue is closed. + .. method:: get() Remove and return an item from the queue. diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 301541a666e140..4b3a0645cfc84a 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -1332,6 +1332,23 @@ def _on_queue_feeder_error(e, obj): self.assertTrue(not_serializable_obj.reduce_was_called) self.assertTrue(not_serializable_obj.on_queue_feeder_error_was_called) + def test_closed_queue_empty_exceptions(self): + # Assert that checking the emptiness of an unused closed queue + # does not raise an OSError. The rationale is that q.close() is + # a no-op upon construction and becomes effective once the queue + # has been used (e.g., by calling q.put()).
+ for q in multiprocessing.Queue(), multiprocessing.JoinableQueue(): + q.close() # this is a no-op since the feeder thread is None + q.join_thread() # this is also a no-op + self.assertTrue(q.empty()) + + for q in multiprocessing.Queue(), multiprocessing.JoinableQueue(): + q.put('foo') # make sure that the queue is 'used' + q.close() # close the feeder thread + q.join_thread() # make sure to join the feeder thread + with self.assertRaisesRegex(OSError, 'is closed'): + q.empty() + def test_closed_queue_put_get_exceptions(self): for q in multiprocessing.Queue(), multiprocessing.JoinableQueue(): q.close() @@ -5815,6 +5832,15 @@ def _test_empty(cls, queue, child_can_start, parent_can_continue): finally: parent_can_continue.set() + def test_empty_exceptions(self): + # Assert that checking emptiness of a closed queue raises + # an OSError, independently of whether the queue was used + # or not. This differs from Queue and JoinableQueue. + q = multiprocessing.SimpleQueue() + q.close() # close the pipe + with self.assertRaisesRegex(OSError, 'is closed'): + q.empty() + def test_empty(self): queue = multiprocessing.SimpleQueue() child_can_start = multiprocessing.Event() diff --git a/Misc/NEWS.d/next/Documentation/2024-06-05-12-36-18.gh-issue-120012.f14DbQ.rst b/Misc/NEWS.d/next/Documentation/2024-06-05-12-36-18.gh-issue-120012.f14DbQ.rst new file mode 100644 index 00000000000000..2bf0c977b90387 --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2024-06-05-12-36-18.gh-issue-120012.f14DbQ.rst @@ -0,0 +1,3 @@ +Clarify the behaviours of :meth:`multiprocessing.Queue.empty` and +:meth:`multiprocessing.SimpleQueue.empty` on closed queues. +Patch by Bénédikt Tran. From d88a1f2e156cd1072119afa91d4f4dc4037c1b21 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Thu, 13 Jun 2024 21:25:26 +0100 Subject: [PATCH 190/373] GH-119054: Add "Renaming and deleting" section to pathlib docs. (#120465) Add dedicated subsection for `pathlib.Path.rename()`, `replace()`, `unlink()` and `rmdir()`. 
--- Doc/library/pathlib.rst | 124 +++++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 60 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 138e41404dec9c..278851549c6c3b 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1429,6 +1429,70 @@ Creating files and directories available. In previous versions, :exc:`NotImplementedError` was raised. +Renaming and deleting +^^^^^^^^^^^^^^^^^^^^^ + +.. method:: Path.rename(target) + + Rename this file or directory to the given *target*, and return a new + :class:`!Path` instance pointing to *target*. On Unix, if *target* exists + and is a file, it will be replaced silently if the user has permission. + On Windows, if *target* exists, :exc:`FileExistsError` will be raised. + *target* can be either a string or another path object:: + + >>> p = Path('foo') + >>> p.open('w').write('some text') + 9 + >>> target = Path('bar') + >>> p.rename(target) + PosixPath('bar') + >>> target.open().read() + 'some text' + + The target path may be absolute or relative. Relative paths are interpreted + relative to the current working directory, *not* the directory of the + :class:`!Path` object. + + It is implemented in terms of :func:`os.rename` and gives the same guarantees. + + .. versionchanged:: 3.8 + Added return value, return the new :class:`!Path` instance. + + +.. method:: Path.replace(target) + + Rename this file or directory to the given *target*, and return a new + :class:`!Path` instance pointing to *target*. If *target* points to an + existing file or empty directory, it will be unconditionally replaced. + + The target path may be absolute or relative. Relative paths are interpreted + relative to the current working directory, *not* the directory of the + :class:`!Path` object. + + .. versionchanged:: 3.8 + Added return value, return the new :class:`!Path` instance. + + +.. method:: Path.unlink(missing_ok=False) + + Remove this file or symbolic link. 
If the path points to a directory, + use :func:`Path.rmdir` instead. + + If *missing_ok* is false (the default), :exc:`FileNotFoundError` is + raised if the path does not exist. + + If *missing_ok* is true, :exc:`FileNotFoundError` exceptions will be + ignored (same behavior as the POSIX ``rm -f`` command). + + .. versionchanged:: 3.8 + The *missing_ok* parameter was added. + + +.. method:: Path.rmdir() + + Remove this directory. The directory must be empty. + + Other methods ^^^^^^^^^^^^^ @@ -1545,47 +1609,6 @@ Other methods available. In previous versions, :exc:`NotImplementedError` was raised. -.. method:: Path.rename(target) - - Rename this file or directory to the given *target*, and return a new Path - instance pointing to *target*. On Unix, if *target* exists and is a file, - it will be replaced silently if the user has permission. - On Windows, if *target* exists, :exc:`FileExistsError` will be raised. - *target* can be either a string or another path object:: - - >>> p = Path('foo') - >>> p.open('w').write('some text') - 9 - >>> target = Path('bar') - >>> p.rename(target) - PosixPath('bar') - >>> target.open().read() - 'some text' - - The target path may be absolute or relative. Relative paths are interpreted - relative to the current working directory, *not* the directory of the Path - object. - - It is implemented in terms of :func:`os.rename` and gives the same guarantees. - - .. versionchanged:: 3.8 - Added return value, return the new Path instance. - - -.. method:: Path.replace(target) - - Rename this file or directory to the given *target*, and return a new Path - instance pointing to *target*. If *target* points to an existing file or - empty directory, it will be unconditionally replaced. - - The target path may be absolute or relative. Relative paths are interpreted - relative to the current working directory, *not* the directory of the Path - object. - - .. versionchanged:: 3.8 - Added return value, return the new Path instance. - - .. 
method:: Path.absolute() Make the path absolute, without normalization or resolving symlinks. @@ -1628,25 +1651,6 @@ Other methods strict mode, and no exception is raised in non-strict mode. In previous versions, :exc:`RuntimeError` is raised no matter the value of *strict*. -.. method:: Path.rmdir() - - Remove this directory. The directory must be empty. - - -.. method:: Path.unlink(missing_ok=False) - - Remove this file or symbolic link. If the path points to a directory, - use :func:`Path.rmdir` instead. - - If *missing_ok* is false (the default), :exc:`FileNotFoundError` is - raised if the path does not exist. - - If *missing_ok* is true, :exc:`FileNotFoundError` exceptions will be - ignored (same behavior as the POSIX ``rm -f`` command). - - .. versionchanged:: 3.8 - The *missing_ok* parameter was added. - .. _pathlib-pattern-language: From 42351c3b9a357ec67135b30ed41f59e6f306ac52 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Thu, 13 Jun 2024 22:16:40 +0100 Subject: [PATCH 191/373] gh-114053: Fix bad interaction of PEP 695, PEP 563 and `inspect.get_annotations` (#120270) --- Lib/inspect.py | 8 +- .../inspect_stringized_annotations_pep695.py | 72 ++++++++++++ Lib/test/test_inspect/test_inspect.py | 103 ++++++++++++++++++ ...-06-08-15-15-29.gh-issue-114053.WQLAFG.rst | 4 + 4 files changed, 186 insertions(+), 1 deletion(-) create mode 100644 Lib/test/test_inspect/inspect_stringized_annotations_pep695.py create mode 100644 Misc/NEWS.d/next/Library/2024-06-08-15-15-29.gh-issue-114053.WQLAFG.rst diff --git a/Lib/inspect.py b/Lib/inspect.py index 5570a43ebfea19..11544b8d0d4932 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -274,7 +274,13 @@ def get_annotations(obj, *, globals=None, locals=None, eval_str=False): if globals is None: globals = obj_globals if locals is None: - locals = obj_locals + locals = obj_locals or {} + + # "Inject" type parameters into the local namespace + # (unless they are shadowed by assignments *in* the local namespace), + # as a way 
of emulating annotation scopes when calling `eval()` + if type_params := getattr(obj, "__type_params__", ()): + locals = {param.__name__: param for param in type_params} | locals return_value = {key: value if not isinstance(value, str) else eval(value, globals, locals) diff --git a/Lib/test/test_inspect/inspect_stringized_annotations_pep695.py b/Lib/test/test_inspect/inspect_stringized_annotations_pep695.py new file mode 100644 index 00000000000000..723822f8eaa92d --- /dev/null +++ b/Lib/test/test_inspect/inspect_stringized_annotations_pep695.py @@ -0,0 +1,72 @@ +from __future__ import annotations +from typing import Callable, Unpack + + +class A[T, *Ts, **P]: + x: T + y: tuple[*Ts] + z: Callable[P, str] + + +class B[T, *Ts, **P]: + T = int + Ts = str + P = bytes + x: T + y: Ts + z: P + + +Eggs = int +Spam = str + + +class C[Eggs, **Spam]: + x: Eggs + y: Spam + + +def generic_function[T, *Ts, **P]( + x: T, *y: Unpack[Ts], z: P.args, zz: P.kwargs +) -> None: ... + + +def generic_function_2[Eggs, **Spam](x: Eggs, y: Spam): pass + + +class D: + Foo = int + Bar = str + + def generic_method[Foo, **Bar]( + self, x: Foo, y: Bar + ) -> None: ... 
+ + def generic_method_2[Eggs, **Spam](self, x: Eggs, y: Spam): pass + + +def nested(): + from types import SimpleNamespace + from inspect import get_annotations + + Eggs = bytes + Spam = memoryview + + + class E[Eggs, **Spam]: + x: Eggs + y: Spam + + def generic_method[Eggs, **Spam](self, x: Eggs, y: Spam): pass + + + def generic_function[Eggs, **Spam](x: Eggs, y: Spam): pass + + + return SimpleNamespace( + E=E, + E_annotations=get_annotations(E, eval_str=True), + E_meth_annotations=get_annotations(E.generic_method, eval_str=True), + generic_func=generic_function, + generic_func_annotations=get_annotations(generic_function, eval_str=True) + ) diff --git a/Lib/test/test_inspect/test_inspect.py b/Lib/test/test_inspect/test_inspect.py index 0a4fa9343f15e0..140efac530afb2 100644 --- a/Lib/test/test_inspect/test_inspect.py +++ b/Lib/test/test_inspect/test_inspect.py @@ -22,6 +22,7 @@ import types import tempfile import textwrap +from typing import Unpack import unicodedata import unittest import unittest.mock @@ -47,6 +48,7 @@ from test.test_inspect import inspect_stock_annotations from test.test_inspect import inspect_stringized_annotations from test.test_inspect import inspect_stringized_annotations_2 +from test.test_inspect import inspect_stringized_annotations_pep695 # Functions tested in this suite: @@ -1692,6 +1694,107 @@ def wrapper(a, b): self.assertEqual(inspect.get_annotations(isa.MyClassWithLocalAnnotations), {'x': 'mytype'}) self.assertEqual(inspect.get_annotations(isa.MyClassWithLocalAnnotations, eval_str=True), {'x': int}) + def test_pep695_generic_class_with_future_annotations(self): + ann_module695 = inspect_stringized_annotations_pep695 + A_annotations = inspect.get_annotations(ann_module695.A, eval_str=True) + A_type_params = ann_module695.A.__type_params__ + self.assertIs(A_annotations["x"], A_type_params[0]) + self.assertEqual(A_annotations["y"].__args__[0], Unpack[A_type_params[1]]) + self.assertIs(A_annotations["z"].__args__[0], A_type_params[2]) 
+ + def test_pep695_generic_class_with_future_annotations_and_local_shadowing(self): + B_annotations = inspect.get_annotations( + inspect_stringized_annotations_pep695.B, eval_str=True + ) + self.assertEqual(B_annotations, {"x": int, "y": str, "z": bytes}) + + def test_pep695_generic_class_with_future_annotations_name_clash_with_global_vars(self): + ann_module695 = inspect_stringized_annotations_pep695 + C_annotations = inspect.get_annotations(ann_module695.C, eval_str=True) + self.assertEqual( + set(C_annotations.values()), + set(ann_module695.C.__type_params__) + ) + + def test_pep_695_generic_function_with_future_annotations(self): + ann_module695 = inspect_stringized_annotations_pep695 + generic_func_annotations = inspect.get_annotations( + ann_module695.generic_function, eval_str=True + ) + func_t_params = ann_module695.generic_function.__type_params__ + self.assertEqual( + generic_func_annotations.keys(), {"x", "y", "z", "zz", "return"} + ) + self.assertIs(generic_func_annotations["x"], func_t_params[0]) + self.assertEqual(generic_func_annotations["y"], Unpack[func_t_params[1]]) + self.assertIs(generic_func_annotations["z"].__origin__, func_t_params[2]) + self.assertIs(generic_func_annotations["zz"].__origin__, func_t_params[2]) + + def test_pep_695_generic_function_with_future_annotations_name_clash_with_global_vars(self): + self.assertEqual( + set( + inspect.get_annotations( + inspect_stringized_annotations_pep695.generic_function_2, + eval_str=True + ).values() + ), + set( + inspect_stringized_annotations_pep695.generic_function_2.__type_params__ + ) + ) + + def test_pep_695_generic_method_with_future_annotations(self): + ann_module695 = inspect_stringized_annotations_pep695 + generic_method_annotations = inspect.get_annotations( + ann_module695.D.generic_method, eval_str=True + ) + params = { + param.__name__: param + for param in ann_module695.D.generic_method.__type_params__ + } + self.assertEqual( + generic_method_annotations, + {"x": params["Foo"],
"y": params["Bar"], "return": None} + ) + + def test_pep_695_generic_method_with_future_annotations_name_clash_with_global_vars(self): + self.assertEqual( + set( + inspect.get_annotations( + inspect_stringized_annotations_pep695.D.generic_method_2, + eval_str=True + ).values() + ), + set( + inspect_stringized_annotations_pep695.D.generic_method_2.__type_params__ + ) + ) + + def test_pep_695_generics_with_future_annotations_nested_in_function(self): + results = inspect_stringized_annotations_pep695.nested() + + self.assertEqual( + set(results.E_annotations.values()), + set(results.E.__type_params__) + ) + self.assertEqual( + set(results.E_meth_annotations.values()), + set(results.E.generic_method.__type_params__) + ) + self.assertNotEqual( + set(results.E_meth_annotations.values()), + set(results.E.__type_params__) + ) + self.assertEqual( + set(results.E_meth_annotations.values()).intersection(results.E.__type_params__), + set() + ) + + self.assertEqual( + set(results.generic_func_annotations.values()), + set(results.generic_func.__type_params__) + ) + class TestFormatAnnotation(unittest.TestCase): def test_typing_replacement(self): diff --git a/Misc/NEWS.d/next/Library/2024-06-08-15-15-29.gh-issue-114053.WQLAFG.rst b/Misc/NEWS.d/next/Library/2024-06-08-15-15-29.gh-issue-114053.WQLAFG.rst new file mode 100644 index 00000000000000..be49577a712867 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-08-15-15-29.gh-issue-114053.WQLAFG.rst @@ -0,0 +1,4 @@ +Fix erroneous :exc:`NameError` when calling :func:`inspect.get_annotations` +with ``eval_str=True`` on a class that made use of :pep:`695` type +parameters in a module that had ``from __future__ import annotations`` at +the top of the file. Patch by Alex Waygood.
From 41554ef0e0925695544d96a7bc49af1428d6bb6b Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 14 Jun 2024 10:21:35 -0500 Subject: [PATCH 192/373] Stronger tests for the statistics kernel formulas (gh-120506) --- Lib/test/test_statistics.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index 0b28459f03d86a..c374c947e02a6b 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -2434,18 +2434,22 @@ def integrate(func, low, high, steps=10_000): data.append(100) self.assertGreater(f_hat(100), 0.0) - def test_kde_kernel_invcdfs(self): + def test_kde_kernel_specs(self): + # White-box test for the kernel formulas in isolation from + # their downstream use in kde() and kde_random() kernel_specs = statistics._kernel_specs - kde = statistics.kde # Verify that cdf / invcdf will round trip xarr = [i/100 for i in range(-100, 101)] + parr = [i/1000 + 5/10000 for i in range(1000)] for kernel, spec in kernel_specs.items(): + cdf = spec['cdf'] invcdf = spec['invcdf'] with self.subTest(kernel=kernel): - cdf = kde([0.0], h=1.0, kernel=kernel, cumulative=True) for x in xarr: self.assertAlmostEqual(invcdf(cdf(x)), x, places=6) + for p in parr: + self.assertAlmostEqual(cdf(invcdf(p)), p, places=11) @support.requires_resource('cpu') def test_kde_random(self): From 27419f1fce05a18384e6fb3b8ad59b7f532821e6 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 14 Jun 2024 11:00:46 -0500 Subject: [PATCH 193/373] Update tests for the itertools docs rough equivalents (#120509) --- Lib/test/test_itertools.py | 333 +++++++++++++++++++++++++++++++++++-- 1 file changed, 315 insertions(+), 18 deletions(-) diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index 53b8064c3cfe82..5fd6ecf37427f7 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -1587,27 +1587,169 @@ def batched_recipe(iterable, n): self.assertEqual(r1, r2) 
self.assertEqual(e1, e2) + + def test_groupby_recipe(self): + + # Begin groupby() recipe ####################################### + + def groupby(iterable, key=None): + # [k for k, g in groupby('AAAABBBCCDAABBB')] → A B C D A B + # [list(g) for k, g in groupby('AAAABBBCCD')] → AAAA BBB CC D + + keyfunc = (lambda x: x) if key is None else key + iterator = iter(iterable) + exhausted = False + + def _grouper(target_key): + nonlocal curr_value, curr_key, exhausted + yield curr_value + for curr_value in iterator: + curr_key = keyfunc(curr_value) + if curr_key != target_key: + return + yield curr_value + exhausted = True + + try: + curr_value = next(iterator) + except StopIteration: + return + curr_key = keyfunc(curr_value) + + while not exhausted: + target_key = curr_key + curr_group = _grouper(target_key) + yield curr_key, curr_group + if curr_key == target_key: + for _ in curr_group: + pass + + # End groupby() recipe ######################################### + + # Check whether it accepts arguments correctly + self.assertEqual([], list(groupby([]))) + self.assertEqual([], list(groupby([], key=id))) + self.assertRaises(TypeError, list, groupby('abc', [])) + if False: + # Test not applicable to the recipe + self.assertRaises(TypeError, list, groupby('abc', None)) + self.assertRaises(TypeError, groupby, 'abc', lambda x:x, 10) + + # Check normal input + s = [(0, 10, 20), (0, 11,21), (0,12,21), (1,13,21), (1,14,22), + (2,15,22), (3,16,23), (3,17,23)] + dup = [] + for k, g in groupby(s, lambda r:r[0]): + for elem in g: + self.assertEqual(k, elem[0]) + dup.append(elem) + self.assertEqual(s, dup) + + # Check nested case + dup = [] + for k, g in groupby(s, testR): + for ik, ig in groupby(g, testR2): + for elem in ig: + self.assertEqual(k, elem[0]) + self.assertEqual(ik, elem[2]) + dup.append(elem) + self.assertEqual(s, dup) + + # Check case where inner iterator is not used + keys = [k for k, g in groupby(s, testR)] + expectedkeys = set([r[0] for r in s]) + 
self.assertEqual(set(keys), expectedkeys) + self.assertEqual(len(keys), len(expectedkeys)) + + # Check case where inner iterator is used after advancing the groupby + # iterator + s = list(zip('AABBBAAAA', range(9))) + it = groupby(s, testR) + _, g1 = next(it) + _, g2 = next(it) + _, g3 = next(it) + self.assertEqual(list(g1), []) + self.assertEqual(list(g2), []) + self.assertEqual(next(g3), ('A', 5)) + list(it) # exhaust the groupby iterator + self.assertEqual(list(g3), []) + + # Exercise pipes and filters style + s = 'abracadabra' + # sort s | uniq + r = [k for k, g in groupby(sorted(s))] + self.assertEqual(r, ['a', 'b', 'c', 'd', 'r']) + # sort s | uniq -d + r = [k for k, g in groupby(sorted(s)) if list(islice(g,1,2))] + self.assertEqual(r, ['a', 'b', 'r']) + # sort s | uniq -c + r = [(len(list(g)), k) for k, g in groupby(sorted(s))] + self.assertEqual(r, [(5, 'a'), (2, 'b'), (1, 'c'), (1, 'd'), (2, 'r')]) + # sort s | uniq -c | sort -rn | head -3 + r = sorted([(len(list(g)) , k) for k, g in groupby(sorted(s))], reverse=True)[:3] + self.assertEqual(r, [(5, 'a'), (2, 'r'), (2, 'b')]) + + # iter.__next__ failure + class ExpectedError(Exception): + pass + def delayed_raise(n=0): + for i in range(n): + yield 'yo' + raise ExpectedError + def gulp(iterable, keyp=None, func=list): + return [func(g) for k, g in groupby(iterable, keyp)] + + # iter.__next__ failure on outer object + self.assertRaises(ExpectedError, gulp, delayed_raise(0)) + # iter.__next__ failure on inner object + self.assertRaises(ExpectedError, gulp, delayed_raise(1)) + + # __eq__ failure + class DummyCmp: + def __eq__(self, dst): + raise ExpectedError + s = [DummyCmp(), DummyCmp(), None] + + # __eq__ failure on outer object + self.assertRaises(ExpectedError, gulp, s, func=id) + # __eq__ failure on inner object + self.assertRaises(ExpectedError, gulp, s) + + # keyfunc failure + def keyfunc(obj): + if keyfunc.skip > 0: + keyfunc.skip -= 1 + return obj + else: + raise ExpectedError + + # keyfunc failure 
on outer object + keyfunc.skip = 0 + self.assertRaises(ExpectedError, gulp, [None], keyfunc) + keyfunc.skip = 1 + self.assertRaises(ExpectedError, gulp, [None, None], keyfunc) + + @staticmethod def islice(iterable, *args): + # islice('ABCDEFG', 2) → A B + # islice('ABCDEFG', 2, 4) → C D + # islice('ABCDEFG', 2, None) → C D E F G + # islice('ABCDEFG', 0, None, 2) → A C E G + s = slice(*args) - start, stop, step = s.start or 0, s.stop or sys.maxsize, s.step or 1 - it = iter(range(start, stop, step)) - try: - nexti = next(it) - except StopIteration: - # Consume *iterable* up to the *start* position. - for i, element in zip(range(start), iterable): - pass - return - try: - for i, element in enumerate(iterable): - if i == nexti: - yield element - nexti = next(it) - except StopIteration: - # Consume to *stop*. - for i, element in zip(range(i + 1, stop), iterable): - pass + start = 0 if s.start is None else s.start + stop = s.stop + step = 1 if s.step is None else s.step + if start < 0 or (stop is not None and stop < 0) or step <= 0: + raise ValueError + + indices = count() if stop is None else range(max(start, stop)) + next_i = start + for i, element in zip(indices, iterable): + if i == next_i: + yield element + next_i += step def test_islice_recipe(self): self.assertEqual(list(self.islice('ABCDEFG', 2)), list('AB')) @@ -1627,6 +1769,161 @@ def test_islice_recipe(self): self.assertEqual(next(c), 3) + def test_tee_recipe(self): + + # Begin tee() recipe ########################################### + + def tee(iterable, n=2): + iterator = iter(iterable) + shared_link = [None, None] + return tuple(_tee(iterator, shared_link) for _ in range(n)) + + def _tee(iterator, link): + try: + while True: + if link[1] is None: + link[0] = next(iterator) + link[1] = [None, None] + value, link = link + yield value + except StopIteration: + return + + # End tee() recipe ############################################# + + n = 200 + + a, b = tee([]) # test empty iterator + 
self.assertEqual(list(a), []) + self.assertEqual(list(b), []) + + a, b = tee(irange(n)) # test 100% interleaved + self.assertEqual(lzip(a,b), lzip(range(n), range(n))) + + a, b = tee(irange(n)) # test 0% interleaved + self.assertEqual(list(a), list(range(n))) + self.assertEqual(list(b), list(range(n))) + + a, b = tee(irange(n)) # test dealloc of leading iterator + for i in range(100): + self.assertEqual(next(a), i) + del a + self.assertEqual(list(b), list(range(n))) + + a, b = tee(irange(n)) # test dealloc of trailing iterator + for i in range(100): + self.assertEqual(next(a), i) + del b + self.assertEqual(list(a), list(range(100, n))) + + for j in range(5): # test randomly interleaved + order = [0]*n + [1]*n + random.shuffle(order) + lists = ([], []) + its = tee(irange(n)) + for i in order: + value = next(its[i]) + lists[i].append(value) + self.assertEqual(lists[0], list(range(n))) + self.assertEqual(lists[1], list(range(n))) + + # test argument format checking + self.assertRaises(TypeError, tee) + self.assertRaises(TypeError, tee, 3) + self.assertRaises(TypeError, tee, [1,2], 'x') + self.assertRaises(TypeError, tee, [1,2], 3, 'x') + + # Tests not applicable to the tee() recipe + if False: + # tee object should be instantiable + a, b = tee('abc') + c = type(a)('def') + self.assertEqual(list(c), list('def')) + + # test long-lagged and multi-way split + a, b, c = tee(range(2000), 3) + for i in range(100): + self.assertEqual(next(a), i) + self.assertEqual(list(b), list(range(2000))) + self.assertEqual([next(c), next(c)], list(range(2))) + self.assertEqual(list(a), list(range(100,2000))) + self.assertEqual(list(c), list(range(2,2000))) + + # Tests not applicable to the tee() recipe + if False: + # test invalid values of n + self.assertRaises(TypeError, tee, 'abc', 'invalid') + self.assertRaises(ValueError, tee, [], -1) + + for n in range(5): + result = tee('abc', n) + self.assertEqual(type(result), tuple) + self.assertEqual(len(result), n) + self.assertEqual([list(x) 
for x in result], [list('abc')]*n) + + + # Tests not applicable to the tee() recipe + if False: + # tee pass-through to copyable iterator + a, b = tee('abc') + c, d = tee(a) + self.assertTrue(a is c) + + # test tee_new + t1, t2 = tee('abc') + tnew = type(t1) + self.assertRaises(TypeError, tnew) + self.assertRaises(TypeError, tnew, 10) + t3 = tnew(t1) + self.assertTrue(list(t1) == list(t2) == list(t3) == list('abc')) + + # test that tee objects are weak referencable + a, b = tee(range(10)) + p = weakref.proxy(a) + self.assertEqual(getattr(p, '__class__'), type(b)) + del a + gc.collect() # For PyPy or other GCs. + self.assertRaises(ReferenceError, getattr, p, '__class__') + + ans = list('abc') + long_ans = list(range(10000)) + + # Tests not applicable to the tee() recipe + if False: + # check copy + a, b = tee('abc') + self.assertEqual(list(copy.copy(a)), ans) + self.assertEqual(list(copy.copy(b)), ans) + a, b = tee(list(range(10000))) + self.assertEqual(list(copy.copy(a)), long_ans) + self.assertEqual(list(copy.copy(b)), long_ans) + + # check partially consumed copy + a, b = tee('abc') + take(2, a) + take(1, b) + self.assertEqual(list(copy.copy(a)), ans[2:]) + self.assertEqual(list(copy.copy(b)), ans[1:]) + self.assertEqual(list(a), ans[2:]) + self.assertEqual(list(b), ans[1:]) + a, b = tee(range(10000)) + take(100, a) + take(60, b) + self.assertEqual(list(copy.copy(a)), long_ans[100:]) + self.assertEqual(list(copy.copy(b)), long_ans[60:]) + self.assertEqual(list(a), long_ans[100:]) + self.assertEqual(list(b), long_ans[60:]) + + # Issue 13454: Crash when deleting backward iterator from tee() + forward, backward = tee(repeat(None, 2000)) # 20000000 + try: + any(forward) # exhaust the iterator + del backward + except: + del forward, backward + raise + + class TestGC(unittest.TestCase): def makecycle(self, iterator, container): From 2bacc2343c24c49292dea3461f6b7664fc2d33e2 Mon Sep 17 00:00:00 2001 From: AN Long Date: Sat, 15 Jun 2024 00:10:18 +0800 Subject: [PATCH 
194/373] gh-117657: Add TSAN suppression for set_default_allocator_unlocked (#120500) Add TSAN suppression for set_default_allocator_unlocked --- Tools/tsan/suppressions_free_threading.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index 05ceaf438b6353..4c8b0b8abd2963 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -63,6 +63,8 @@ race_top:tstate_is_freed race_top:type_modified_unlocked race_top:write_thread_id race_top:PyThreadState_Clear +# Only seen on macOS, sample: https://gist.github.com/aisk/dda53f5d494a4556c35dde1fce03259c +race_top:set_default_allocator_unlocked # https://gist.github.com/mpage/6962e8870606cfc960e159b407a0cb40 thread:pthread_create From 7c38097add9cc24e9f68414cd3e5e1b6cbe38a17 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 14 Jun 2024 17:15:49 +0100 Subject: [PATCH 195/373] GH-73991: Add `pathlib.Path.copy()` (#119058) Add a `Path.copy()` method that copies the content of one file to another. This method is similar to `shutil.copyfile()` but differs in the following ways: - Uses `fcntl.FICLONE` where available (see GH-81338) - Uses `os.copy_file_range` where available (see GH-81340) - Uses `_winapi.CopyFile2` where available, even though this copies more metadata than the other implementations. This makes `WindowsPath.copy()` more similar to `shutil.copy2()`. The method is presently _less_ specified than the `shutil` functions to allow OS-specific optimizations that might copy more or less metadata. Incorporates code from GH-81338 and GH-93152. 
Co-authored-by: Eryk Sun --- Doc/library/pathlib.rst | 18 ++- Doc/whatsnew/3.14.rst | 7 + Lib/pathlib/_abc.py | 30 ++++ Lib/pathlib/_local.py | 16 ++ Lib/pathlib/_os.py | 138 ++++++++++++++++++ Lib/test/test_pathlib/test_pathlib_abc.py | 62 ++++++++ ...4-05-15-01-36-08.gh-issue-73991.CGknDf.rst | 2 + 7 files changed, 271 insertions(+), 2 deletions(-) create mode 100644 Lib/pathlib/_os.py create mode 100644 Misc/NEWS.d/next/Library/2024-05-15-01-36-08.gh-issue-73991.CGknDf.rst diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 278851549c6c3b..c8a3272d7bab4c 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1429,8 +1429,22 @@ Creating files and directories available. In previous versions, :exc:`NotImplementedError` was raised. -Renaming and deleting -^^^^^^^^^^^^^^^^^^^^^ +Copying, renaming and deleting +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. method:: Path.copy(target) + + Copy the contents of this file to the *target* file. If *target* specifies + a file that already exists, it will be replaced. + + .. note:: + This method uses operating system functionality to copy file content + efficiently. The OS might also copy some metadata, such as file + permissions. After the copy is complete, users may wish to call + :meth:`Path.chmod` to set the permissions of the target file. + + .. versionadded:: 3.14 + .. method:: Path.rename(target) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index b357553735e8bb..a102af13a08362 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -100,6 +100,13 @@ os by :func:`os.unsetenv`, or made outside Python in the same process. (Contributed by Victor Stinner in :gh:`120057`.) +pathlib +------- + +* Add :meth:`pathlib.Path.copy`, which copies the content of one file to + another, like :func:`shutil.copyfile`. + (Contributed by Barney Gale in :gh:`73991`.) 
+ symtable -------- diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index ecea8e88d1a2e3..586145ead384ea 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -16,6 +16,7 @@ import posixpath from glob import _GlobberBase, _no_recurse_symlinks from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO +from ._os import copyfileobj __all__ = ["UnsupportedOperation"] @@ -563,6 +564,15 @@ def samefile(self, other_path): return (st.st_ino == other_st.st_ino and st.st_dev == other_st.st_dev) + def _samefile_safe(self, other_path): + """ + Like samefile(), but returns False rather than raising OSError. + """ + try: + return self.samefile(other_path) + except (OSError, ValueError): + return False + def open(self, mode='r', buffering=-1, encoding=None, errors=None, newline=None): """ @@ -780,6 +790,26 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): """ raise UnsupportedOperation(self._unsupported_msg('mkdir()')) + def copy(self, target): + """ + Copy the contents of this file to the given target. + """ + if not isinstance(target, PathBase): + target = self.with_segments(target) + if self._samefile_safe(target): + raise OSError(f"{self!r} and {target!r} are the same file") + with self.open('rb') as source_f: + try: + with target.open('wb') as target_f: + copyfileobj(source_f, target_f) + except IsADirectoryError as e: + if not target.exists(): + # Raise a less confusing exception. + raise FileNotFoundError( + f'Directory does not exist: {target}') from e + else: + raise + def rename(self, target): """ Rename this path to the target path. 
diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 473fd525768b50..cffed10dbd1207 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -18,6 +18,7 @@ grp = None from ._abc import UnsupportedOperation, PurePathBase, PathBase +from ._os import copyfile __all__ = [ @@ -780,6 +781,21 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): if not exist_ok or not self.is_dir(): raise + if copyfile: + def copy(self, target): + """ + Copy the contents of this file to the given target. + """ + try: + target = os.fspath(target) + except TypeError: + if isinstance(target, PathBase): + # Target is an instance of PathBase but not os.PathLike. + # Use generic implementation from PathBase. + return PathBase.copy(self, target) + raise + copyfile(os.fspath(self), target) + def chmod(self, mode, *, follow_symlinks=True): """ Change the permissions of the path, like os.chmod(). diff --git a/Lib/pathlib/_os.py b/Lib/pathlib/_os.py new file mode 100644 index 00000000000000..1771d54e4167c1 --- /dev/null +++ b/Lib/pathlib/_os.py @@ -0,0 +1,138 @@ +""" +Low-level OS functionality wrappers used by pathlib. +""" + +from errno import EBADF, EOPNOTSUPP, ETXTBSY, EXDEV +import os +import sys +try: + import fcntl +except ImportError: + fcntl = None +try: + import posix +except ImportError: + posix = None +try: + import _winapi +except ImportError: + _winapi = None + + +def get_copy_blocksize(infd): + """Determine blocksize for fastcopying on Linux. + Hopefully the whole file will be copied in a single call. + The copying itself should be performed in a loop 'till EOF is + reached (0 return) so a blocksize smaller or bigger than the actual + file size should not make any difference, also in case the file + content changes while being copied. + """ + try: + blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8 MiB + except OSError: + blocksize = 2 ** 27 # 128 MiB + # On 32-bit architectures truncate to 1 GiB to avoid OverflowError, + # see gh-82500. 
+ if sys.maxsize < 2 ** 32: + blocksize = min(blocksize, 2 ** 30) + return blocksize + + +if fcntl and hasattr(fcntl, 'FICLONE'): + def clonefd(source_fd, target_fd): + """ + Perform a lightweight copy of two files, where the data blocks are + copied only when modified. This is known as Copy on Write (CoW), + instantaneous copy or reflink. + """ + fcntl.ioctl(target_fd, fcntl.FICLONE, source_fd) +else: + clonefd = None + + +if posix and hasattr(posix, '_fcopyfile'): + def copyfd(source_fd, target_fd): + """ + Copy a regular file content using high-performance fcopyfile(3) + syscall (macOS). + """ + posix._fcopyfile(source_fd, target_fd, posix._COPYFILE_DATA) +elif hasattr(os, 'copy_file_range'): + def copyfd(source_fd, target_fd): + """ + Copy data from one regular mmap-like fd to another by using a + high-performance copy_file_range(2) syscall that gives filesystems + an opportunity to implement the use of reflinks or server-side + copy. + This should work on Linux >= 4.5 only. + """ + blocksize = get_copy_blocksize(source_fd) + offset = 0 + while True: + sent = os.copy_file_range(source_fd, target_fd, blocksize, + offset_dst=offset) + if sent == 0: + break # EOF + offset += sent +elif hasattr(os, 'sendfile'): + def copyfd(source_fd, target_fd): + """Copy data from one regular mmap-like fd to another by using + high-performance sendfile(2) syscall. + This should work on Linux >= 2.6.33 only. + """ + blocksize = get_copy_blocksize(source_fd) + offset = 0 + while True: + sent = os.sendfile(target_fd, source_fd, offset, blocksize) + if sent == 0: + break # EOF + offset += sent +else: + copyfd = None + + +if _winapi and hasattr(_winapi, 'CopyFile2'): + def copyfile(source, target): + """ + Copy from one file to another using CopyFile2 (Windows only). + """ + _winapi.CopyFile2(source, target, 0) +else: + copyfile = None + + +def copyfileobj(source_f, target_f): + """ + Copy data from file-like object source_f to file-like object target_f. 
+ """ + try: + source_fd = source_f.fileno() + target_fd = target_f.fileno() + except Exception: + pass # Fall through to generic code. + else: + try: + # Use OS copy-on-write where available. + if clonefd: + try: + clonefd(source_fd, target_fd) + return + except OSError as err: + if err.errno not in (EBADF, EOPNOTSUPP, ETXTBSY, EXDEV): + raise err + + # Use OS copy where available. + if copyfd: + copyfd(source_fd, target_fd) + return + except OSError as err: + # Produce more useful error messages. + err.filename = source_f.name + err.filename2 = target_f.name + raise err + + # Last resort: copy with fileobj read() and write(). + read_source = source_f.read + write_target = target_f.write + while buf := read_source(1024 * 1024): + write_target(buf) diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 57cc1612c03468..fd71284159d5c0 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1696,6 +1696,68 @@ def test_write_text_with_newlines(self): self.assertEqual((p / 'fileA').read_bytes(), b'abcde' + os_linesep_byte + b'fghlk' + os_linesep_byte + b'\rmnopq') + def test_copy_file(self): + base = self.cls(self.base) + source = base / 'fileA' + target = base / 'copyA' + source.copy(target) + self.assertTrue(target.exists()) + self.assertEqual(source.read_text(), target.read_text()) + + def test_copy_directory(self): + base = self.cls(self.base) + source = base / 'dirA' + target = base / 'copyA' + with self.assertRaises(OSError): + source.copy(target) + + @needs_symlinks + def test_copy_symlink(self): + base = self.cls(self.base) + source = base / 'linkA' + target = base / 'copyA' + source.copy(target) + self.assertTrue(target.exists()) + self.assertFalse(target.is_symlink()) + self.assertEqual(source.read_text(), target.read_text()) + + def test_copy_to_existing_file(self): + base = self.cls(self.base) + source = base / 'fileA' + target = base / 'dirB' / 'fileB' + 
source.copy(target) + self.assertTrue(target.exists()) + self.assertEqual(source.read_text(), target.read_text()) + + def test_copy_to_existing_directory(self): + base = self.cls(self.base) + source = base / 'fileA' + target = base / 'dirA' + with self.assertRaises(OSError): + source.copy(target) + + @needs_symlinks + def test_copy_to_existing_symlink(self): + base = self.cls(self.base) + source = base / 'dirB' / 'fileB' + target = base / 'linkA' + real_target = base / 'fileA' + source.copy(target) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertTrue(real_target.exists()) + self.assertFalse(real_target.is_symlink()) + self.assertEqual(source.read_text(), real_target.read_text()) + + def test_copy_empty(self): + base = self.cls(self.base) + source = base / 'empty' + target = base / 'copyA' + source.write_bytes(b'') + source.copy(target) + self.assertTrue(target.exists()) + self.assertEqual(target.read_bytes(), b'') + def test_iterdir(self): P = self.cls p = P(self.base) diff --git a/Misc/NEWS.d/next/Library/2024-05-15-01-36-08.gh-issue-73991.CGknDf.rst b/Misc/NEWS.d/next/Library/2024-05-15-01-36-08.gh-issue-73991.CGknDf.rst new file mode 100644 index 00000000000000..c2953c65b2720f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-15-01-36-08.gh-issue-73991.CGknDf.rst @@ -0,0 +1,2 @@ +Add :meth:`pathlib.Path.copy`, which copies the content of one file to another, +like :func:`shutil.copyfile`. 
From 7fadfd82ebf6ea90b38cb3f2a046a51f8601a205 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Fri, 14 Jun 2024 20:25:35 +0300 Subject: [PATCH 196/373] gh-120361: Add `nonmember` test with enum flags inside to `test_enum` (GH-120364) * gh-120361: Add `nonmember` test with enum flags inside to `test_enum` --- Doc/library/enum.rst | 2 +- Lib/test/test_enum.py | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/Doc/library/enum.rst b/Doc/library/enum.rst index 8c604c2347a547..9cf94e342dad28 100644 --- a/Doc/library/enum.rst +++ b/Doc/library/enum.rst @@ -527,7 +527,7 @@ Data Types ``Flag`` is the same as :class:`Enum`, but its members support the bitwise operators ``&`` (*AND*), ``|`` (*OR*), ``^`` (*XOR*), and ``~`` (*INVERT*); - the results of those operators are members of the enumeration. + the results of those operations are (aliases of) members of the enumeration. .. method:: __contains__(self, value) diff --git a/Lib/test/test_enum.py b/Lib/test/test_enum.py index 529dfc62eff680..99fd16ba361e6f 100644 --- a/Lib/test/test_enum.py +++ b/Lib/test/test_enum.py @@ -1495,6 +1495,27 @@ class SpamEnum(Enum): spam = nonmember(SpamEnumIsInner) self.assertTrue(SpamEnum.spam is SpamEnumIsInner) + def test_using_members_as_nonmember(self): + class Example(Flag): + A = 1 + B = 2 + ALL = nonmember(A | B) + + self.assertEqual(Example.A.value, 1) + self.assertEqual(Example.B.value, 2) + self.assertEqual(Example.ALL, 3) + self.assertIs(type(Example.ALL), int) + + class Example(Flag): + A = auto() + B = auto() + ALL = nonmember(A | B) + + self.assertEqual(Example.A.value, 1) + self.assertEqual(Example.B.value, 2) + self.assertEqual(Example.ALL, 3) + self.assertIs(type(Example.ALL), int) + def test_nested_classes_in_enum_with_member(self): """Support locally-defined nested classes.""" class Outer(Enum): From ed60ab5fab6d187068cb3e0f0d4192ebf3a228b7 Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Fri, 14 Jun 2024 11:25:23 -0700 Subject: [PATCH 
197/373] gh-119824: Print stack entry when user input is needed (#119882) Co-authored-by: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> --- Doc/library/pdb.rst | 10 +++- Lib/pdb.py | 50 +++++++++++++++---- Lib/test/test_pdb.py | 50 ++++++++++++++++--- ...-05-31-21-17-43.gh-issue-119824.CQlxWV.rst | 1 + 4 files changed, 90 insertions(+), 21 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-31-21-17-43.gh-issue-119824.CQlxWV.rst diff --git a/Doc/library/pdb.rst b/Doc/library/pdb.rst index f6085171dccb38..b1e9392ecfd927 100644 --- a/Doc/library/pdb.rst +++ b/Doc/library/pdb.rst @@ -321,11 +321,17 @@ can be overridden by the local file. argument must be an identifier, ``help exec`` must be entered to get help on the ``!`` command. -.. pdbcommand:: w(here) +.. pdbcommand:: w(here) [count] - Print a stack trace, with the most recent frame at the bottom. An arrow (``>``) + Print a stack trace, with the most recent frame at the bottom. If *count* + is 0, print the current frame entry. If *count* is negative, print the least + recent - *count* frames. If *count* is positive, print the most recent + *count* frames. An arrow (``>``) indicates the current frame, which determines the context of most commands. + .. versionchanged:: 3.14 + *count* argument is added. + .. pdbcommand:: d(own) [count] Move the current frame *count* (default one) levels down in the stack trace diff --git a/Lib/pdb.py b/Lib/pdb.py index ba84a29aa2f669..ddbfb9d2bb6244 100644 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -603,10 +603,18 @@ def interaction(self, frame, tb_or_exc): assert tb is not None, "main exception must have a traceback" with self._hold_exceptions(_chained_exceptions): self.setup(frame, tb) - # if we have more commands to process, do not show the stack entry - if not self.cmdqueue: + # We should print the stack entry if and only if the user input + # is expected, and we should print it right before the user input.
+ # If self.cmdqueue is not empty, we append a "w 0" command to the + # queue, which is equivalent to print_stack_entry + if self.cmdqueue: + self.cmdqueue.append('w 0') + else: self.print_stack_entry(self.stack[self.curindex]) self._cmdloop() + # If "w 0" is not used, pop it out + if self.cmdqueue and self.cmdqueue[-1] == 'w 0': + self.cmdqueue.pop() self.forget() def displayhook(self, obj): @@ -1401,16 +1409,24 @@ def do_clear(self, arg): complete_cl = _complete_location def do_where(self, arg): - """w(here) + """w(here) [count] - Print a stack trace, with the most recent frame at the bottom. + Print a stack trace. If count is not specified, print the full stack. + If count is 0, print the current frame entry. If count is positive, + print count entries from the most recent frame. If count is negative, + print -count entries from the least recent frame. An arrow indicates the "current frame", which determines the context of most commands. 'bt' is an alias for this command. """ - if arg: - self._print_invalid_arg(arg) - return - self.print_stack_trace() + if not arg: + count = None + else: + try: + count = int(arg) + except ValueError: + self.error('Invalid count (%s)' % arg) + return + self.print_stack_trace(count) do_w = do_where do_bt = do_where @@ -2065,10 +2081,22 @@ def complete_unalias(self, text, line, begidx, endidx): # It is also consistent with the up/down commands (which are # compatible with dbx and gdb: up moves towards 'main()' # and down moves towards the most recent stack frame). 
- - def print_stack_trace(self): + # * if count is None, prints the full stack + # * if count = 0, prints the current frame entry + # * if count < 0, prints -count least recent frame entries + # * if count > 0, prints count most recent frame entries + + def print_stack_trace(self, count=None): + if count is None: + stack_to_print = self.stack + elif count == 0: + stack_to_print = [self.stack[self.curindex]] + elif count < 0: + stack_to_print = self.stack[:-count] + else: + stack_to_print = self.stack[-count:] try: - for frame_lineno in self.stack: + for frame_lineno in stack_to_print: self.print_stack_entry(frame_lineno) except KeyboardInterrupt: pass diff --git a/Lib/test/test_pdb.py b/Lib/test/test_pdb.py index cf69bc415c9b69..5edf68dc3b429b 100644 --- a/Lib/test/test_pdb.py +++ b/Lib/test/test_pdb.py @@ -781,7 +781,7 @@ def test_pdb_where_command(): ... import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() >>> def f(): - ... g(); + ... g() >>> def test_function(): ... f() @@ -789,8 +789,13 @@ def test_pdb_where_command(): >>> with PdbTestInput([ # doctest: +ELLIPSIS ... 'w', ... 'where', + ... 'w 1', + ... 'w invalid', ... 'u', ... 'w', + ... 'w 0', + ... 'w 100', + ... 'w -100', ... 'continue', ... ]): ... test_function() @@ -798,35 +803,63 @@ def test_pdb_where_command(): -> import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() (Pdb) w ... - (8)() + (13)() -> test_function() (2)test_function() -> f() (2)f() - -> g(); + -> g() > (2)g() -> import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() (Pdb) where ... - (8)() + (13)() -> test_function() (2)test_function() -> f() (2)f() - -> g(); + -> g() > (2)g() -> import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() + (Pdb) w 1 + > (2)g() + -> import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() + (Pdb) w invalid + *** Invalid count (invalid) (Pdb) u > (2)f() - -> g(); + -> g() (Pdb) w ... 
- (8)() + (13)() + -> test_function() + (2)test_function() + -> f() + > (2)f() + -> g() + (2)g() + -> import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() + (Pdb) w 0 + > (2)f() + -> g() + (Pdb) w 100 + ... + (13)() -> test_function() (2)test_function() -> f() > (2)f() - -> g(); + -> g() + (2)g() + -> import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() + (Pdb) w -100 + ... + (13)() + -> test_function() + (2)test_function() + -> f() + > (2)f() + -> g() (2)g() -> import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() (Pdb) continue @@ -3179,6 +3212,7 @@ def test_pdbrc_basic(self): stdout, stderr = self.run_pdb_script(script, 'q\n', pdbrc=pdbrc, remove_home=True) self.assertNotIn("SyntaxError", stdout) self.assertIn("a+8=9", stdout) + self.assertIn("-> b = 2", stdout) def test_pdbrc_empty_line(self): """Test that empty lines in .pdbrc are ignored.""" diff --git a/Misc/NEWS.d/next/Library/2024-05-31-21-17-43.gh-issue-119824.CQlxWV.rst b/Misc/NEWS.d/next/Library/2024-05-31-21-17-43.gh-issue-119824.CQlxWV.rst new file mode 100644 index 00000000000000..fd6d8d79a9d157 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-31-21-17-43.gh-issue-119824.CQlxWV.rst @@ -0,0 +1 @@ +Print stack entry in :mod:`pdb` when and only when user input is needed. From 05df063ad80becc1ba6bd07d67b55b5965f32375 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 14 Jun 2024 20:39:50 +0200 Subject: [PATCH 198/373] gh-120417: Fix "imported but unused" linter warnings (#120461) Add __all__ to the following modules: importlib.machinery, importlib.util and xml.sax. Add also "# noqa: F401" in collections.abc, subprocess and xml.sax. 
* Sort __all__; remove collections.abc.__all__; remove private names * Add tests --- Lib/collections/abc.py | 4 +-- Lib/importlib/machinery.py | 8 ++++++ Lib/importlib/util.py | 6 +++++ Lib/subprocess.py | 2 +- Lib/test/test_importlib/test_api.py | 40 +++++++++++++++++++++++++++++ Lib/test/test_sax.py | 18 ++++++++++++- Lib/xml/sax/__init__.py | 14 +++++++--- 7 files changed, 84 insertions(+), 8 deletions(-) diff --git a/Lib/collections/abc.py b/Lib/collections/abc.py index 86ca8b8a8414b3..bff76291634604 100644 --- a/Lib/collections/abc.py +++ b/Lib/collections/abc.py @@ -1,3 +1,3 @@ from _collections_abc import * -from _collections_abc import __all__ -from _collections_abc import _CallableGenericAlias +from _collections_abc import __all__ # noqa: F401 +from _collections_abc import _CallableGenericAlias # noqa: F401 diff --git a/Lib/importlib/machinery.py b/Lib/importlib/machinery.py index fbd30b159fb752..6e294d59bfdcb9 100644 --- a/Lib/importlib/machinery.py +++ b/Lib/importlib/machinery.py @@ -19,3 +19,11 @@ def all_suffixes(): """Returns a list of all recognized module suffixes for this process""" return SOURCE_SUFFIXES + BYTECODE_SUFFIXES + EXTENSION_SUFFIXES + + +__all__ = ['AppleFrameworkLoader', 'BYTECODE_SUFFIXES', 'BuiltinImporter', + 'DEBUG_BYTECODE_SUFFIXES', 'EXTENSION_SUFFIXES', + 'ExtensionFileLoader', 'FileFinder', 'FrozenImporter', 'ModuleSpec', + 'NamespaceLoader', 'OPTIMIZED_BYTECODE_SUFFIXES', 'PathFinder', + 'SOURCE_SUFFIXES', 'SourceFileLoader', 'SourcelessFileLoader', + 'WindowsRegistryFinder', 'all_suffixes'] diff --git a/Lib/importlib/util.py b/Lib/importlib/util.py index c94a148e4c50e0..7243d052cc27f3 100644 --- a/Lib/importlib/util.py +++ b/Lib/importlib/util.py @@ -270,3 +270,9 @@ def exec_module(self, module): loader_state['is_loading'] = False module.__spec__.loader_state = loader_state module.__class__ = _LazyModule + + +__all__ = ['LazyLoader', 'Loader', 'MAGIC_NUMBER', + 'cache_from_source', 'decode_source', 'find_spec', + 
'module_from_spec', 'resolve_name', 'source_from_cache', + 'source_hash', 'spec_from_file_location', 'spec_from_loader'] diff --git a/Lib/subprocess.py b/Lib/subprocess.py index b2dcb1454c139e..bc08878db313df 100644 --- a/Lib/subprocess.py +++ b/Lib/subprocess.py @@ -79,7 +79,7 @@ if _mswindows: import _winapi - from _winapi import (CREATE_NEW_CONSOLE, CREATE_NEW_PROCESS_GROUP, + from _winapi import (CREATE_NEW_CONSOLE, CREATE_NEW_PROCESS_GROUP, # noqa: F401 STD_INPUT_HANDLE, STD_OUTPUT_HANDLE, STD_ERROR_HANDLE, SW_HIDE, STARTF_USESTDHANDLES, STARTF_USESHOWWINDOW, diff --git a/Lib/test/test_importlib/test_api.py b/Lib/test/test_importlib/test_api.py index 2a35f3dcb7210c..973237c0791a3e 100644 --- a/Lib/test/test_importlib/test_api.py +++ b/Lib/test/test_importlib/test_api.py @@ -6,6 +6,7 @@ import os.path import sys +from test import support from test.support import import_helper from test.support import os_helper import types @@ -437,5 +438,44 @@ def test_everyone_has___spec__(self): ) = test_util.test_both(StartupTests, machinery=machinery) +class TestModuleAll(unittest.TestCase): + def test_machinery(self): + extra = ( + # from importlib._bootstrap and importlib._bootstrap_external + 'AppleFrameworkLoader', + 'BYTECODE_SUFFIXES', + 'BuiltinImporter', + 'DEBUG_BYTECODE_SUFFIXES', + 'EXTENSION_SUFFIXES', + 'ExtensionFileLoader', + 'FileFinder', + 'FrozenImporter', + 'ModuleSpec', + 'NamespaceLoader', + 'OPTIMIZED_BYTECODE_SUFFIXES', + 'PathFinder', + 'SOURCE_SUFFIXES', + 'SourceFileLoader', + 'SourcelessFileLoader', + 'WindowsRegistryFinder', + ) + support.check__all__(self, machinery['Source'], extra=extra) + + def test_util(self): + extra = ( + # from importlib.abc, importlib._bootstrap + # and importlib._bootstrap_external + 'Loader', + 'MAGIC_NUMBER', + 'cache_from_source', + 'decode_source', + 'module_from_spec', + 'source_from_cache', + 'spec_from_file_location', + 'spec_from_loader', + ) + support.check__all__(self, util['Source'], extra=extra) + + if 
__name__ == '__main__': unittest.main() diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index 9b3014a94a081e..0d0f86c145b499 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -16,6 +16,7 @@ from xml.sax.handler import (feature_namespaces, feature_external_ges, LexicalHandler) from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl +from xml import sax from io import BytesIO, StringIO import codecs import os.path @@ -25,7 +26,7 @@ from urllib.error import URLError import urllib.request from test.support import os_helper -from test.support import findfile +from test.support import findfile, check__all__ from test.support.os_helper import FakePath, TESTFN @@ -1557,5 +1558,20 @@ def characters(self, content): self.assertEqual(self.char_index, 2) +class TestModuleAll(unittest.TestCase): + def test_all(self): + extra = ( + 'ContentHandler', + 'ErrorHandler', + 'InputSource', + 'SAXException', + 'SAXNotRecognizedException', + 'SAXNotSupportedException', + 'SAXParseException', + 'SAXReaderNotAvailable', + ) + check__all__(self, sax, extra=extra) + + if __name__ == "__main__": unittest.main() diff --git a/Lib/xml/sax/__init__.py b/Lib/xml/sax/__init__.py index b657310207cfe5..fe4582c6f8b758 100644 --- a/Lib/xml/sax/__init__.py +++ b/Lib/xml/sax/__init__.py @@ -21,9 +21,9 @@ from .xmlreader import InputSource from .handler import ContentHandler, ErrorHandler -from ._exceptions import SAXException, SAXNotRecognizedException, \ - SAXParseException, SAXNotSupportedException, \ - SAXReaderNotAvailable +from ._exceptions import (SAXException, SAXNotRecognizedException, + SAXParseException, SAXNotSupportedException, + SAXReaderNotAvailable) def parse(source, handler, errorHandler=ErrorHandler()): @@ -55,7 +55,7 @@ def parseString(string, handler, errorHandler=ErrorHandler()): # tell modulefinder that importing sax potentially imports expatreader _false = 0 if _false: - import xml.sax.expatreader + import xml.sax.expatreader # noqa: F401 
import os, sys if not sys.flags.ignore_environment and "PY_SAX_PARSER" in os.environ: @@ -92,3 +92,9 @@ def make_parser(parser_list=()): def _create_parser(parser_name): drv_module = __import__(parser_name,{},{},['create_parser']) return drv_module.create_parser() + + +__all__ = ['ContentHandler', 'ErrorHandler', 'InputSource', 'SAXException', + 'SAXNotRecognizedException', 'SAXNotSupportedException', + 'SAXParseException', 'SAXReaderNotAvailable', + 'default_parser_list', 'make_parser', 'parse', 'parseString'] From b2e71ff4f8fa5b7d8117dd8125137aee3d01f015 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 14 Jun 2024 15:29:09 -0400 Subject: [PATCH 199/373] gh-120161: Fix a Crash in the _datetime Module (gh-120182) In gh-120009 I used an atexit hook to finalize the _datetime module's static types at interpreter shutdown. However, atexit hooks are executed very early in finalization, which is a problem in the few cases where a subclass of one of those static types is still alive until the final GC collection. The static builtin types don't have this problem because they are finalized toward the end, after the final GC collection. To avoid the problem for _datetime, I have applied a similar approach here. Also, credit goes to @mgorny and @neonene for the new tests. FYI, I would have liked to take a slightly cleaner approach with managed static types, but wanted to get a smaller fix in first for the sake of backporting. I'll circle back to the cleaner approach with a future change on the main branch. 
--- Include/internal/pycore_typeobject.h | 24 ++++-- Lib/test/datetimetester.py | 44 +++++++++- ...-06-06-17-24-43.gh-issue-120161.DahNXV.rst | 2 + Modules/_datetimemodule.c | 48 +---------- Objects/typeobject.c | 85 +++++++++++++++---- Python/pylifecycle.c | 1 + 6 files changed, 133 insertions(+), 71 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-06-17-24-43.gh-issue-120161.DahNXV.rst diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index bc295b1b066bd1..32bd19d968b917 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -17,11 +17,25 @@ extern "C" { #define _Py_TYPE_BASE_VERSION_TAG (2<<16) #define _Py_MAX_GLOBAL_TYPE_VERSION_TAG (_Py_TYPE_BASE_VERSION_TAG - 1) +/* For now we hard-code this to a value for which we are confident + all the static builtin types will fit (for all builds). */ +#define _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES 200 +#define _Py_MAX_MANAGED_STATIC_EXT_TYPES 10 +#define _Py_MAX_MANAGED_STATIC_TYPES \ + (_Py_MAX_MANAGED_STATIC_BUILTIN_TYPES + _Py_MAX_MANAGED_STATIC_EXT_TYPES) + struct _types_runtime_state { /* Used to set PyTypeObject.tp_version_tag for core static types. */ // bpo-42745: next_version_tag remains shared by all interpreters // because of static types. unsigned int next_version_tag; + + struct { + struct { + PyTypeObject *type; + int64_t interp_count; + } types[_Py_MAX_MANAGED_STATIC_TYPES]; + } managed_static; }; @@ -42,11 +56,6 @@ struct type_cache { struct type_cache_entry hashtable[1 << MCACHE_SIZE_EXP]; }; -/* For now we hard-code this to a value for which we are confident - all the static builtin types will fit (for all builds). 
*/ -#define _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES 200 -#define _Py_MAX_MANAGED_STATIC_EXT_TYPES 10 - typedef struct { PyTypeObject *type; int isbuiltin; @@ -133,6 +142,7 @@ struct types_state { extern PyStatus _PyTypes_InitTypes(PyInterpreterState *); extern void _PyTypes_FiniTypes(PyInterpreterState *); +extern void _PyTypes_FiniExtTypes(PyInterpreterState *interp); extern void _PyTypes_Fini(PyInterpreterState *); extern void _PyTypes_AfterFork(void); @@ -171,10 +181,6 @@ extern managed_static_type_state * _PyStaticType_GetState( PyAPI_FUNC(int) _PyStaticType_InitForExtension( PyInterpreterState *interp, PyTypeObject *self); -PyAPI_FUNC(void) _PyStaticType_FiniForExtension( - PyInterpreterState *interp, - PyTypeObject *self, - int final); /* Like PyType_GetModuleState, but skips verification diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 45188731eed688..70e2e2cccdc55f 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -23,7 +23,7 @@ from test import support from test.support import is_resource_enabled, ALWAYS_EQ, LARGEST, SMALLEST -from test.support import warnings_helper +from test.support import script_helper, warnings_helper import datetime as datetime_module from datetime import MINYEAR, MAXYEAR @@ -6822,6 +6822,48 @@ def run(type_checker, obj): self.assertEqual(ret, 0) +class ExtensionModuleTests(unittest.TestCase): + + def setUp(self): + if self.__class__.__name__.endswith('Pure'): + self.skipTest('Not relevant in pure Python') + + @support.cpython_only + def test_gh_120161(self): + with self.subTest('simple'): + script = textwrap.dedent(""" + import datetime + from _ast import Tuple + f = lambda: None + Tuple.dims = property(f, f) + + class tzutc(datetime.tzinfo): + pass + """) + script_helper.assert_python_ok('-c', script) + + with self.subTest('complex'): + script = textwrap.dedent(""" + import asyncio + import datetime + from typing import Type + + class tzutc(datetime.tzinfo): + pass + _EPOCHTZ = 
datetime.datetime(1970, 1, 1, tzinfo=tzutc()) + + class FakeDateMeta(type): + def __instancecheck__(self, obj): + return True + class FakeDate(datetime.date, metaclass=FakeDateMeta): + pass + def pickle_fake_date(datetime_) -> Type[FakeDate]: + # A pickle function for FakeDate + return FakeDate + """) + script_helper.assert_python_ok('-c', script) + + def load_tests(loader, standard_tests, pattern): standard_tests.addTest(ZoneInfoCompleteTest()) return standard_tests diff --git a/Misc/NEWS.d/next/Library/2024-06-06-17-24-43.gh-issue-120161.DahNXV.rst b/Misc/NEWS.d/next/Library/2024-06-06-17-24-43.gh-issue-120161.DahNXV.rst new file mode 100644 index 00000000000000..c378cac44c97bf --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-06-17-24-43.gh-issue-120161.DahNXV.rst @@ -0,0 +1,2 @@ +:mod:`datetime` no longer crashes in certain complex reference cycle +situations. diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index cb4622893375d7..5c4f1f888d17ee 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -7129,37 +7129,6 @@ clear_state(datetime_state *st) } -/* --------------------------------------------------------------------------- - * Global module state. - */ - -// If we make _PyStaticType_*ForExtension() public -// then all this should be managed by the runtime. - -static struct { - PyMutex mutex; - int64_t interp_count; -} _globals = {0}; - -static void -callback_for_interp_exit(void *Py_UNUSED(data)) -{ - PyInterpreterState *interp = PyInterpreterState_Get(); - - assert(_globals.interp_count > 0); - PyMutex_Lock(&_globals.mutex); - _globals.interp_count -= 1; - int final = !_globals.interp_count; - PyMutex_Unlock(&_globals.mutex); - - /* They must be done in reverse order so subclasses are finalized - * before base classes. 
*/ - for (size_t i = Py_ARRAY_LENGTH(capi_types); i > 0; i--) { - PyTypeObject *type = capi_types[i-1]; - _PyStaticType_FiniForExtension(interp, type, final); - } -} - static int init_static_types(PyInterpreterState *interp, int reloading) { @@ -7182,19 +7151,6 @@ init_static_types(PyInterpreterState *interp, int reloading) } } - PyMutex_Lock(&_globals.mutex); - assert(_globals.interp_count >= 0); - _globals.interp_count += 1; - PyMutex_Unlock(&_globals.mutex); - - /* It could make sense to add a separate callback - * for each of the types. However, for now we can take the simpler - * approach of a single callback. */ - if (PyUnstable_AtExit(interp, callback_for_interp_exit, NULL) < 0) { - callback_for_interp_exit(NULL); - return -1; - } - return 0; } @@ -7379,8 +7335,8 @@ module_clear(PyObject *mod) PyInterpreterState *interp = PyInterpreterState_Get(); clear_current_module(interp, mod); - // We take care of the static types via an interpreter atexit hook. - // See callback_for_interp_exit() above. + // The runtime takes care of the static types for us. + // See _PyTypes_FiniExtTypes().. 
return 0; } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 8ecab555454cdc..98e00bd25c3205 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -159,18 +159,28 @@ managed_static_type_index_clear(PyTypeObject *self) self->tp_subclasses = NULL; } -static inline managed_static_type_state * -static_builtin_state_get(PyInterpreterState *interp, PyTypeObject *self) +static PyTypeObject * +static_ext_type_lookup(PyInterpreterState *interp, size_t index, + int64_t *p_interp_count) { - return &(interp->types.builtins.initialized[ - managed_static_type_index_get(self)]); -} + assert(interp->runtime == &_PyRuntime); + assert(index < _Py_MAX_MANAGED_STATIC_EXT_TYPES); -static inline managed_static_type_state * -static_ext_type_state_get(PyInterpreterState *interp, PyTypeObject *self) -{ - return &(interp->types.for_extensions.initialized[ - managed_static_type_index_get(self)]); + size_t full_index = index + _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; + int64_t interp_count = + _PyRuntime.types.managed_static.types[full_index].interp_count; + assert((interp_count == 0) == + (_PyRuntime.types.managed_static.types[full_index].type == NULL)); + *p_interp_count = interp_count; + + PyTypeObject *type = interp->types.for_extensions.initialized[index].type; + if (type == NULL) { + return NULL; + } + assert(!interp->types.for_extensions.initialized[index].isbuiltin); + assert(type == _PyRuntime.types.managed_static.types[full_index].type); + assert(managed_static_type_index_is_set(type)); + return type; } static managed_static_type_state * @@ -202,6 +212,8 @@ static void managed_static_type_state_init(PyInterpreterState *interp, PyTypeObject *self, int isbuiltin, int initial) { + assert(interp->runtime == &_PyRuntime); + size_t index; if (initial) { assert(!managed_static_type_index_is_set(self)); @@ -228,6 +240,21 @@ managed_static_type_state_init(PyInterpreterState *interp, PyTypeObject *self, assert(index < _Py_MAX_MANAGED_STATIC_EXT_TYPES); } } + size_t 
full_index = isbuiltin + ? index + : index + _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; + + assert((initial == 1) == + (_PyRuntime.types.managed_static.types[full_index].interp_count == 0)); + _PyRuntime.types.managed_static.types[full_index].interp_count += 1; + + if (initial) { + assert(_PyRuntime.types.managed_static.types[full_index].type == NULL); + _PyRuntime.types.managed_static.types[full_index].type = self; + } + else { + assert(_PyRuntime.types.managed_static.types[full_index].type == self); + } managed_static_type_state *state = isbuiltin ? &(interp->types.builtins.initialized[index]) @@ -256,15 +283,28 @@ static void managed_static_type_state_clear(PyInterpreterState *interp, PyTypeObject *self, int isbuiltin, int final) { + size_t index = managed_static_type_index_get(self); + size_t full_index = isbuiltin + ? index + : index + _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; + managed_static_type_state *state = isbuiltin - ? static_builtin_state_get(interp, self) - : static_ext_type_state_get(interp, self); + ? &(interp->types.builtins.initialized[index]) + : &(interp->types.for_extensions.initialized[index]); + assert(state != NULL); + + assert(_PyRuntime.types.managed_static.types[full_index].interp_count > 0); + assert(_PyRuntime.types.managed_static.types[full_index].type == state->type); assert(state->type != NULL); state->type = NULL; assert(state->tp_weaklist == NULL); // It was already cleared out. + _PyRuntime.types.managed_static.types[full_index].interp_count -= 1; if (final) { + assert(!_PyRuntime.types.managed_static.types[full_index].interp_count); + _PyRuntime.types.managed_static.types[full_index].type = NULL; + managed_static_type_index_clear(self); } @@ -840,8 +880,12 @@ _PyTypes_Fini(PyInterpreterState *interp) struct type_cache *cache = &interp->types.type_cache; type_cache_clear(cache, NULL); + // All the managed static types should have been finalized already. 
+ assert(interp->types.for_extensions.num_initialized == 0); + for (size_t i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) { + assert(interp->types.for_extensions.initialized[i].type == NULL); + } assert(interp->types.builtins.num_initialized == 0); - // All the static builtin types should have been finalized already. for (size_t i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) { assert(interp->types.builtins.initialized[i].type == NULL); } @@ -5834,9 +5878,20 @@ fini_static_type(PyInterpreterState *interp, PyTypeObject *type, } void -_PyStaticType_FiniForExtension(PyInterpreterState *interp, PyTypeObject *type, int final) +_PyTypes_FiniExtTypes(PyInterpreterState *interp) { - fini_static_type(interp, type, 0, final); + for (size_t i = _Py_MAX_MANAGED_STATIC_EXT_TYPES; i > 0; i--) { + if (interp->types.for_extensions.num_initialized == 0) { + break; + } + int64_t count = 0; + PyTypeObject *type = static_ext_type_lookup(interp, i-1, &count); + if (type == NULL) { + continue; + } + int final = (count == 1); + fini_static_type(interp, type, 0, final); + } } void diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index cbdf5c1b771fff..3639cf6712053e 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1818,6 +1818,7 @@ flush_std_files(void) static void finalize_interp_types(PyInterpreterState *interp) { + _PyTypes_FiniExtTypes(interp); _PyUnicode_FiniTypes(interp); _PySys_FiniTypes(interp); _PyXI_FiniTypes(interp); From e3b6cff33122554de0ef598664f5cd98de4fed6b Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 14 Jun 2024 18:12:35 -0400 Subject: [PATCH 200/373] gh-120524: Temporarily Skip test_create_many_threaded In test_interpreters.test_stress (gh-120525) --- Lib/test/test_interpreters/test_stress.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_interpreters/test_stress.py b/Lib/test/test_interpreters/test_stress.py index e400535b2a0e4e..40d2d77a7b9d3e 100644 --- a/Lib/test/test_interpreters/test_stress.py +++ 
b/Lib/test/test_interpreters/test_stress.py @@ -22,6 +22,7 @@ def test_create_many_sequential(self): interp = interpreters.create() alive.append(interp) + @unittest.skip('(temporary) gh-120524: there is a race that needs fixing') @support.requires_resource('cpu') def test_create_many_threaded(self): alive = [] From 92f6d400f76b6a04dddd944568870f689c8fab5f Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Sat, 15 Jun 2024 08:05:18 +0800 Subject: [PATCH 201/373] gh-119819: Conditional skip of logging tests that require multiprocessing subprocess support (#120476) Skip tests that require multiprocessing subprocess support. --- Lib/test/test_logging.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index ef2d4a621be962..504862ad53395e 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -3898,6 +3898,7 @@ def do_queuehandler_configuration(self, qspec, lspec): self.addCleanup(os.remove, fn) @threading_helper.requires_working_threading() + @support.requires_subprocess() def test_config_queue_handler(self): q = CustomQueue() dq = { @@ -3926,12 +3927,10 @@ def test_config_queue_handler(self): msg = str(ctx.exception) self.assertEqual(msg, "Unable to configure handler 'ah'") + @support.requires_subprocess() def test_multiprocessing_queues(self): # See gh-119819 - # will skip test if it's not available - import_helper.import_module('_multiprocessing') - cd = copy.deepcopy(self.config_queue_handler) from multiprocessing import Queue as MQ, Manager as MM q1 = MQ() # this can't be pickled From 5c58e728b1391c258b224fc6d88f62f42c725026 Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Sat, 15 Jun 2024 08:05:30 +0800 Subject: [PATCH 202/373] gh-117398: Use the correct module loader for iOS in datetime CAPI test. (#120477) Use the correct loader for iOS. 
--- Lib/test/datetimetester.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 70e2e2cccdc55f..e55b738eb4a975 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -6786,6 +6786,13 @@ def test_datetime_from_timestamp(self): self.assertEqual(dt_orig, dt_rt) def test_type_check_in_subinterp(self): + # iOS requires the use of the custom framework loader, + # not the ExtensionFileLoader. + if sys.platform == "ios": + extension_loader = "AppleFrameworkLoader" + else: + extension_loader = "ExtensionFileLoader" + script = textwrap.dedent(f""" if {_interpreters is None}: import _testcapi as module @@ -6795,7 +6802,7 @@ def test_type_check_in_subinterp(self): import importlib.util fullname = '_testcapi_datetime' origin = importlib.util.find_spec('_testcapi').origin - loader = importlib.machinery.ExtensionFileLoader(fullname, origin) + loader = importlib.machinery.{extension_loader}(fullname, origin) spec = importlib.util.spec_from_loader(fullname, loader) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) From d4039d3f6f8cb7738c5cd272dde04171446dfd2b Mon Sep 17 00:00:00 2001 From: Adam Williamson Date: Fri, 14 Jun 2024 22:33:09 -0700 Subject: [PATCH 203/373] gh-120526: Correct signature of map() builtin (GH-120528) map() requires at least one iterable arg. 
Signed-off-by: Adam Williamson --- Python/bltinmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index c4d3ecbeeff0e6..6e50623cafa4ed 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -1475,7 +1475,7 @@ static PyMethodDef map_methods[] = { PyDoc_STRVAR(map_doc, -"map(function, /, *iterables)\n\ +"map(function, iterable, /, *iterables)\n\ --\n\ \n\ Make an iterator that computes the function using arguments from\n\ From 42ebdd83bb194f054fe5a10b3caa0c3a95be3679 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sat, 15 Jun 2024 13:33:14 +0300 Subject: [PATCH 204/373] gh-120544: Add `else: fail()` to tests where exception is expected (#120545) --- Lib/test/test_exceptions.py | 2 ++ Lib/test/test_unittest/test_case.py | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index 9460d1f1c864b9..e4f2e3a97b8bb8 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -1859,6 +1859,8 @@ def f(): except self.failureException: with support.captured_stderr() as err: sys.__excepthook__(*sys.exc_info()) + else: + self.fail("assertRaisesRegex should have failed.") self.assertIn("aab", err.getvalue()) diff --git a/Lib/test/test_unittest/test_case.py b/Lib/test/test_unittest/test_case.py index 17420909402107..b4b2194a09cf9f 100644 --- a/Lib/test/test_unittest/test_case.py +++ b/Lib/test/test_unittest/test_case.py @@ -1151,6 +1151,8 @@ def testAssertMultiLineEqual(self): # need to remove the first line of the error message error = str(e).split('\n', 1)[1] self.assertEqual(sample_text_error, error) + else: + self.fail(f'{self.failureException} not raised') def testAssertEqualSingleLine(self): sample_text = "laden swallows fly slowly" @@ -1167,6 +1169,8 @@ def testAssertEqualSingleLine(self): # need to remove the first line of the error message error = str(e).split('\n', 1)[1] self.assertEqual(sample_text_error, 
error) + else: + self.fail(f'{self.failureException} not raised') def testAssertEqualwithEmptyString(self): '''Verify when there is an empty string involved, the diff output @@ -1184,6 +1188,8 @@ def testAssertEqualwithEmptyString(self): # need to remove the first line of the error message error = str(e).split('\n', 1)[1] self.assertEqual(sample_text_error, error) + else: + self.fail(f'{self.failureException} not raised') def testAssertEqualMultipleLinesMissingNewlineTerminator(self): '''Verifying format of diff output from assertEqual involving strings @@ -1204,6 +1210,8 @@ def testAssertEqualMultipleLinesMissingNewlineTerminator(self): # need to remove the first line of the error message error = str(e).split('\n', 1)[1] self.assertEqual(sample_text_error, error) + else: + self.fail(f'{self.failureException} not raised') def testAssertEqualMultipleLinesMismatchedNewlinesTerminators(self): '''Verifying format of diff output from assertEqual involving strings @@ -1227,6 +1235,8 @@ def testAssertEqualMultipleLinesMismatchedNewlinesTerminators(self): # need to remove the first line of the error message error = str(e).split('\n', 1)[1] self.assertEqual(sample_text_error, error) + else: + self.fail(f'{self.failureException} not raised') def testEqualityBytesWarning(self): if sys.flags.bytes_warning: From c501261c919ceb97c850ef9427a93326f06a8f2e Mon Sep 17 00:00:00 2001 From: Wulian233 <71213467+Wulian233@users.noreply.github.com> Date: Sat, 15 Jun 2024 19:04:14 +0800 Subject: [PATCH 205/373] gh-120495: Fix incorrect exception handling in Tab Nanny (#120498) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Lib/tabnanny.py | 8 ++++---- Lib/test/test_tabnanny.py | 2 +- Misc/ACKS | 1 + .../2024-06-14-20-05-25.gh-issue-120495.OxgZKB.rst | 1 + 4 files changed, 7 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-14-20-05-25.gh-issue-120495.OxgZKB.rst diff --git a/Lib/tabnanny.py b/Lib/tabnanny.py index 
7e56d4a48d1d00..c0097351b269f2 100644 --- a/Lib/tabnanny.py +++ b/Lib/tabnanny.py @@ -105,14 +105,14 @@ def check(file): errprint("%r: Token Error: %s" % (file, msg)) return - except SyntaxError as msg: - errprint("%r: Token Error: %s" % (file, msg)) - return - except IndentationError as msg: errprint("%r: Indentation Error: %s" % (file, msg)) return + except SyntaxError as msg: + errprint("%r: Syntax Error: %s" % (file, msg)) + return + except NannyNag as nag: badline = nag.get_lineno() line = nag.get_line() diff --git a/Lib/test/test_tabnanny.py b/Lib/test/test_tabnanny.py index cc122cafc7985c..30dcb3e3c4f4f9 100644 --- a/Lib/test/test_tabnanny.py +++ b/Lib/test/test_tabnanny.py @@ -315,7 +315,7 @@ def validate_cmd(self, *args, stdout="", stderr="", partial=False, expect_failur def test_with_errored_file(self): """Should displays error when errored python file is given.""" with TemporaryPyFile(SOURCE_CODES["wrong_indented"]) as file_path: - stderr = f"{file_path!r}: Token Error: " + stderr = f"{file_path!r}: Indentation Error: " stderr += ('unindent does not match any outer indentation level' ' (, line 3)') self.validate_cmd(file_path, stderr=stderr, expect_failure=True) diff --git a/Misc/ACKS b/Misc/ACKS index 2f4c0793437fb6..a406fca8744a5f 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1099,6 +1099,7 @@ Ivan Levkivskyi Ben Lewis William Lewis Akira Li +Jiahao Li Robert Li Xuanji Li Zekun Li diff --git a/Misc/NEWS.d/next/Library/2024-06-14-20-05-25.gh-issue-120495.OxgZKB.rst b/Misc/NEWS.d/next/Library/2024-06-14-20-05-25.gh-issue-120495.OxgZKB.rst new file mode 100644 index 00000000000000..d5114c3d3c904c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-14-20-05-25.gh-issue-120495.OxgZKB.rst @@ -0,0 +1 @@ +Fix incorrect exception handling in Tab Nanny. Patch by Wulian233. 
From 99d62f902e43c08ebec5a292fd3b30a9fc4cba69 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Sat, 15 Jun 2024 13:51:58 +0100 Subject: [PATCH 206/373] Add some more edge-case tests for `inspect.get_annotations` with `eval_str=True` (#120550) --- .../inspect_stringized_annotations_pep695.py | 23 ++++++++++++++---- Lib/test/test_inspect/test_inspect.py | 24 +++++++++++++------ 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_inspect/inspect_stringized_annotations_pep695.py b/Lib/test/test_inspect/inspect_stringized_annotations_pep695.py index 723822f8eaa92d..39bfe2edb03f30 100644 --- a/Lib/test/test_inspect/inspect_stringized_annotations_pep695.py +++ b/Lib/test/test_inspect/inspect_stringized_annotations_pep695.py @@ -45,6 +45,13 @@ def generic_method[Foo, **Bar]( def generic_method_2[Eggs, **Spam](self, x: Eggs, y: Spam): pass +# Eggs is `int` in globals, a TypeVar in type_params, and `str` in locals: +class E[Eggs]: + Eggs = str + x: Eggs + + + def nested(): from types import SimpleNamespace from inspect import get_annotations @@ -53,7 +60,7 @@ def nested(): Spam = memoryview - class E[Eggs, **Spam]: + class F[Eggs, **Spam]: x: Eggs y: Spam @@ -63,10 +70,18 @@ def generic_method[Eggs, **Spam](self, x: Eggs, y: Spam): pass def generic_function[Eggs, **Spam](x: Eggs, y: Spam): pass + # Eggs is `int` in globals, `bytes` in the function scope, + # a TypeVar in the type_params, and `str` in locals: + class G[Eggs]: + Eggs = str + x: Eggs + + return SimpleNamespace( - E=E, - E_annotations=get_annotations(E, eval_str=True), - E_meth_annotations=get_annotations(E.generic_method, eval_str=True), + F=F, + F_annotations=get_annotations(F, eval_str=True), + F_meth_annotations=get_annotations(F.generic_method, eval_str=True), + G_annotations=get_annotations(G, eval_str=True), generic_func=generic_function, generic_func_annotations=get_annotations(generic_function, eval_str=True) ) diff --git a/Lib/test/test_inspect/test_inspect.py 
b/Lib/test/test_inspect/test_inspect.py index 140efac530afb2..ea8735d8f06459 100644 --- a/Lib/test/test_inspect/test_inspect.py +++ b/Lib/test/test_inspect/test_inspect.py @@ -1770,26 +1770,36 @@ def test_pep_695_generic_method_with_future_annotations_name_clash_with_global_v ) ) + def test_pep_695_generic_method_with_future_annotations_name_clash_with_global_and_local_vars(self): + self.assertEqual( + inspect.get_annotations( + inspect_stringized_annotations_pep695.E, eval_str=True + ), + {"x": str}, + ) + def test_pep_695_generics_with_future_annotations_nested_in_function(self): results = inspect_stringized_annotations_pep695.nested() self.assertEqual( - set(results.E_annotations.values()), - set(results.E.__type_params__) + set(results.F_annotations.values()), + set(results.F.__type_params__) ) self.assertEqual( - set(results.E_meth_annotations.values()), - set(results.E.generic_method.__type_params__) + set(results.F_meth_annotations.values()), + set(results.F.generic_method.__type_params__) ) self.assertNotEqual( - set(results.E_meth_annotations.values()), - set(results.E.__type_params__) + set(results.F_meth_annotations.values()), + set(results.F.__type_params__) ) self.assertEqual( - set(results.E_meth_annotations.values()).intersection(results.E.__type_params__), + set(results.F_meth_annotations.values()).intersection(results.F.__type_params__), set() ) + self.assertEqual(results.G_annotations, {"x": str}) + self.assertEqual( set(results.generic_func_annotations.values()), set(results.generic_func.__type_params__) From 6f63dfff6f493b405f3422210a168369e1e7a35d Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Sat, 15 Jun 2024 22:39:22 +0800 Subject: [PATCH 207/373] gh-117657: Make PyType_HasFeature (exported version) atomic (#120484) Make PyType_HasFeature (exported version) atomic --- Include/object.h | 6 +++++- Objects/typeobject.c | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Include/object.h b/Include/object.h index 
4a39ada8c7daa4..f71aaee7efe6ee 100644 --- a/Include/object.h +++ b/Include/object.h @@ -756,7 +756,11 @@ PyType_HasFeature(PyTypeObject *type, unsigned long feature) // PyTypeObject is opaque in the limited C API flags = PyType_GetFlags(type); #else - flags = type->tp_flags; +# ifdef Py_GIL_DISABLED + flags = _Py_atomic_load_ulong_relaxed(&type->tp_flags); +# else + flags = type->tp_flags; +# endif #endif return ((flags & feature) != 0); } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 98e00bd25c3205..eb296414bb7bef 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3599,7 +3599,7 @@ type_init(PyObject *cls, PyObject *args, PyObject *kwds) unsigned long PyType_GetFlags(PyTypeObject *type) { - return type->tp_flags; + return FT_ATOMIC_LOAD_ULONG_RELAXED(type->tp_flags); } From 9e0b11eb21930b7b8e4a396200a921e9985cfca4 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Sat, 15 Jun 2024 08:18:16 -0700 Subject: [PATCH 208/373] annotations: expand documentation on "simple" assignment targets (#120535) This behavior is rather surprising and it was not clearly specified. Co-authored-by: Alex Waygood --- Doc/library/ast.rst | 10 +++++++--- Doc/reference/simple_stmts.rst | 7 +++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/Doc/library/ast.rst b/Doc/library/ast.rst index 9ee56b92431b57..f7e8afa7000392 100644 --- a/Doc/library/ast.rst +++ b/Doc/library/ast.rst @@ -891,9 +891,13 @@ Statements An assignment with a type annotation. ``target`` is a single node and can be a :class:`Name`, a :class:`Attribute` or a :class:`Subscript`. ``annotation`` is the annotation, such as a :class:`Constant` or :class:`Name` - node. ``value`` is a single optional node. ``simple`` is a boolean integer - set to True for a :class:`Name` node in ``target`` that do not appear in - between parenthesis and are hence pure names and not expressions. + node. ``value`` is a single optional node. 
+ + ``simple`` is always either 0 (indicating a "complex" target) or 1 + (indicating a "simple" target). A "simple" target consists solely of a + :class:`Name` node that does not appear between parentheses; all other + targets are considered complex. Only simple targets appear in + the :attr:`__annotations__` dictionary of modules and classes. .. doctest:: diff --git a/Doc/reference/simple_stmts.rst b/Doc/reference/simple_stmts.rst index a253482156d3b4..4f6c0c63ae42be 100644 --- a/Doc/reference/simple_stmts.rst +++ b/Doc/reference/simple_stmts.rst @@ -333,7 +333,9 @@ statement, of a variable or attribute annotation and an optional assignment stat The difference from normal :ref:`assignment` is that only a single target is allowed. -For simple names as assignment targets, if in class or module scope, +The assignment target is considered "simple" if it consists of a single +name that is not enclosed in parentheses. +For simple assignment targets, if in class or module scope, the annotations are evaluated and stored in a special class or module attribute :attr:`__annotations__` that is a dictionary mapping from variable names (mangled if private) to @@ -341,7 +343,8 @@ evaluated annotations. This attribute is writable and is automatically created at the start of class or module body execution, if annotations are found statically. -For expressions as assignment targets, the annotations are evaluated if +If the assignment target is not simple (an attribute, subscript node, or +parenthesized name), the annotation is evaluated if in class or module scope, but not stored. 
If a name is annotated in a function scope, then this name is local for From 31d1d72d7e24e0427df70f7dd14b9baff28a4f89 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 15 Jun 2024 20:56:40 +0300 Subject: [PATCH 209/373] gh-120541: Improve the "less" prompt in pydoc (GH-120543) When help() is called with non-string argument, use __qualname__ or __name__ if available, otherwise use "{typename} object". --- Lib/pydoc.py | 9 ++- Lib/test/test_pydoc/test_pydoc.py | 62 +++++++++++++++---- ...-06-15-12-04-46.gh-issue-120541.d3cc5y.rst | 2 + 3 files changed, 59 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-15-12-04-46.gh-issue-120541.d3cc5y.rst diff --git a/Lib/pydoc.py b/Lib/pydoc.py index be5cd9a80db710..768c3dcb11ec59 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -1755,7 +1755,14 @@ def doc(thing, title='Python Library Documentation: %s', forceload=0, """Display text documentation, given an object or a path to an object.""" if output is None: try: - what = thing if isinstance(thing, str) else type(thing).__name__ + if isinstance(thing, str): + what = thing + else: + what = getattr(thing, '__qualname__', None) + if not isinstance(what, str): + what = getattr(thing, '__name__', None) + if not isinstance(what, str): + what = type(thing).__name__ + ' object' pager(render_doc(thing, title, forceload), f'Help on {what!s}') except ImportError as exc: if is_cli: diff --git a/Lib/test/test_pydoc/test_pydoc.py b/Lib/test/test_pydoc/test_pydoc.py index a17c16cc73cf0e..b520cfd0b50e38 100644 --- a/Lib/test/test_pydoc/test_pydoc.py +++ b/Lib/test/test_pydoc/test_pydoc.py @@ -31,7 +31,7 @@ from test.support.script_helper import (assert_python_ok, assert_python_failure, spawn_python) from test.support import threading_helper -from test.support import (reap_children, captured_output, captured_stdout, +from test.support import (reap_children, captured_stdout, captured_stderr, is_emscripten, is_wasi, requires_docstrings, 
MISSING_C_DOCSTRINGS) from test.support.os_helper import (TESTFN, rmtree, unlink) @@ -680,9 +680,8 @@ def test_help_output_redirect(self, pager_mock): help_header = textwrap.dedent(help_header) expected_help_pattern = help_header + expected_text_pattern - with captured_output('stdout') as output, \ - captured_output('stderr') as err, \ - StringIO() as buf: + with captured_stdout() as output, captured_stderr() as err: + buf = StringIO() helper = pydoc.Helper(output=buf) helper.help(module) result = buf.getvalue().strip() @@ -706,9 +705,8 @@ def test_help_output_redirect_various_requests(self, pager_mock): def run_pydoc_for_request(request, expected_text_part): """Helper function to run pydoc with its output redirected""" - with captured_output('stdout') as output, \ - captured_output('stderr') as err, \ - StringIO() as buf: + with captured_stdout() as output, captured_stderr() as err: + buf = StringIO() helper = pydoc.Helper(output=buf) helper.help(request) result = buf.getvalue().strip() @@ -742,6 +740,45 @@ def run_pydoc_for_request(request, expected_text_part): run_pydoc_for_request(pydoc.Helper.help, 'Help on function help in module pydoc:') # test for pydoc.Helper() instance skipped because it is always meant to be interactive + @unittest.skipIf(hasattr(sys, 'gettrace') and sys.gettrace(), + 'trace function introduces __locals__ unexpectedly') + @requires_docstrings + def test_help_output_pager(self): + def run_pydoc_pager(request, what, expected_first_line): + with (captured_stdout() as output, + captured_stderr() as err, + unittest.mock.patch('pydoc.pager') as pager_mock, + self.subTest(repr(request))): + helper = pydoc.Helper() + helper.help(request) + self.assertEqual('', err.getvalue()) + self.assertEqual('\n', output.getvalue()) + pager_mock.assert_called_once() + result = clean_text(pager_mock.call_args.args[0]) + self.assertEqual(result.splitlines()[0], expected_first_line) + self.assertEqual(pager_mock.call_args.args[1], f'Help on {what}') + + 
run_pydoc_pager('%', 'EXPRESSIONS', 'Operator precedence') + run_pydoc_pager('True', 'bool object', 'Help on bool object:') + run_pydoc_pager(True, 'bool object', 'Help on bool object:') + run_pydoc_pager('assert', 'assert', 'The "assert" statement') + run_pydoc_pager('TYPES', 'TYPES', 'The standard type hierarchy') + run_pydoc_pager('pydoc.Helper.help', 'pydoc.Helper.help', + 'Help on function help in pydoc.Helper:') + run_pydoc_pager(pydoc.Helper.help, 'Helper.help', + 'Help on function help in module pydoc:') + run_pydoc_pager('str', 'str', 'Help on class str in module builtins:') + run_pydoc_pager(str, 'str', 'Help on class str in module builtins:') + run_pydoc_pager('str.upper', 'str.upper', 'Help on method_descriptor in str:') + run_pydoc_pager(str.upper, 'str.upper', 'Help on method_descriptor:') + run_pydoc_pager(str.__add__, 'str.__add__', 'Help on wrapper_descriptor:') + run_pydoc_pager(int.numerator, 'int.numerator', + 'Help on getset descriptor builtins.int.numerator:') + run_pydoc_pager(list[int], 'list', + 'Help on GenericAlias in module builtins:') + run_pydoc_pager('sys', 'sys', 'Help on built-in module sys:') + run_pydoc_pager(sys, 'sys', 'Help on built-in module sys:') + def test_showtopic(self): with captured_stdout() as showtopic_io: helper = pydoc.Helper() @@ -775,9 +812,8 @@ def test_showtopic_output_redirect(self, pager_mock): # Helper.showtopic should be redirected self.maxDiff = None - with captured_output('stdout') as output, \ - captured_output('stderr') as err, \ - StringIO() as buf: + with captured_stdout() as output, captured_stderr() as err: + buf = StringIO() helper = pydoc.Helper(output=buf) helper.showtopic('with') result = buf.getvalue().strip() @@ -790,7 +826,7 @@ def test_showtopic_output_redirect(self, pager_mock): def test_lambda_with_return_annotation(self): func = lambda a, b, c: 1 func.__annotations__ = {"return": int} - with captured_output('stdout') as help_io: + with captured_stdout() as help_io: pydoc.help(func) 
helptext = help_io.getvalue() self.assertIn("lambda (a, b, c) -> int", helptext) @@ -798,7 +834,7 @@ def test_lambda_with_return_annotation(self): def test_lambda_without_return_annotation(self): func = lambda a, b, c: 1 func.__annotations__ = {"a": int, "b": int, "c": int} - with captured_output('stdout') as help_io: + with captured_stdout() as help_io: pydoc.help(func) helptext = help_io.getvalue() self.assertIn("lambda (a: int, b: int, c: int)", helptext) @@ -806,7 +842,7 @@ def test_lambda_without_return_annotation(self): def test_lambda_with_return_and_params_annotation(self): func = lambda a, b, c: 1 func.__annotations__ = {"a": int, "b": int, "c": int, "return": int} - with captured_output('stdout') as help_io: + with captured_stdout() as help_io: pydoc.help(func) helptext = help_io.getvalue() self.assertIn("lambda (a: int, b: int, c: int) -> int", helptext) diff --git a/Misc/NEWS.d/next/Library/2024-06-15-12-04-46.gh-issue-120541.d3cc5y.rst b/Misc/NEWS.d/next/Library/2024-06-15-12-04-46.gh-issue-120541.d3cc5y.rst new file mode 100644 index 00000000000000..bf8830c6c50386 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-15-12-04-46.gh-issue-120541.d3cc5y.rst @@ -0,0 +1,2 @@ +Improve the prompt in the "less" pager when :func:`help` is called with +non-string argument. From 08d09cf5ba041c9c5c3860200b56bab66fd44a23 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Sat, 15 Jun 2024 20:46:39 +0200 Subject: [PATCH 210/373] gh-112346: Always set OS byte to 255, simpler gzip.compress function. (GH-120486) This matches the output behavior in 3.10 and earlier; the optimization in 3.11 allowed the zlib library's "os" value to be filled in instead in the circumstance when mtime was 0. this keeps things consistent. 
--- Doc/library/gzip.rst | 8 ++-- Lib/gzip.py | 38 ++++--------------- Lib/test/test_gzip.py | 12 +++++- ...4-06-12-10-00-31.gh-issue-90425.5CfkKG.rst | 2 + 4 files changed, 26 insertions(+), 34 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-12-10-00-31.gh-issue-90425.5CfkKG.rst diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst index 965da5981f6dbc..152cba4f653cb4 100644 --- a/Doc/library/gzip.rst +++ b/Doc/library/gzip.rst @@ -188,9 +188,7 @@ The module defines the following items: Compress the *data*, returning a :class:`bytes` object containing the compressed data. *compresslevel* and *mtime* have the same meaning as in - the :class:`GzipFile` constructor above. When *mtime* is set to ``0``, this - function is equivalent to :func:`zlib.compress` with *wbits* set to ``31``. - The zlib function is faster. + the :class:`GzipFile` constructor above. .. versionadded:: 3.2 .. versionchanged:: 3.8 @@ -200,6 +198,10 @@ The module defines the following items: streamed fashion. Calls with *mtime* set to ``0`` are delegated to :func:`zlib.compress` for better speed. + .. versionchanged:: 3.13 + The gzip header OS byte is guaranteed to be set to 255 when this function + is used as was the case in 3.10 and earlier. + .. function:: decompress(data) Decompress the *data*, returning a :class:`bytes` object containing the diff --git a/Lib/gzip.py b/Lib/gzip.py index 0d19c84c59cfa7..ba753ce3050dd8 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -580,27 +580,6 @@ def _rewind(self): self._new_member = True -def _create_simple_gzip_header(compresslevel: int, - mtime = None) -> bytes: - """ - Write a simple gzip header with no extra fields. - :param compresslevel: Compresslevel used to determine the xfl bytes. - :param mtime: The mtime (must support conversion to a 32-bit integer). - :return: A bytes object representing the gzip header. 
- """ - if mtime is None: - mtime = time.time() - if compresslevel == _COMPRESS_LEVEL_BEST: - xfl = 2 - elif compresslevel == _COMPRESS_LEVEL_FAST: - xfl = 4 - else: - xfl = 0 - # Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra - # fields added to header), mtime, xfl and os (255 for unknown OS). - return struct.pack(" Date: Sun, 16 Jun 2024 13:36:10 +0800 Subject: [PATCH 211/373] gh-120572: add missing parentheses in TypeIs documentation (#120573) --- Doc/library/typing.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/typing.rst b/Doc/library/typing.rst index 94de64fcf835fc..bf0ff9bd348553 100644 --- a/Doc/library/typing.rst +++ b/Doc/library/typing.rst @@ -1454,8 +1454,8 @@ These can be used as types in annotations. They all support subscription using to write such functions in a type-safe manner. If a ``TypeIs`` function is a class or instance method, then the type in - ``TypeIs`` maps to the type of the second parameter after ``cls`` or - ``self``. + ``TypeIs`` maps to the type of the second parameter (after ``cls`` or + ``self``). 
In short, the form ``def foo(arg: TypeA) -> TypeIs[TypeB]: ...``, means that if ``foo(arg)`` returns ``True``, then ``arg`` is an instance From cf49ef78f894e418bea7de23dde9b01d6235889d Mon Sep 17 00:00:00 2001 From: Terry Jan Reedy Date: Sun, 16 Jun 2024 01:55:47 -0400 Subject: [PATCH 212/373] gh-120360: Add self as IDLE doc owner (#120571) Add self as IDLE doc owner --- .github/CODEOWNERS | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 1f9047ab97e934..eb7cc88565f6d0 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -212,6 +212,7 @@ Doc/c-api/stable.rst @encukou **/*ensurepip* @pfmoore @pradyunsg **/*idlelib* @terryjreedy +/Doc/library/idle.rst @terryjreedy **/*typing* @JelleZijlstra @AlexWaygood From 0c0348adbfca991f78b3aaa6790e5c26606a1c0f Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sun, 16 Jun 2024 11:26:13 +0300 Subject: [PATCH 213/373] gh-120579: Guard `_testcapi` import in `test_free_threading` (#120580) --- Lib/test/test_free_threading/test_dict.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_free_threading/test_dict.py b/Lib/test/test_free_threading/test_dict.py index f877582e6b565c..3126458e08e50a 100644 --- a/Lib/test/test_free_threading/test_dict.py +++ b/Lib/test/test_free_threading/test_dict.py @@ -8,7 +8,10 @@ from threading import Thread from unittest import TestCase -from _testcapi import dict_version +try: + import _testcapi +except ImportError: + _testcapi = None from test.support import threading_helper @@ -139,7 +142,9 @@ def writer_func(l): for ref in thread_list: self.assertIsNone(ref()) + @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_dict_version(self): + dict_version = _testcapi.dict_version THREAD_COUNT = 10 DICT_COUNT = 10000 lists = [] From 192d17c3fd9945104bc0303cf248bb0d074d260e Mon Sep 17 00:00:00 2001 From: Idan Kapustian <71190257+idankap@users.noreply.github.com> Date: Sun, 16 Jun 2024 15:15:03 +0300 
Subject: [PATCH 214/373] gh-120485: Add an override of `allow_reuse_port` on classes subclassing `socketserver.TCPServer` (GH-120488) Co-authored-by: Vinay Sajip --- Lib/http/server.py | 3 ++- Lib/logging/config.py | 3 ++- Lib/test/test_logging.py | 1 + Lib/xmlrpc/server.py | 1 + .../2024-06-14-07-52-00.gh-issue-120485.yy4K4b.rst | 1 + 5 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-14-07-52-00.gh-issue-120485.yy4K4b.rst diff --git a/Lib/http/server.py b/Lib/http/server.py index 7d0da5052d2d4d..2d010649e56b51 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -129,7 +129,8 @@ class HTTPServer(socketserver.TCPServer): - allow_reuse_address = 1 # Seems to make sense in testing environment + allow_reuse_address = True # Seems to make sense in testing environment + allow_reuse_port = True def server_bind(self): """Override server_bind to store the server name.""" diff --git a/Lib/logging/config.py b/Lib/logging/config.py index 9de84e527b18ac..d2f23e53f35c57 100644 --- a/Lib/logging/config.py +++ b/Lib/logging/config.py @@ -984,7 +984,8 @@ class ConfigSocketReceiver(ThreadingTCPServer): A simple TCP socket-based logging config receiver. 
""" - allow_reuse_address = 1 + allow_reuse_address = True + allow_reuse_port = True def __init__(self, host='localhost', port=DEFAULT_LOGGING_CONFIG_PORT, handler=None, ready=None, verify=None): diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index 504862ad53395e..5192ce252a4d4c 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -1038,6 +1038,7 @@ class TestTCPServer(ControlMixin, ThreadingTCPServer): """ allow_reuse_address = True + allow_reuse_port = True def __init__(self, addr, handler, poll_interval=0.5, bind_and_activate=True): diff --git a/Lib/xmlrpc/server.py b/Lib/xmlrpc/server.py index 4dddb1d10e08bd..90a356fbb8eae4 100644 --- a/Lib/xmlrpc/server.py +++ b/Lib/xmlrpc/server.py @@ -578,6 +578,7 @@ class SimpleXMLRPCServer(socketserver.TCPServer, """ allow_reuse_address = True + allow_reuse_port = True # Warning: this is for debugging purposes only! Never set this to True in # production code, as will be sending out sensitive information (exception diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-14-07-52-00.gh-issue-120485.yy4K4b.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-14-07-52-00.gh-issue-120485.yy4K4b.rst new file mode 100644 index 00000000000000..f41c233908362f --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-14-07-52-00.gh-issue-120485.yy4K4b.rst @@ -0,0 +1 @@ +Add an override of ``allow_reuse_port`` on classes subclassing ``socketserver.TCPServer`` where ``allow_reuse_address`` is also overridden. 
From b8484c6ad7fd14ca464e584b79821b4b906dd77a Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Sun, 16 Jun 2024 06:51:17 -0600 Subject: [PATCH 215/373] Docs: remove temporary hardcoded links (#120348) --- Doc/tools/static/rtd_switcher.js | 35 +------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/Doc/tools/static/rtd_switcher.js b/Doc/tools/static/rtd_switcher.js index a67bb85505a9ca..f5dc7045a0dbc4 100644 --- a/Doc/tools/static/rtd_switcher.js +++ b/Doc/tools/static/rtd_switcher.js @@ -6,42 +6,9 @@ document.addEventListener("readthedocs-addons-data-ready", function(event) { const config = event.detail.data() - - // Add some mocked hardcoded versions pointing to the official - // documentation while migrating to Read the Docs. - // These are only for testing purposes. - // TODO: remove them when managing all the versions on Read the Docs, - // since all the "active, built and not hidden" versions will be shown automatically. - let versions = config.versions.active.concat([ - { - slug: "dev (3.14)", - urls: { - documentation: "https://docs.python.org/3.14/", - } - }, - { - slug: "dev (3.13)", - urls: { - documentation: "https://docs.python.org/3.13/", - } - }, - { - slug: "3.12", - urls: { - documentation: "https://docs.python.org/3.12/", - } - }, - { - slug: "3.11", - urls: { - documentation: "https://docs.python.org/3.11/", - } - }, - ]); - const versionSelect = `