diff --git a/newsfragments/4798.fixed.md b/newsfragments/4798.fixed.md new file mode 100644 index 00000000000..e53757d0a01 --- /dev/null +++ b/newsfragments/4798.fixed.md @@ -0,0 +1,2 @@ +fixes several limitations with base native type initialization. Required for extending native base types +with the limited API and extending base native types that require arguments passed to `__new__`. \ No newline at end of file diff --git a/pyo3-macros-backend/src/method.rs b/pyo3-macros-backend/src/method.rs index a1d7a95df35..ed17f7de44a 100644 --- a/pyo3-macros-backend/src/method.rs +++ b/pyo3-macros-backend/src/method.rs @@ -830,12 +830,14 @@ impl<'a> FnSpec<'a> { _kwargs: *mut #pyo3_path::ffi::PyObject ) -> #pyo3_path::PyResult<*mut #pyo3_path::ffi::PyObject> { use #pyo3_path::impl_::callback::IntoPyCallbackOutput; + let raw_args = _args; + let raw_kwargs = _kwargs; let function = #rust_name; // Shadow the function name to avoid #3017 #arg_convert #init_holders let result = #call; let initializer: #pyo3_path::PyClassInitializer::<#cls> = result.convert(py)?; - #pyo3_path::impl_::pymethods::tp_new_impl(py, initializer, _slf) + #pyo3_path::impl_::pymethods::tp_new_impl(py, initializer, _slf, raw_args, raw_kwargs) } } } diff --git a/src/impl_/pyclass.rs b/src/impl_/pyclass.rs index ac5c6e3e3f0..695312bd53e 100644 --- a/src/impl_/pyclass.rs +++ b/src/impl_/pyclass.rs @@ -1138,6 +1138,8 @@ pub trait PyClassBaseType: Sized { type BaseNativeType; type Initializer: PyObjectInit; type PyClassMutability: PyClassMutability; + /// Use the provided function instead of [ffi::PyTypeObject::tp_new] when creating an object of this type + const OVERRIDE_TP_NEW: Option = None; } /// Implementation of tp_dealloc for pyclasses without gc diff --git a/src/impl_/pyclass_init.rs b/src/impl_/pyclass_init.rs index 7242b6186d9..805759ebedf 100644 --- a/src/impl_/pyclass_init.rs +++ b/src/impl_/pyclass_init.rs @@ -1,11 +1,14 @@ //! Contains initialization utilities for `#[pyclass]`. +use crate::exceptions::PyTypeError; use crate::ffi_ptr_ext::FfiPtrExt; -use crate::internal::get_slot::TP_ALLOC; -use crate::types::PyType; -use crate::{ffi, Borrowed, PyErr, PyResult, Python}; +use crate::internal::get_slot::TP_NEW; +use crate::types::{PyDict, PyTuple, PyType}; +use crate::{ffi, Bound, PyErr, PyResult, Python}; use crate::{ffi::PyTypeObject, sealed::Sealed, type_object::PyTypeInfo}; use std::marker::PhantomData; +use super::pyclass::PyClassBaseType; + /// Initializer for Python types. /// /// This trait is intended to use internally for distinguishing `#[pyclass]` and @@ -17,69 +20,70 @@ pub trait PyObjectInit: Sized + Sealed { self, py: Python<'_>, subtype: *mut PyTypeObject, + args: &Bound<'_, PyTuple>, + kwargs: Option<&Bound<'_, PyDict>>, ) -> PyResult<*mut ffi::PyObject>; #[doc(hidden)] fn can_be_subclassed(&self) -> bool; } -/// Initializer for Python native types, like `PyDict`. -pub struct PyNativeTypeInitializer(pub PhantomData); +/// Initializer for Python native types, like [PyDict]. +pub struct PyNativeTypeInitializer(pub PhantomData); -impl PyObjectInit for PyNativeTypeInitializer { +impl PyObjectInit for PyNativeTypeInitializer { + /// call `__new__` ([ffi::PyTypeObject::tp_new]) for the native base type. + /// This will allocate a new python object and initialize the part relating to the native base type. unsafe fn into_new_object( self, py: Python<'_>, subtype: *mut PyTypeObject, + args: &Bound<'_, PyTuple>, + kwargs: Option<&Bound<'_, PyDict>>, ) -> PyResult<*mut ffi::PyObject> { unsafe fn inner( py: Python<'_>, - type_object: *mut PyTypeObject, + native_base_type: *mut PyTypeObject, subtype: *mut PyTypeObject, + args: &Bound<'_, PyTuple>, + kwargs: Option<&Bound<'_, PyDict>>, + override_tp_new: Option, ) -> PyResult<*mut ffi::PyObject> { - // HACK (due to FIXME below): PyBaseObject_Type's tp_new isn't happy with NULL arguments - let is_base_object = type_object == std::ptr::addr_of_mut!(ffi::PyBaseObject_Type); - let subtype_borrowed: Borrowed<'_, '_, PyType> = subtype - .cast::() - .assume_borrowed_unchecked(py) - .downcast_unchecked(); - - if is_base_object { - let alloc = subtype_borrowed - .get_slot(TP_ALLOC) - .unwrap_or(ffi::PyType_GenericAlloc); - - let obj = alloc(subtype, 0); - return if obj.is_null() { - Err(PyErr::fetch(py)) - } else { - Ok(obj) - }; - } + let tp_new = if let Some(tp_new) = override_tp_new { + tp_new + } else { + native_base_type + .cast::() + .assume_borrowed_unchecked(py) + .downcast_unchecked::() + .get_slot(TP_NEW) + .ok_or_else(|| { + PyTypeError::new_err("cannot construct type that does not define __new__") + })? + }; - #[cfg(Py_LIMITED_API)] - unreachable!("subclassing native types is not possible with the `abi3` feature"); + let obj = tp_new( + subtype, + args.as_ptr(), + kwargs + .map(|obj| obj.as_ptr()) + .unwrap_or(std::ptr::null_mut()), + ); - #[cfg(not(Py_LIMITED_API))] - { - match (*type_object).tp_new { - // FIXME: Call __new__ with actual arguments - Some(newfunc) => { - let obj = newfunc(subtype, std::ptr::null_mut(), std::ptr::null_mut()); - if obj.is_null() { - Err(PyErr::fetch(py)) - } else { - Ok(obj) - } - } - None => Err(crate::exceptions::PyTypeError::new_err( - "base type without tp_new", - )), - } + if obj.is_null() { + Err(PyErr::fetch(py)) + } else { + Ok(obj) } } - let type_object = T::type_object_raw(py); - inner(py, type_object, subtype) + inner( + py, + T::type_object_raw(py), + subtype, + args, + kwargs, + T::OVERRIDE_TP_NEW, + ) } #[inline] diff --git a/src/impl_/pymethods.rs b/src/impl_/pymethods.rs index 58d0c93c240..705c41e413d 100644 --- a/src/impl_/pymethods.rs +++ b/src/impl_/pymethods.rs @@ -8,10 +8,10 @@ use crate::pycell::impl_::PyClassBorrowChecker as _; use crate::pycell::{PyBorrowError, PyBorrowMutError}; use crate::pyclass::boolean_struct::False; use crate::types::any::PyAnyMethods; -use crate::types::PyType; +use crate::types::{PyDict, PyTuple, PyType}; use crate::{ - ffi, Bound, DowncastError, Py, PyAny, PyClass, PyClassInitializer, PyErr, PyObject, PyRef, - PyRefMut, PyResult, PyTraverseError, PyTypeCheck, PyVisit, Python, + ffi, Borrowed, Bound, DowncastError, Py, PyAny, PyClass, PyClassInitializer, PyErr, PyObject, + PyRef, PyRefMut, PyResult, PyTraverseError, PyTypeCheck, PyVisit, Python, }; use std::ffi::CStr; use std::fmt; @@ -696,13 +696,24 @@ impl<'py, T> std::ops::Deref for BoundRef<'_, 'py, T> { } } -pub unsafe fn tp_new_impl( - py: Python<'_>, +pub unsafe fn tp_new_impl<'py, T: PyClass>( + py: Python<'py>, initializer: PyClassInitializer, - target_type: *mut ffi::PyTypeObject, + most_derived_type: *mut ffi::PyTypeObject, + args: *mut ffi::PyObject, + kwargs: *mut ffi::PyObject, ) -> PyResult<*mut ffi::PyObject> { + // Safety: + // - `args` is known to be a tuple + // - `kwargs` is known to be a dict or null + // - we both have the GIL and can borrow these input references for the `'py` lifetime. + let args: Borrowed<'py, 'py, PyTuple> = + Borrowed::from_ptr(py, args).downcast_unchecked::(); + let kwargs: Option> = + Borrowed::from_ptr_or_opt(py, kwargs).map(|kwargs| kwargs.downcast_unchecked()); + initializer - .create_class_object_of_type(py, target_type) + .create_class_object_of_type(py, most_derived_type, &args, kwargs.as_deref()) .map(Bound::into_ptr) } diff --git a/src/internal/get_slot.rs b/src/internal/get_slot.rs index 260893d4204..6a24608d6e9 100644 --- a/src/internal/get_slot.rs +++ b/src/internal/get_slot.rs @@ -122,11 +122,11 @@ macro_rules! impl_slots { // Slots are implemented on-demand as needed.) impl_slots! { - TP_ALLOC: (Py_tp_alloc, tp_alloc) -> Option, TP_BASE: (Py_tp_base, tp_base) -> *mut ffi::PyTypeObject, TP_CLEAR: (Py_tp_clear, tp_clear) -> Option, TP_DESCR_GET: (Py_tp_descr_get, tp_descr_get) -> Option, TP_FREE: (Py_tp_free, tp_free) -> Option, + TP_NEW: (Py_tp_new, tp_new) -> Option, TP_TRAVERSE: (Py_tp_traverse, tp_traverse) -> Option, } diff --git a/src/pyclass_init.rs b/src/pyclass_init.rs index 6dc6ec12c6b..211033502b6 100644 --- a/src/pyclass_init.rs +++ b/src/pyclass_init.rs @@ -1,9 +1,60 @@ //! Contains initialization utilities for `#[pyclass]`. +//! +//! # Background +//! +//! Initialization of a regular empty python class `class MyClass: pass` +//! - `MyClass(*args, **kwargs) == MyClass.__call__(*args, **kwargs) == type.__call__(*args, **kwargs)` +//! - `MyClass.tp_call` is NULL but `obj.__call__` uses `Py_TYPE(obj)->tp_call` ([ffi::_PyObject_MakeTpCall]) +//! - so `__call__` is inherited from the metaclass which is `type` in this case. +//! - `type.__call__` calls `obj = MyClass.__new__(MyClass, *args, **kwargs) == object.__new__(MyClass, *args, **kwargs)` +//! - `MyClass.tp_new` is inherited from the base class which is `object` in this case. +//! - Allocates a new object and does some basic initialization +//! - `type.__call__` calls `MyClass.__init__(obj, *args, **kwargs) == object.__init__(obj, *args, **kwargs)` +//! - `MyClass.tp_init` is inherited from the base class which is `object` in this case. +//! - Does some checks but is essentially a no-op +//! +//! So in general for `class MyClass(BaseClass, metaclass=MetaClass): pass` +//! - `MyClass(*args, **kwargs)` +//! - `Metaclass.__call__(*args, **kwargs)` +//! - `BaseClass.__new__(*args, **kwargs)` +//! - `BaseClass.__init__(*args, **kwargs)` +//! +//! - If `MyClass` defines `__new__` then it must delegate to `super(MyClass, cls).__new__(cls, *args, **kwargs)` to +//! allocate the object. +//! - is is the responsibility of `MyClass` to call `super().__new__()` with the correct arguments. +//! `object.__new__()` does not accept any arguments for example. +//! - If `MyClass` defines `__init__` then it should call `super().__init__()` to recursively initialize +//! the base classes. Again, passing arguments is the responsibility of MyClass. +//! +//! Initialization of a pyo3 `#[pyclass] struct MyClass;` +//! - `MyClass(*args, **kwargs) == MyClass.__call__(*args, **kwargs) == type.__call__(*args, **kwargs)` +//! - Calls `obj = MyClass.__new__(MyClass, *args, **kwargs)` +//! - Calls user defined `#[new]` function, returning a [`IntoPyCallbackOutput`] which has +//! instances of each user defined struct in the inheritance hierarchy. +//! - Calls `PyClassInitializer::create_class_object_of_type` +//! - Recursively calls back to the base native type. +//! - At the base native type, [PyObjectInit::into_new_object] calls `__new__` for the base native type +//! (passing the [ffi::PyTypeObject] of the most derived type) +//! - Allocates a new python object with enough space to fit the user structs and the native base type data. +//! - Initializes the native base type part of the new object. +//! - Moves the data for the user structs into the appropriate locations in the new python object. +//! - Calls `MyClass.__init__(obj, *args, **kwargs)` +//! - Inherited from native base class +//! +//! ## Notes: +//! - pyo3 classes annotated with `#[pyclass(dict)]` have a `__dict__` attribute. When using the `tp_dictoffset` +//! mechanism instead of `Py_TPFLAGS_MANAGED_DICT` to enable this, the dict is stored in the `PyClassObjectContents` +//! of the most derived type and is set to NULL at construction and initialized to a new dictionary by +//! [ffi::PyObject_GenericGetDict] when first accessed. +//! - The python documentation also mentions 'static' classes which define their [ffi::PyTypeObject] in static/global +//! memory. Builtins like `dict` (`PyDict_Type`) are defined this way but classes defined in python and pyo3 are +//! 'heap' types where the [ffi::PyTypeObject] objects are allocated at runtime. +//! use crate::ffi_ptr_ext::FfiPtrExt; use crate::impl_::callback::IntoPyCallbackOutput; use crate::impl_::pyclass::{PyClassBaseType, PyClassDict, PyClassThreadChecker, PyClassWeakRef}; use crate::impl_::pyclass_init::{PyNativeTypeInitializer, PyObjectInit}; -use crate::types::PyAnyMethods; +use crate::types::{PyAnyMethods, PyDict, PyTuple}; use crate::{ffi, Bound, Py, PyClass, PyResult, Python}; use crate::{ ffi::PyTypeObject, @@ -150,18 +201,22 @@ impl PyClassInitializer { where T: PyClass, { - unsafe { self.create_class_object_of_type(py, T::type_object_raw(py)) } + unsafe { + self.create_class_object_of_type(py, T::type_object_raw(py), &PyTuple::empty(py), None) + } } /// Creates a new class object and initializes it given a typeobject `subtype`. /// /// # Safety /// `subtype` must be a valid pointer to the type object of T or a subclass. - pub(crate) unsafe fn create_class_object_of_type( + pub(crate) unsafe fn create_class_object_of_type<'py>( self, - py: Python<'_>, + py: Python<'py>, target_type: *mut crate::ffi::PyTypeObject, - ) -> PyResult> + args: &Bound<'py, PyTuple>, + kwargs: Option<&Bound<'py, PyDict>>, + ) -> PyResult> where T: PyClass, { @@ -178,7 +233,7 @@ impl PyClassInitializer { PyClassInitializerImpl::New { init, super_init } => (init, super_init), }; - let obj = super_init.into_new_object(py, target_type)?; + let obj = super_init.into_new_object(py, target_type, args, kwargs)?; let part_init: *mut PartiallyInitializedClassObject = obj.cast(); std::ptr::write( @@ -203,8 +258,10 @@ impl PyObjectInit for PyClassInitializer { self, py: Python<'_>, subtype: *mut PyTypeObject, + args: &Bound<'_, PyTuple>, + kwargs: Option<&Bound<'_, PyDict>>, ) -> PyResult<*mut ffi::PyObject> { - self.create_class_object_of_type(py, subtype) + self.create_class_object_of_type(py, subtype, args, kwargs) .map(Bound::into_ptr) } @@ -268,9 +325,12 @@ where #[cfg(all(test, feature = "macros"))] mod tests { - //! See https://github.com/PyO3/pyo3/issues/4452. - - use crate::prelude::*; + use crate::{ + ffi, + prelude::*, + types::{PyDict, PyType}, + PyTypeInfo, + }; #[pyclass(crate = "crate", subclass)] struct BaseClass {} @@ -280,12 +340,99 @@ mod tests { _data: i32, } + /// See https://github.com/PyO3/pyo3/issues/4452. #[test] - #[should_panic] + #[should_panic(expected = "you cannot add a subclass to an existing value")] fn add_subclass_to_py_is_unsound() { Python::with_gil(|py| { let base = Py::new(py, BaseClass {}).unwrap(); let _subclass = PyClassInitializer::from(base).add_subclass(SubClass { _data: 42 }); }); } + + /// Verify the correctness of the documentation describing class initialization. + #[test] + fn empty_class_inherits_expected_slots() { + Python::with_gil(|py| { + let namespace = PyDict::new(py); + py.run( + ffi::c_str!("class EmptyClass: pass"), + Some(&namespace), + Some(&namespace), + ) + .unwrap(); + let empty_class = namespace + .get_item("EmptyClass") + .unwrap() + .unwrap() + .downcast::() + .unwrap() + .as_type_ptr(); + + let object_type = PyAny::type_object(py).as_type_ptr(); + unsafe { + assert_eq!( + ffi::PyType_GetSlot(empty_class, ffi::Py_tp_new), + ffi::PyType_GetSlot(object_type, ffi::Py_tp_new) + ); + assert_eq!( + ffi::PyType_GetSlot(empty_class, ffi::Py_tp_init), + ffi::PyType_GetSlot(object_type, ffi::Py_tp_init) + ); + assert!(ffi::PyType_GetSlot(empty_class, ffi::Py_tp_call).is_null()); + } + + let base_class = BaseClass::type_object_raw(py); + unsafe { + // tp_new is always set for pyclasses, not inherited + assert_ne!( + ffi::PyType_GetSlot(base_class, ffi::Py_tp_new), + ffi::PyType_GetSlot(object_type, ffi::Py_tp_new) + ); + assert_eq!( + ffi::PyType_GetSlot(base_class, ffi::Py_tp_init), + ffi::PyType_GetSlot(object_type, ffi::Py_tp_init) + ); + assert!(ffi::PyType_GetSlot(base_class, ffi::Py_tp_call).is_null()); + } + }); + } + + /// Verify the correctness of the documentation describing class initialization. + #[test] + #[cfg(all(Py_3_10, not(Py_LIMITED_API)))] + fn managed_dict_initialized_as_expected() { + use crate::{impl_::pyclass::PyClassImpl, types::PyFloat}; + + unsafe fn get_dict(obj: *mut ffi::PyObject) -> *mut *mut ffi::PyObject { + let dict_offset = T::dict_offset().unwrap(); + (obj as *mut u8).add(usize::try_from(dict_offset).unwrap()) as *mut *mut ffi::PyObject + } + + #[pyclass(crate = "crate", dict)] + struct ClassWithDict {} + + Python::with_gil(|py| { + let obj = Py::new(py, ClassWithDict {}).unwrap(); + unsafe { + let obj_dict = get_dict::(obj.as_ptr()); + assert!((*obj_dict).is_null()); + crate::py_run!(py, obj, "obj.__dict__"); + assert!(!(*obj_dict).is_null()); + } + }); + + #[pyclass(crate = "crate", dict, extends=PyFloat)] + struct ExtendedClassWithDict {} + + Python::with_gil(|py| { + let obj = Py::new(py, ExtendedClassWithDict {}).unwrap(); + unsafe { + let obj_dict = get_dict::(obj.as_ptr()); + assert!((*obj_dict).is_null()); + crate::py_run!(py, obj, "obj.__dict__"); + assert!(!(*obj_dict).is_null()); + } + }); + } } diff --git a/src/sealed.rs b/src/sealed.rs index 0a2846b134a..9fec6769b80 100644 --- a/src/sealed.rs +++ b/src/sealed.rs @@ -51,7 +51,10 @@ impl Sealed for AddClassToModule {} impl Sealed for PyMethodDef {} impl Sealed for ModuleDef {} -impl Sealed for PyNativeTypeInitializer {} +impl Sealed + for PyNativeTypeInitializer +{ +} impl Sealed for PyClassInitializer {} impl Sealed for std::sync::Once {} diff --git a/src/types/any.rs b/src/types/any.rs index d060c187631..586999e3457 100644 --- a/src/types/any.rs +++ b/src/types/any.rs @@ -51,6 +51,16 @@ impl crate::impl_::pyclass::PyClassBaseType for PyAny { type BaseNativeType = PyAny; type Initializer = crate::impl_::pyclass_init::PyNativeTypeInitializer; type PyClassMutability = crate::pycell::impl_::ImmutableClass; + /// `object.__new__` (`ffi::PyBaseObject_Type.tp_new`) has some unique behaviour that must be worked around: + /// - it does not accept any arguments (if args or kwargs are non-empty a `TypeError` is raised) and in the current + /// implementation, arguments are propagated to the native base class `__new__` function. + /// - it initializes `__dict__` if applicable (`#[pyclass(dict)]`). Currently this dict will leak because + /// the pointer to it will be overwritten when the rust-managed data is written to the pyobject. + /// - it checks for any abstract methods and refuses to construct if any are found. This is generally not applicable + /// to pyo3 classes. + /// + /// on the other hand, [ffi::PyType_GenericNew] simply allocates space for the object without initializing anything. + const OVERRIDE_TP_NEW: Option = Some(ffi::PyType_GenericNew); } /// This trait represents the Python APIs which are usable on all Python objects.