From 73773f2fc5e33ac25b24b5291eec61d6e7957434 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Sun, 19 May 2024 11:24:22 +0100 Subject: [PATCH 01/11] Move slow path of CPyLong_AsSize_tAndOverflow to a non-inline function --- mypyc/common.py | 1 + mypyc/lib-rt/pythonsupport.c | 52 ++++++++++++++++++++++++++++++++++++ mypyc/lib-rt/pythonsupport.h | 35 +++++------------------- mypyc/lib-rt/setup.py | 1 + 4 files changed, 60 insertions(+), 29 deletions(-) create mode 100644 mypyc/lib-rt/pythonsupport.c diff --git a/mypyc/common.py b/mypyc/common.py index 3d07f6c3d0d3..44dbc330df7e 100644 --- a/mypyc/common.py +++ b/mypyc/common.py @@ -78,6 +78,7 @@ "exc_ops.c", "misc_ops.c", "generic_ops.c", + "pythonsupport.c", ] diff --git a/mypyc/lib-rt/pythonsupport.c b/mypyc/lib-rt/pythonsupport.c new file mode 100644 index 000000000000..5c4df660863b --- /dev/null +++ b/mypyc/lib-rt/pythonsupport.c @@ -0,0 +1,52 @@ +// Collects code that was copied in from cpython, for a couple of different reasons: +// * We wanted to modify it to produce a more efficient version for our uses +// * We needed to call it and it was static :( +// * We wanted to call it and needed to backport it + +#include "pythonsupport.h" + +// Slow path of CPyLong_AsSsize_tAndOverflow (non-inlined) +Py_ssize_t +CPyLong_AsSsize_tAndOverflow_(PyObject *vv, int *overflow) +{ + PyLongObject *v = (PyLongObject *)vv; + size_t x, prev; + Py_ssize_t res; + Py_ssize_t i; + int sign; + + *overflow = 0; + + res = -1; + i = CPY_LONG_TAG(v); + + sign = 1; + x = 0; + if (i & CPY_SIGN_NEGATIVE) { + sign = -1; + } + i >>= CPY_NON_SIZE_BITS; + while (--i >= 0) { + prev = x; + x = (x << PyLong_SHIFT) + CPY_LONG_DIGIT(v, i); + if ((x >> PyLong_SHIFT) != prev) { + *overflow = sign; + goto exit; + } + } + /* Haven't lost any bits, but casting to long requires extra + * care (see comment above). + */ + if (x <= (size_t)CPY_TAGGED_MAX) { + res = (Py_ssize_t)x * sign; + } + else if (sign < 0 && x == CPY_TAGGED_ABS_MIN) { + res = CPY_TAGGED_MIN; + } + else { + *overflow = sign; + /* res is already set to -1 */ + } + exit: + return res; +} diff --git a/mypyc/lib-rt/pythonsupport.h b/mypyc/lib-rt/pythonsupport.h index f7d501f44a27..d05c13c6ed15 100644 --- a/mypyc/lib-rt/pythonsupport.h +++ b/mypyc/lib-rt/pythonsupport.h @@ -131,15 +131,16 @@ init_subclass(PyTypeObject *type, PyObject *kwds) #if CPY_3_12_FEATURES +Py_ssize_t +CPyLong_AsSsize_tAndOverflow_(PyObject *vv, int *overflow); + static inline Py_ssize_t CPyLong_AsSsize_tAndOverflow(PyObject *vv, int *overflow) { /* This version by Tim Peters */ PyLongObject *v = (PyLongObject *)vv; - size_t x, prev; Py_ssize_t res; Py_ssize_t i; - int sign; *overflow = 0; @@ -154,33 +155,9 @@ CPyLong_AsSsize_tAndOverflow(PyObject *vv, int *overflow) } else if (i == ((1 << CPY_NON_SIZE_BITS) | CPY_SIGN_NEGATIVE)) { res = -(sdigit)CPY_LONG_DIGIT(v, 0); } else { - sign = 1; - x = 0; - if (i & CPY_SIGN_NEGATIVE) { - sign = -1; - } - i >>= CPY_NON_SIZE_BITS; - while (--i >= 0) { - prev = x; - x = (x << PyLong_SHIFT) + CPY_LONG_DIGIT(v, i); - if ((x >> PyLong_SHIFT) != prev) { - *overflow = sign; - goto exit; - } - } - /* Haven't lost any bits, but casting to long requires extra - * care (see comment above). - */ - if (x <= (size_t)CPY_TAGGED_MAX) { - res = (Py_ssize_t)x * sign; - } - else if (sign < 0 && x == CPY_TAGGED_ABS_MIN) { - res = CPY_TAGGED_MIN; - } - else { - *overflow = sign; - /* res is already set to -1 */ - } + int overflow_local; + res = CPyLong_AsSsize_tAndOverflow_(vv, &overflow_local); + *overflow = overflow_local; } exit: return res; diff --git a/mypyc/lib-rt/setup.py b/mypyc/lib-rt/setup.py index ef81b794c9bd..66b130581cb3 100644 --- a/mypyc/lib-rt/setup.py +++ b/mypyc/lib-rt/setup.py @@ -58,6 +58,7 @@ def run(self): "list_ops.c", "exc_ops.c", "generic_ops.c", + "pythonsupport.c", ], depends=["CPy.h", "mypyc_util.h", "pythonsupport.h"], extra_compile_args=["-Wno-unused-function", "-Wno-sign-compare"] + compile_args, From 2615f52d2504cefec6669b72703770cedcb83afc Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Sun, 19 May 2024 11:27:04 +0100 Subject: [PATCH 02/11] Inline tagged integer unboxing --- mypyc/lib-rt/CPy.h | 38 +++++++++++++++++++++++++++++++++++--- mypyc/lib-rt/int_ops.c | 35 ----------------------------------- 2 files changed, 35 insertions(+), 38 deletions(-) diff --git a/mypyc/lib-rt/CPy.h b/mypyc/lib-rt/CPy.h index 1a03f049ecb0..8af477608164 100644 --- a/mypyc/lib-rt/CPy.h +++ b/mypyc/lib-rt/CPy.h @@ -120,9 +120,6 @@ static inline size_t CPy_FindAttrOffset(PyTypeObject *trait, CPyVTableItem *vtab CPyTagged CPyTagged_FromSsize_t(Py_ssize_t value); CPyTagged CPyTagged_FromVoidPtr(void *ptr); CPyTagged CPyTagged_FromInt64(int64_t value); -CPyTagged CPyTagged_FromObject(PyObject *object); -CPyTagged CPyTagged_StealFromObject(PyObject *object); -CPyTagged CPyTagged_BorrowFromObject(PyObject *object); PyObject *CPyTagged_AsObject(CPyTagged x); PyObject *CPyTagged_StealAsObject(CPyTagged x); Py_ssize_t CPyTagged_AsSsize_t(CPyTagged x); @@ -199,6 +196,41 @@ static inline PyObject *CPyTagged_LongAsObject(CPyTagged x) { return (PyObject *)(x & ~CPY_INT_TAG); } +static inline CPyTagged CPyTagged_FromObject(PyObject *object) { + int overflow; + // The overflow check knows about CPyTagged's width + Py_ssize_t value = CPyLong_AsSsize_tAndOverflow(object, &overflow); + if (unlikely(overflow != 0)) { + Py_INCREF(object); + return ((CPyTagged)object) | CPY_INT_TAG; + } else { + return value << 1; + } +} + +static inline CPyTagged CPyTagged_StealFromObject(PyObject *object) { + int overflow; + // The overflow check knows about CPyTagged's width + Py_ssize_t value = CPyLong_AsSsize_tAndOverflow(object, &overflow); + if (unlikely(overflow != 0)) { + return ((CPyTagged)object) | CPY_INT_TAG; + } else { + Py_DECREF(object); + return value << 1; + } +} + +static inline CPyTagged CPyTagged_BorrowFromObject(PyObject *object) { + int overflow; + // The overflow check knows about CPyTagged's width + Py_ssize_t value = CPyLong_AsSsize_tAndOverflow(object, &overflow); + if (unlikely(overflow != 0)) { + return ((CPyTagged)object) | CPY_INT_TAG; + } else { + return value << 1; + } +} + static inline bool CPyTagged_TooBig(Py_ssize_t value) { // Micro-optimized for the common case where it fits. return (size_t)value > CPY_TAGGED_MAX diff --git a/mypyc/lib-rt/int_ops.c b/mypyc/lib-rt/int_ops.c index b57d88c6ac93..e34a8cd6af5f 100644 --- a/mypyc/lib-rt/int_ops.c +++ b/mypyc/lib-rt/int_ops.c @@ -44,41 +44,6 @@ CPyTagged CPyTagged_FromInt64(int64_t value) { } } -CPyTagged CPyTagged_FromObject(PyObject *object) { - int overflow; - // The overflow check knows about CPyTagged's width - Py_ssize_t value = CPyLong_AsSsize_tAndOverflow(object, &overflow); - if (unlikely(overflow != 0)) { - Py_INCREF(object); - return ((CPyTagged)object) | CPY_INT_TAG; - } else { - return value << 1; - } -} - -CPyTagged CPyTagged_StealFromObject(PyObject *object) { - int overflow; - // The overflow check knows about CPyTagged's width - Py_ssize_t value = CPyLong_AsSsize_tAndOverflow(object, &overflow); - if (unlikely(overflow != 0)) { - return ((CPyTagged)object) | CPY_INT_TAG; - } else { - Py_DECREF(object); - return value << 1; - } -} - -CPyTagged CPyTagged_BorrowFromObject(PyObject *object) { - int overflow; - // The overflow check knows about CPyTagged's width - Py_ssize_t value = CPyLong_AsSsize_tAndOverflow(object, &overflow); - if (unlikely(overflow != 0)) { - return ((CPyTagged)object) | CPY_INT_TAG; - } else { - return value << 1; - } -} - PyObject *CPyTagged_AsObject(CPyTagged x) { PyObject *value; if (unlikely(CPyTagged_CheckLong(x))) { From b0d06c1466390feb2383ab5f92bbfd10fda08411 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Sun, 19 May 2024 11:43:49 +0100 Subject: [PATCH 03/11] Inline i64 and i32 unboxing --- mypyc/lib-rt/CPy.h | 69 ++++++++++++++++++++++++++++++++++++++++-- mypyc/lib-rt/int_ops.c | 65 --------------------------------------- 2 files changed, 67 insertions(+), 67 deletions(-) diff --git a/mypyc/lib-rt/CPy.h b/mypyc/lib-rt/CPy.h index 8af477608164..0f4f0f22e22d 100644 --- a/mypyc/lib-rt/CPy.h +++ b/mypyc/lib-rt/CPy.h @@ -145,10 +145,8 @@ CPyTagged CPyTagged_FromFloat(double f); PyObject *CPyLong_FromStrWithBase(PyObject *o, CPyTagged base); PyObject *CPyLong_FromStr(PyObject *o); PyObject *CPyBool_Str(bool b); -int64_t CPyLong_AsInt64(PyObject *o); int64_t CPyInt64_Divide(int64_t x, int64_t y); int64_t CPyInt64_Remainder(int64_t x, int64_t y); -int32_t CPyLong_AsInt32(PyObject *o); int32_t CPyInt32_Divide(int32_t x, int32_t y); int32_t CPyInt32_Remainder(int32_t x, int32_t y); void CPyInt32_Overflow(void); @@ -319,6 +317,73 @@ static inline bool CPyTagged_IsLe(CPyTagged left, CPyTagged right) { } +static inline int64_t CPyLong_AsInt64(PyObject *o) { + if (likely(PyLong_Check(o))) { + PyLongObject *lobj = (PyLongObject *)o; + Py_ssize_t size = Py_SIZE(lobj); + if (likely(size == 1)) { + // Fast path + return CPY_LONG_DIGIT(lobj, 0); + } else if (likely(size == 0)) { + return 0; + } + } + // Slow path + int overflow; + int64_t result = PyLong_AsLongLongAndOverflow(o, &overflow); + if (result == -1) { + if (PyErr_Occurred()) { + return CPY_LL_INT_ERROR; + } else if (overflow) { + PyErr_SetString(PyExc_OverflowError, "int too large to convert to i64"); + return CPY_LL_INT_ERROR; + } + } + return result; +} + + +static inline int32_t CPyLong_AsInt32(PyObject *o) { + if (likely(PyLong_Check(o))) { + #if CPY_3_12_FEATURES + PyLongObject *lobj = (PyLongObject *)o; + size_t tag = CPY_LONG_TAG(lobj); + if (likely(tag == (1 << CPY_NON_SIZE_BITS))) { + // Fast path + return CPY_LONG_DIGIT(lobj, 0); + } else if (likely(tag == CPY_SIGN_ZERO)) { + return 0; + } + #else + PyLongObject *lobj = (PyLongObject *)o; + Py_ssize_t size = lobj->ob_base.ob_size; + if (likely(size == 1)) { + // Fast path + return CPY_LONG_DIGIT(lobj, 0); + } else if (likely(size == 0)) { + return 0; + } + #endif + } + // Slow path + int overflow; + long result = PyLong_AsLongAndOverflow(o, &overflow); + if (result > 0x7fffffffLL || result < -0x80000000LL) { + overflow = 1; + result = -1; + } + if (result == -1) { + if (PyErr_Occurred()) { + return CPY_LL_INT_ERROR; + } else if (overflow) { + PyErr_SetString(PyExc_OverflowError, "int too large to convert to i32"); + return CPY_LL_INT_ERROR; + } + } + return result; +} + + // Float operations diff --git a/mypyc/lib-rt/int_ops.c b/mypyc/lib-rt/int_ops.c index e34a8cd6af5f..5a499e3d77b4 100644 --- a/mypyc/lib-rt/int_ops.c +++ b/mypyc/lib-rt/int_ops.c @@ -480,31 +480,6 @@ CPyTagged CPyTagged_Lshift(CPyTagged left, CPyTagged right) { return CPyTagged_StealFromObject(result); } -int64_t CPyLong_AsInt64(PyObject *o) { - if (likely(PyLong_Check(o))) { - PyLongObject *lobj = (PyLongObject *)o; - Py_ssize_t size = Py_SIZE(lobj); - if (likely(size == 1)) { - // Fast path - return CPY_LONG_DIGIT(lobj, 0); - } else if (likely(size == 0)) { - return 0; - } - } - // Slow path - int overflow; - int64_t result = PyLong_AsLongLongAndOverflow(o, &overflow); - if (result == -1) { - if (PyErr_Occurred()) { - return CPY_LL_INT_ERROR; - } else if (overflow) { - PyErr_SetString(PyExc_OverflowError, "int too large to convert to i64"); - return CPY_LL_INT_ERROR; - } - } - return result; -} - int64_t CPyInt64_Divide(int64_t x, int64_t y) { if (y == 0) { PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero"); @@ -539,46 +514,6 @@ int64_t CPyInt64_Remainder(int64_t x, int64_t y) { return d; } -int32_t CPyLong_AsInt32(PyObject *o) { - if (likely(PyLong_Check(o))) { - #if CPY_3_12_FEATURES - PyLongObject *lobj = (PyLongObject *)o; - size_t tag = CPY_LONG_TAG(lobj); - if (likely(tag == (1 << CPY_NON_SIZE_BITS))) { - // Fast path - return CPY_LONG_DIGIT(lobj, 0); - } else if (likely(tag == CPY_SIGN_ZERO)) { - return 0; - } - #else - PyLongObject *lobj = (PyLongObject *)o; - Py_ssize_t size = lobj->ob_base.ob_size; - if (likely(size == 1)) { - // Fast path - return CPY_LONG_DIGIT(lobj, 0); - } else if (likely(size == 0)) { - return 0; - } - #endif - } - // Slow path - int overflow; - long result = PyLong_AsLongAndOverflow(o, &overflow); - if (result > 0x7fffffffLL || result < -0x80000000LL) { - overflow = 1; - result = -1; - } - if (result == -1) { - if (PyErr_Occurred()) { - return CPY_LL_INT_ERROR; - } else if (overflow) { - PyErr_SetString(PyExc_OverflowError, "int too large to convert to i32"); - return CPY_LL_INT_ERROR; - } - } - return result; -} - int32_t CPyInt32_Divide(int32_t x, int32_t y) { if (y == 0) { PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero"); From abf20c449da7030606f5173e2574b84ae44addec Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Sun, 19 May 2024 11:45:40 +0100 Subject: [PATCH 04/11] Inline i16 and u8 unboxing --- mypyc/lib-rt/CPy.h | 91 +++++++++++++++++++++++++++++++++++++++++- mypyc/lib-rt/int_ops.c | 89 ----------------------------------------- 2 files changed, 89 insertions(+), 91 deletions(-) diff --git a/mypyc/lib-rt/CPy.h b/mypyc/lib-rt/CPy.h index 0f4f0f22e22d..b62cf164e23a 100644 --- a/mypyc/lib-rt/CPy.h +++ b/mypyc/lib-rt/CPy.h @@ -150,11 +150,9 @@ int64_t CPyInt64_Remainder(int64_t x, int64_t y); int32_t CPyInt32_Divide(int32_t x, int32_t y); int32_t CPyInt32_Remainder(int32_t x, int32_t y); void CPyInt32_Overflow(void); -int16_t CPyLong_AsInt16(PyObject *o); int16_t CPyInt16_Divide(int16_t x, int16_t y); int16_t CPyInt16_Remainder(int16_t x, int16_t y); void CPyInt16_Overflow(void); -uint8_t CPyLong_AsUInt8(PyObject *o); void CPyUInt8_Overflow(void); double CPyTagged_TrueDivide(CPyTagged x, CPyTagged y); @@ -383,6 +381,95 @@ static inline int32_t CPyLong_AsInt32(PyObject *o) { return result; } +static inline int16_t CPyLong_AsInt16(PyObject *o) { + if (likely(PyLong_Check(o))) { + #if CPY_3_12_FEATURES + PyLongObject *lobj = (PyLongObject *)o; + size_t tag = CPY_LONG_TAG(lobj); + if (likely(tag == (1 << CPY_NON_SIZE_BITS))) { + // Fast path + digit x = CPY_LONG_DIGIT(lobj, 0); + if (x < 0x8000) + return x; + } else if (likely(tag == CPY_SIGN_ZERO)) { + return 0; + } + #else + PyLongObject *lobj = (PyLongObject *)o; + Py_ssize_t size = lobj->ob_base.ob_size; + if (likely(size == 1)) { + // Fast path + digit x = lobj->ob_digit[0]; + if (x < 0x8000) + return x; + } else if (likely(size == 0)) { + return 0; + } + #endif + } + // Slow path + int overflow; + long result = PyLong_AsLongAndOverflow(o, &overflow); + if (result > 0x7fff || result < -0x8000) { + overflow = 1; + result = -1; + } + if (result == -1) { + if (PyErr_Occurred()) { + return CPY_LL_INT_ERROR; + } else if (overflow) { + PyErr_SetString(PyExc_OverflowError, "int too large to convert to i16"); + return CPY_LL_INT_ERROR; + } + } + return result; +} + + +static inline uint8_t CPyLong_AsUInt8(PyObject *o) { + if (likely(PyLong_Check(o))) { + #if CPY_3_12_FEATURES + PyLongObject *lobj = (PyLongObject *)o; + size_t tag = CPY_LONG_TAG(lobj); + if (likely(tag == (1 << CPY_NON_SIZE_BITS))) { + // Fast path + digit x = CPY_LONG_DIGIT(lobj, 0); + if (x < 256) + return x; + } else if (likely(tag == CPY_SIGN_ZERO)) { + return 0; + } + #else + PyLongObject *lobj = (PyLongObject *)o; + Py_ssize_t size = lobj->ob_base.ob_size; + if (likely(size == 1)) { + // Fast path + digit x = lobj->ob_digit[0]; + if (x < 256) + return x; + } else if (likely(size == 0)) { + return 0; + } + #endif + } + // Slow path + int overflow; + long result = PyLong_AsLongAndOverflow(o, &overflow); + if (result < 0 || result >= 256) { + overflow = 1; + result = -1; + } + if (result == -1) { + if (PyErr_Occurred()) { + return CPY_LL_UINT_ERROR; + } else if (overflow) { + PyErr_SetString(PyExc_OverflowError, "int too large or small to convert to u8"); + return CPY_LL_UINT_ERROR; + } + } + return result; +} + // Float operations diff --git a/mypyc/lib-rt/int_ops.c b/mypyc/lib-rt/int_ops.c index 5a499e3d77b4..2806198c297f 100644 --- a/mypyc/lib-rt/int_ops.c +++ b/mypyc/lib-rt/int_ops.c @@ -552,50 +552,6 @@ void CPyInt32_Overflow() { PyErr_SetString(PyExc_OverflowError, "int too large to convert to i32"); } -int16_t CPyLong_AsInt16(PyObject *o) { - if (likely(PyLong_Check(o))) { - #if CPY_3_12_FEATURES - PyLongObject *lobj = (PyLongObject *)o; - size_t tag = CPY_LONG_TAG(lobj); - if (likely(tag == (1 << CPY_NON_SIZE_BITS))) { - // Fast path - digit x = CPY_LONG_DIGIT(lobj, 0); - if (x < 0x8000) - return x; - } else if (likely(tag == CPY_SIGN_ZERO)) { - return 0; - } - #else - PyLongObject *lobj = (PyLongObject *)o; - Py_ssize_t size = lobj->ob_base.ob_size; - if (likely(size == 1)) { - // Fast path - digit x = lobj->ob_digit[0]; - if (x < 0x8000) - return x; - } else if (likely(size == 0)) { - return 0; - } - #endif - } - // Slow path - int overflow; - long result = PyLong_AsLongAndOverflow(o, &overflow); - if (result > 0x7fff || result < -0x8000) { - overflow = 1; - result = -1; - } - if (result == -1) { - if (PyErr_Occurred()) { - return CPY_LL_INT_ERROR; - } else if (overflow) { - PyErr_SetString(PyExc_OverflowError, "int too large to convert to i16"); - return CPY_LL_INT_ERROR; - } - } - return result; -} - int16_t CPyInt16_Divide(int16_t x, int16_t y) { if (y == 0) { PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero"); @@ -634,51 +590,6 @@ void CPyInt16_Overflow() { PyErr_SetString(PyExc_OverflowError, "int too large to convert to i16"); } - -uint8_t CPyLong_AsUInt8(PyObject *o) { - if (likely(PyLong_Check(o))) { - #if CPY_3_12_FEATURES - PyLongObject *lobj = (PyLongObject *)o; - size_t tag = CPY_LONG_TAG(lobj); - if (likely(tag == (1 << CPY_NON_SIZE_BITS))) { - // Fast path - digit x = CPY_LONG_DIGIT(lobj, 0); - if (x < 256) - return x; - } else if (likely(tag == CPY_SIGN_ZERO)) { - return 0; - } - #else - PyLongObject *lobj = (PyLongObject *)o; - Py_ssize_t size = lobj->ob_base.ob_size; - if (likely(size == 1)) { - // Fast path - digit x = lobj->ob_digit[0]; - if (x < 256) - return x; - } else if (likely(size == 0)) { - return 0; - } - #endif - } - // Slow path - int overflow; - long result = PyLong_AsLongAndOverflow(o, &overflow); - if (result < 0 || result >= 256) { - overflow = 1; - result = -1; - } - if (result == -1) { - if (PyErr_Occurred()) { - return CPY_LL_UINT_ERROR; - } else if (overflow) { - PyErr_SetString(PyExc_OverflowError, "int too large or small to convert to u8"); - return CPY_LL_UINT_ERROR; - } - } - return result; -} - void CPyUInt8_Overflow() { PyErr_SetString(PyExc_OverflowError, "int too large or small to convert to u8"); } From 235225bb848c238efdd435cf49e1e04303aa9c99 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Sun, 19 May 2024 12:22:34 +0100 Subject: [PATCH 05/11] Only inline fast path of i64 unboxing --- mypyc/lib-rt/CPy.h | 16 ++-------------- mypyc/lib-rt/int_ops.c | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/mypyc/lib-rt/CPy.h b/mypyc/lib-rt/CPy.h index b62cf164e23a..fecd93d19a75 100644 --- a/mypyc/lib-rt/CPy.h +++ b/mypyc/lib-rt/CPy.h @@ -145,6 +145,7 @@ CPyTagged CPyTagged_FromFloat(double f); PyObject *CPyLong_FromStrWithBase(PyObject *o, CPyTagged base); PyObject *CPyLong_FromStr(PyObject *o); PyObject *CPyBool_Str(bool b); +int64_t CPyLong_AsInt64_(PyObject *o); int64_t CPyInt64_Divide(int64_t x, int64_t y); int64_t CPyInt64_Remainder(int64_t x, int64_t y); int32_t CPyInt32_Divide(int32_t x, int32_t y); @@ -314,7 +315,6 @@ static inline bool CPyTagged_IsLe(CPyTagged left, CPyTagged right) { } } - static inline int64_t CPyLong_AsInt64(PyObject *o) { if (likely(PyLong_Check(o))) { PyLongObject *lobj = (PyLongObject *)o; @@ -327,20 +327,9 @@ static inline int64_t CPyLong_AsInt64(PyObject *o) { } } // Slow path - int overflow; - int64_t result = PyLong_AsLongLongAndOverflow(o, &overflow); - if (result == -1) { - if (PyErr_Occurred()) { - return CPY_LL_INT_ERROR; - } else if (overflow) { - PyErr_SetString(PyExc_OverflowError, "int too large to convert to i64"); - return CPY_LL_INT_ERROR; - } - } - return result; + return CPyLong_AsInt64_(o); } - static inline int32_t CPyLong_AsInt32(PyObject *o) { if (likely(PyLong_Check(o))) { #if CPY_3_12_FEATURES @@ -425,7 +414,6 @@ static inline int16_t CPyLong_AsInt16(PyObject *o) { return result; } - static inline uint8_t CPyLong_AsUInt8(PyObject *o) { if (likely(PyLong_Check(o))) { #if CPY_3_12_FEATURES diff --git a/mypyc/lib-rt/int_ops.c b/mypyc/lib-rt/int_ops.c index 2806198c297f..3cfeb9b06f6a 100644 --- a/mypyc/lib-rt/int_ops.c +++ b/mypyc/lib-rt/int_ops.c @@ -480,6 +480,21 @@ CPyTagged CPyTagged_Lshift(CPyTagged left, CPyTagged right) { return CPyTagged_StealFromObject(result); } +// i64 unboxing slow path +int64_t CPyLong_AsInt64_(PyObject *o) { + int overflow; + int64_t result = PyLong_AsLongLongAndOverflow(o, &overflow); + if (result == -1) { + if (PyErr_Occurred()) { + return CPY_LL_INT_ERROR; + } else if (overflow) { + PyErr_SetString(PyExc_OverflowError, "int too large to convert to i64"); + return CPY_LL_INT_ERROR; + } + } + return result; +} + int64_t CPyInt64_Divide(int64_t x, int64_t y) { if (y == 0) { PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero"); From 3403c3247e015b132462a940031ce4db9b4af5e4 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Sun, 19 May 2024 12:24:28 +0100 Subject: [PATCH 06/11] Only inline fast path of i32 unboxing --- mypyc/lib-rt/CPy.h | 17 ++--------------- mypyc/lib-rt/int_ops.c | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/mypyc/lib-rt/CPy.h b/mypyc/lib-rt/CPy.h index fecd93d19a75..2acab1515210 100644 --- a/mypyc/lib-rt/CPy.h +++ b/mypyc/lib-rt/CPy.h @@ -148,6 +148,7 @@ PyObject *CPyBool_Str(bool b); int64_t CPyLong_AsInt64_(PyObject *o); int64_t CPyInt64_Divide(int64_t x, int64_t y); int64_t CPyInt64_Remainder(int64_t x, int64_t y); +int32_t CPyLong_AsInt32_(PyObject *o); int32_t CPyInt32_Divide(int32_t x, int32_t y); int32_t CPyInt32_Remainder(int32_t x, int32_t y); void CPyInt32_Overflow(void); @@ -353,21 +354,7 @@ static inline int32_t CPyLong_AsInt32(PyObject *o) { #endif } // Slow path - int overflow; - long result = PyLong_AsLongAndOverflow(o, &overflow); - if (result > 0x7fffffffLL || result < -0x80000000LL) { - overflow = 1; - result = -1; - } - if (result == -1) { - if (PyErr_Occurred()) { - return CPY_LL_INT_ERROR; - } else if (overflow) { - PyErr_SetString(PyExc_OverflowError, "int too large to convert to i32"); - return CPY_LL_INT_ERROR; - } - } - return result; + return CPyLong_AsInt32_(o); } static inline int16_t CPyLong_AsInt16(PyObject *o) { diff --git a/mypyc/lib-rt/int_ops.c b/mypyc/lib-rt/int_ops.c index 3cfeb9b06f6a..4e420e41cc2c 100644 --- a/mypyc/lib-rt/int_ops.c +++ b/mypyc/lib-rt/int_ops.c @@ -529,6 +529,25 @@ int64_t CPyInt64_Remainder(int64_t x, int64_t y) { return d; } +// i32 unboxing slow path +int32_t CPyLong_AsInt32_(PyObject *o) { + int overflow; + long result = PyLong_AsLongAndOverflow(o, &overflow); + if (result > 0x7fffffffLL || result < -0x80000000LL) { + overflow = 1; + result = -1; + } + if (result == -1) { + if (PyErr_Occurred()) { + return CPY_LL_INT_ERROR; + } else if (overflow) { + PyErr_SetString(PyExc_OverflowError, "int too large to convert to i32"); + return CPY_LL_INT_ERROR; + } + } + return result; +} + int32_t CPyInt32_Divide(int32_t x, int32_t y) { if (y == 0) { PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero"); From 0be8772849834fcdf4c4707eb604b5b0cccb9ae8 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Sun, 19 May 2024 12:28:02 +0100 Subject: [PATCH 07/11] Only inline fast path of i16 unboxing --- mypyc/lib-rt/CPy.h | 17 ++--------------- mypyc/lib-rt/int_ops.c | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/mypyc/lib-rt/CPy.h b/mypyc/lib-rt/CPy.h index 2acab1515210..0376bbcf1109 100644 --- a/mypyc/lib-rt/CPy.h +++ b/mypyc/lib-rt/CPy.h @@ -152,6 +152,7 @@ int32_t CPyLong_AsInt32_(PyObject *o); int32_t CPyInt32_Divide(int32_t x, int32_t y); int32_t CPyInt32_Remainder(int32_t x, int32_t y); void CPyInt32_Overflow(void); +int16_t CPyLong_AsInt16_(PyObject *o); int16_t CPyInt16_Divide(int16_t x, int16_t y); int16_t CPyInt16_Remainder(int16_t x, int16_t y); void CPyInt16_Overflow(void); @@ -384,21 +385,7 @@ static inline int16_t CPyLong_AsInt16(PyObject *o) { #endif } // Slow path - int overflow; - long result = PyLong_AsLongAndOverflow(o, &overflow); - if (result > 0x7fff || result < -0x8000) { - overflow = 1; - result = -1; - } - if (result == -1) { - if (PyErr_Occurred()) { - return CPY_LL_INT_ERROR; - } else if (overflow) { - PyErr_SetString(PyExc_OverflowError, "int too large to convert to i16"); - return CPY_LL_INT_ERROR; - } - } - return result; + return CPyLong_AsInt16_(o); } static inline uint8_t CPyLong_AsUInt8(PyObject *o) { diff --git a/mypyc/lib-rt/int_ops.c b/mypyc/lib-rt/int_ops.c index 4e420e41cc2c..83c89d558a1c 100644 --- a/mypyc/lib-rt/int_ops.c +++ b/mypyc/lib-rt/int_ops.c @@ -586,6 +586,25 @@ void CPyInt32_Overflow() { PyErr_SetString(PyExc_OverflowError, "int too large to convert to i32"); } +// i16 unboxing slow path +int16_t CPyLong_AsInt16_(PyObject *o) { + int overflow; + long result = PyLong_AsLongAndOverflow(o, &overflow); + if (result > 0x7fff || result < -0x8000) { + overflow = 1; + result = -1; + } + if (result == -1) { + if (PyErr_Occurred()) { + return CPY_LL_INT_ERROR; + } else if (overflow) { + PyErr_SetString(PyExc_OverflowError, "int too large to convert to i16"); + return CPY_LL_INT_ERROR; + } + } + return result; +} + int16_t CPyInt16_Divide(int16_t x, int16_t y) { if (y == 0) { PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero"); From c0df723251803b235c43327150efab4565204dd9 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Sun, 19 May 2024 12:30:48 +0100 Subject: [PATCH 08/11] Only inline fast path of u8 unboxing --- mypyc/lib-rt/CPy.h | 17 ++--------------- mypyc/lib-rt/int_ops.c | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/mypyc/lib-rt/CPy.h b/mypyc/lib-rt/CPy.h index 0376bbcf1109..bd4d81fb0730 100644 --- a/mypyc/lib-rt/CPy.h +++ b/mypyc/lib-rt/CPy.h @@ -156,6 +156,7 @@ int16_t CPyLong_AsInt16_(PyObject *o); int16_t CPyInt16_Divide(int16_t x, int16_t y); int16_t CPyInt16_Remainder(int16_t x, int16_t y); void CPyInt16_Overflow(void); +uint8_t CPyLong_AsUInt8_(PyObject *o); void CPyUInt8_Overflow(void); double CPyTagged_TrueDivide(CPyTagged x, CPyTagged y); @@ -415,21 +416,7 @@ static inline uint8_t CPyLong_AsUInt8(PyObject *o) { #endif } // Slow path - int overflow; - long result = PyLong_AsLongAndOverflow(o, &overflow); - if (result < 0 || result >= 256) { - overflow = 1; - result = -1; - } - if (result == -1) { - if (PyErr_Occurred()) { - return CPY_LL_UINT_ERROR; - } else if (overflow) { - PyErr_SetString(PyExc_OverflowError, "int too large or small to convert to u8"); - return CPY_LL_UINT_ERROR; - } - } - return result; + return CPyLong_AsUInt8_(o); } diff --git a/mypyc/lib-rt/int_ops.c b/mypyc/lib-rt/int_ops.c index 83c89d558a1c..4c03ae265d8b 100644 --- a/mypyc/lib-rt/int_ops.c +++ b/mypyc/lib-rt/int_ops.c @@ -643,6 +643,25 @@ void CPyInt16_Overflow() { PyErr_SetString(PyExc_OverflowError, "int too large to convert to i16"); } +// u8 unboxing slow path +uint8_t CPyLong_AsUInt8_(PyObject *o) { + int overflow; + long result = PyLong_AsLongAndOverflow(o, &overflow); + if (result < 0 || result >= 256) { + overflow = 1; + result = -1; + } + if (result == -1) { + if (PyErr_Occurred()) { + return CPY_LL_UINT_ERROR; + } else if (overflow) { + PyErr_SetString(PyExc_OverflowError, "int too large or small to convert to u8"); + return CPY_LL_UINT_ERROR; + } + } + return result; +} + void CPyUInt8_Overflow() { PyErr_SetString(PyExc_OverflowError, "int too large or small to convert to u8"); } From 15a2ae32349aa4db2a27b3bcd7a5cd34bf6e883e Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Sun, 19 May 2024 13:15:09 +0100 Subject: [PATCH 09/11] Add comment --- mypyc/lib-rt/pythonsupport.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mypyc/lib-rt/pythonsupport.h b/mypyc/lib-rt/pythonsupport.h index d05c13c6ed15..b11270a5d08b 100644 --- a/mypyc/lib-rt/pythonsupport.h +++ b/mypyc/lib-rt/pythonsupport.h @@ -155,6 +155,8 @@ CPyLong_AsSsize_tAndOverflow(PyObject *vv, int *overflow) } else if (i == ((1 << CPY_NON_SIZE_BITS) | CPY_SIGN_NEGATIVE)) { res = -(sdigit)CPY_LONG_DIGIT(v, 0); } else { + // Slow path is moved to a non-inline helper function to + // limit size of generated code int overflow_local; res = CPyLong_AsSsize_tAndOverflow_(vv, &overflow_local); *overflow = overflow_local; From f548b45b9b441c77a5a3451f54acf4d82ee8a357 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Mon, 20 May 2024 21:20:27 +0100 Subject: [PATCH 10/11] Fix Python 3.11 and older --- mypyc/lib-rt/pythonsupport.c | 54 ++++++++++++++++++++++++++++++++++++ mypyc/lib-rt/pythonsupport.h | 40 +++++--------------------- 2 files changed, 61 insertions(+), 33 deletions(-) diff --git a/mypyc/lib-rt/pythonsupport.c b/mypyc/lib-rt/pythonsupport.c index 5c4df660863b..c2dda4f95ad2 100644 --- a/mypyc/lib-rt/pythonsupport.c +++ b/mypyc/lib-rt/pythonsupport.c @@ -5,6 +5,8 @@ #include "pythonsupport.h" +#if CPY_3_12_FEATURES + // Slow path of CPyLong_AsSsize_tAndOverflow (non-inlined) Py_ssize_t CPyLong_AsSsize_tAndOverflow_(PyObject *vv, int *overflow) @@ -50,3 +52,55 @@ CPyLong_AsSsize_tAndOverflow_(PyObject *vv, int *overflow) exit: return res; } + +#else + +// Slow path of CPyLong_AsSsize_tAndOverflow (non-inlined, Python 3.11 and earlier) +Py_ssize_t +CPyLong_AsSsize_tAndOverflow_(PyObject *vv, int *overflow) +{ + /* This version by Tim Peters */ + PyLongObject *v = (PyLongObject *)vv; + size_t x, prev; + Py_ssize_t res; + Py_ssize_t i; + int sign; + + *overflow = 0; + + res = -1; + i = Py_SIZE(v); + + sign = 1; + x = 0; + if (i < 0) { + sign = -1; + i = -(i); + } + while (--i >= 0) { + prev = x; + x = (x << PyLong_SHIFT) + CPY_LONG_DIGIT(v, i); + if ((x >> PyLong_SHIFT) != prev) { + *overflow = sign; + goto exit; + } + } + /* Haven't lost any bits, but casting to long requires extra + * care (see comment above). + */ + if (x <= (size_t)CPY_TAGGED_MAX) { + res = (Py_ssize_t)x * sign; + } + else if (sign < 0 && x == CPY_TAGGED_ABS_MIN) { + res = CPY_TAGGED_MIN; + } + else { + *overflow = sign; + /* res is already set to -1 */ + } + exit: + return res; +} + + +#endif diff --git a/mypyc/lib-rt/pythonsupport.h b/mypyc/lib-rt/pythonsupport.h index b11270a5d08b..85f9ec64ac90 100644 --- a/mypyc/lib-rt/pythonsupport.h +++ b/mypyc/lib-rt/pythonsupport.h @@ -129,11 +129,11 @@ init_subclass(PyTypeObject *type, PyObject *kwds) return 0; } -#if CPY_3_12_FEATURES - Py_ssize_t CPyLong_AsSsize_tAndOverflow_(PyObject *vv, int *overflow); +#if CPY_3_12_FEATURES + static inline Py_ssize_t CPyLong_AsSsize_tAndOverflow(PyObject *vv, int *overflow) { @@ -161,7 +161,6 @@ CPyLong_AsSsize_tAndOverflow(PyObject *vv, int *overflow) res = CPyLong_AsSsize_tAndOverflow_(vv, &overflow_local); *overflow = overflow_local; } - exit: return res; } @@ -183,10 +182,8 @@ CPyLong_AsSsize_tAndOverflow(PyObject *vv, int *overflow) { /* This version by Tim Peters */ PyLongObject *v = (PyLongObject *)vv; - size_t x, prev; Py_ssize_t res; Py_ssize_t i; - int sign; *overflow = 0; @@ -200,35 +197,12 @@ CPyLong_AsSsize_tAndOverflow(PyObject *vv, int *overflow) } else if (i == -1) { res = -(sdigit)CPY_LONG_DIGIT(v, 0); } else { - sign = 1; - x = 0; - if (i < 0) { - sign = -1; - i = -(i); - } - while (--i >= 0) { - prev = x; - x = (x << PyLong_SHIFT) + CPY_LONG_DIGIT(v, i); - if ((x >> PyLong_SHIFT) != prev) { - *overflow = sign; - goto exit; - } - } - /* Haven't lost any bits, but casting to long requires extra - * care (see comment above). - */ - if (x <= (size_t)CPY_TAGGED_MAX) { - res = (Py_ssize_t)x * sign; - } - else if (sign < 0 && x == CPY_TAGGED_ABS_MIN) { - res = CPY_TAGGED_MIN; - } - else { - *overflow = sign; - /* res is already set to -1 */ - } + // Slow path is moved to a non-inline helper function to + // limit size of generated code + int overflow_local; + res = CPyLong_AsSsize_tAndOverflow_(vv, &overflow_local); + *overflow = overflow_local; } - exit: return res; } From 787f0ced49b8a38fbb97179d2aa903760de00096 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Mon, 17 Jun 2024 10:28:13 +0100 Subject: [PATCH 11/11] Update comments --- mypyc/lib-rt/pythonsupport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mypyc/lib-rt/pythonsupport.c b/mypyc/lib-rt/pythonsupport.c index c2dda4f95ad2..90fb69705a00 100644 --- a/mypyc/lib-rt/pythonsupport.c +++ b/mypyc/lib-rt/pythonsupport.c @@ -37,7 +37,7 @@ CPyLong_AsSsize_tAndOverflow_(PyObject *vv, int *overflow) } } /* Haven't lost any bits, but casting to long requires extra - * care (see comment above). + * care. */ if (x <= (size_t)CPY_TAGGED_MAX) { res = (Py_ssize_t)x * sign; @@ -86,7 +86,7 @@ CPyLong_AsSsize_tAndOverflow_(PyObject *vv, int *overflow) } } /* Haven't lost any bits, but casting to long requires extra - * care (see comment above). + * care. */ if (x <= (size_t)CPY_TAGGED_MAX) { res = (Py_ssize_t)x * sign;