Skip to content

Commit

Permalink
[mypyc] Inline fast paths of integer unboxing operations (#17266)
Browse files Browse the repository at this point in the history
This applies to `int` and native integer types.

This speeds up the following micro-benchmark by up to 80% (it spends most of
its time unboxing integers):
```
# a is list[int]/list[i64]/...
for i in a:
    if i == 789:
        n += 1
```
The impact on compile time when self-compiling is below the noise floor.
The generated binary is about 0.1% larger. Since integer unboxing can be
performance-critical, this seems like a decent win.

Closes mypyc/mypyc#987. Work on mypyc/mypyc#757.
  • Loading branch information
JukkaL authored Jun 17, 2024
1 parent b81b9e0 commit 31faa43
Show file tree
Hide file tree
Showing 6 changed files with 269 additions and 192 deletions.
1 change: 1 addition & 0 deletions mypyc/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
"exc_ops.c",
"misc_ops.c",
"generic_ops.c",
"pythonsupport.c",
]


Expand Down
147 changes: 140 additions & 7 deletions mypyc/lib-rt/CPy.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,6 @@ static inline size_t CPy_FindAttrOffset(PyTypeObject *trait, CPyVTableItem *vtab
// Prototypes for building CPyTagged values from C values (per the parameter
// types: Py_ssize_t, void *, int64_t).
CPyTagged CPyTagged_FromSsize_t(Py_ssize_t value);
CPyTagged CPyTagged_FromVoidPtr(void *ptr);
CPyTagged CPyTagged_FromInt64(int64_t value);
// NOTE(review): the three prototypes below share their names and parameter
// lists with static inline definitions further down in this header. Those
// definitions differ only in reference handling for `object`: FromObject
// takes a new reference when it keeps the object pointer, StealFromObject
// drops the reference when it does not keep the pointer, and BorrowFromObject
// changes no reference counts. These look like the declarations that the
// change removes -- confirm before relying on them.
CPyTagged CPyTagged_FromObject(PyObject *object);
CPyTagged CPyTagged_StealFromObject(PyObject *object);
CPyTagged CPyTagged_BorrowFromObject(PyObject *object);
// Prototypes for converting a CPyTagged value back to a Python object or to
// a Py_ssize_t (per the return types).
PyObject *CPyTagged_AsObject(CPyTagged x);
PyObject *CPyTagged_StealAsObject(CPyTagged x);
Py_ssize_t CPyTagged_AsSsize_t(CPyTagged x);
Expand All @@ -148,18 +145,18 @@ CPyTagged CPyTagged_FromFloat(double f);
PyObject *CPyLong_FromStrWithBase(PyObject *o, CPyTagged base);
PyObject *CPyLong_FromStr(PyObject *o);
PyObject *CPyBool_Str(bool b);
// i64 unboxing. The name with the trailing underscore is the out-of-line
// slow path that the static inline CPyLong_AsInt64() in this header falls
// back to.
// NOTE(review): the un-suffixed prototype looks like the declaration that the
// change replaces with that inline definition -- confirm.
int64_t CPyLong_AsInt64(PyObject *o);
int64_t CPyLong_AsInt64_(PyObject *o);
int64_t CPyInt64_Divide(int64_t x, int64_t y);
int64_t CPyInt64_Remainder(int64_t x, int64_t y);
// i32 unboxing; CPyLong_AsInt32_() is the slow path behind the inline
// CPyLong_AsInt32() (same NOTE(review) as for i64 applies).
int32_t CPyLong_AsInt32(PyObject *o);
int32_t CPyLong_AsInt32_(PyObject *o);
int32_t CPyInt32_Divide(int32_t x, int32_t y);
int32_t CPyInt32_Remainder(int32_t x, int32_t y);
// Sets OverflowError with the message "int too large to convert to i32".
void CPyInt32_Overflow(void);
// i16 unboxing; CPyLong_AsInt16_() is the slow path behind the inline
// CPyLong_AsInt16() (same NOTE(review) as for i64 applies).
int16_t CPyLong_AsInt16(PyObject *o);
int16_t CPyLong_AsInt16_(PyObject *o);
int16_t CPyInt16_Divide(int16_t x, int16_t y);
int16_t CPyInt16_Remainder(int16_t x, int16_t y);
// Sets OverflowError with the message "int too large to convert to i16".
void CPyInt16_Overflow(void);
// u8 unboxing; CPyLong_AsUInt8_() is the slow path behind the inline
// CPyLong_AsUInt8() (same NOTE(review) as for i64 applies).
uint8_t CPyLong_AsUInt8(PyObject *o);
uint8_t CPyLong_AsUInt8_(PyObject *o);
// Presumably the u8 counterpart of the i32/i16 overflow reporters; its body
// is not visible here -- verify.
void CPyUInt8_Overflow(void);
double CPyTagged_TrueDivide(CPyTagged x, CPyTagged y);

Expand Down Expand Up @@ -199,6 +196,41 @@ static inline PyObject *CPyTagged_LongAsObject(CPyTagged x) {
return (PyObject *)(x & ~CPY_INT_TAG);
}

// Convert a Python int object to a tagged integer without consuming the
// caller's reference to 'object'.
//
// When CPyLong_AsSsize_tAndOverflow() reports no overflow, the value is
// returned shifted left by one bit. Otherwise the object pointer itself is
// returned with CPY_INT_TAG set, after taking a new reference to the object.
static inline CPyTagged CPyTagged_FromObject(PyObject *object) {
    int overflow;
    // The overflow check knows about CPyTagged's width
    Py_ssize_t value = CPyLong_AsSsize_tAndOverflow(object, &overflow);
    if (unlikely(overflow != 0)) {
        // The tagged result keeps pointing at the object, so it needs its
        // own reference.
        Py_INCREF(object);
        return ((CPyTagged)object) | CPY_INT_TAG;
    } else {
        // Convert before shifting: left-shifting a negative signed value is
        // undefined behavior in C. Assumes CPyTagged is an unsigned,
        // pointer-sized type -- confirm against its typedef.
        return (CPyTagged)value << 1;
    }
}

// Convert a Python int object to a tagged integer, consuming the caller's
// reference to 'object'.
//
// When CPyLong_AsSsize_tAndOverflow() reports overflow, the object pointer is
// returned with CPY_INT_TAG set and the reference is kept as-is. Otherwise
// the reference is released and the value is returned shifted left by one.
static inline CPyTagged CPyTagged_StealFromObject(PyObject *object) {
    int overflow;
    // The overflow check knows about CPyTagged's width
    Py_ssize_t value = CPyLong_AsSsize_tAndOverflow(object, &overflow);
    if (unlikely(overflow != 0)) {
        return ((CPyTagged)object) | CPY_INT_TAG;
    } else {
        // The object is no longer needed once its value has been extracted.
        Py_DECREF(object);
        // Convert before shifting: left-shifting a negative signed value is
        // undefined behavior in C. Assumes CPyTagged is an unsigned,
        // pointer-sized type -- confirm against its typedef.
        return (CPyTagged)value << 1;
    }
}

// Convert a Python int object to a tagged integer without touching any
// reference counts.
//
// When CPyLong_AsSsize_tAndOverflow() reports overflow, the object pointer is
// returned with CPY_INT_TAG set; otherwise the value is returned shifted left
// by one bit.
static inline CPyTagged CPyTagged_BorrowFromObject(PyObject *object) {
    int overflow;
    // The overflow check knows about CPyTagged's width
    Py_ssize_t value = CPyLong_AsSsize_tAndOverflow(object, &overflow);
    if (unlikely(overflow != 0)) {
        return ((CPyTagged)object) | CPY_INT_TAG;
    } else {
        // Convert before shifting: left-shifting a negative signed value is
        // undefined behavior in C. Assumes CPyTagged is an unsigned,
        // pointer-sized type -- confirm against its typedef.
        return (CPyTagged)value << 1;
    }
}

static inline bool CPyTagged_TooBig(Py_ssize_t value) {
// Micro-optimized for the common case where it fits.
return (size_t)value > CPY_TAGGED_MAX
Expand Down Expand Up @@ -286,6 +318,107 @@ static inline bool CPyTagged_IsLe(CPyTagged left, CPyTagged right) {
}
}

// Unbox a Python object to int64_t (i64).
//
// Fast path: an int object holding zero or a single positive digit is decoded
// inline. Every other input (including non-int objects) is delegated to the
// out-of-line CPyLong_AsInt64_().
static inline int64_t CPyLong_AsInt64(PyObject *o) {
    if (likely(PyLong_Check(o))) {
        PyLongObject *lobj = (PyLongObject *)o;
#if CPY_3_12_FEATURES
        // Use the tag-based check here, exactly as the i32/i16/u8 unboxers
        // in this header do, instead of Py_SIZE(): on Python 3.12+ the int
        // layout changed and Py_SIZE() is not valid for int objects.
        // NOTE(review): assumes CPY_LONG_TAG / CPY_NON_SIZE_BITS /
        // CPY_SIGN_ZERO mean the same here as in CPyLong_AsInt32 -- confirm.
        size_t tag = CPY_LONG_TAG(lobj);
        if (likely(tag == (1 << CPY_NON_SIZE_BITS))) {
            // Fast path
            return CPY_LONG_DIGIT(lobj, 0);
        } else if (likely(tag == CPY_SIGN_ZERO)) {
            return 0;
        }
#else
        Py_ssize_t size = Py_SIZE(lobj);
        if (likely(size == 1)) {
            // Fast path
            return CPY_LONG_DIGIT(lobj, 0);
        } else if (likely(size == 0)) {
            return 0;
        }
#endif
    }
    // Slow path
    return CPyLong_AsInt64_(o);
}

// Unbox a Python object to int32_t (i32).
//
// An int object holding zero or a single positive digit is decoded inline;
// anything else, including non-int objects, goes to CPyLong_AsInt32_().
static inline int32_t CPyLong_AsInt32(PyObject *o) {
    if (likely(PyLong_Check(o))) {
        PyLongObject *boxed = (PyLongObject *)o;
#if CPY_3_12_FEATURES
        size_t layout = CPY_LONG_TAG(boxed);
        if (likely(layout == (1 << CPY_NON_SIZE_BITS))) {
            // Single-digit case: the value is digit 0.
            return CPY_LONG_DIGIT(boxed, 0);
        }
        if (likely(layout == CPY_SIGN_ZERO)) {
            return 0;
        }
#else
        Py_ssize_t ndigits = boxed->ob_base.ob_size;
        if (likely(ndigits == 1)) {
            // Single-digit case: the value is digit 0.
            return CPY_LONG_DIGIT(boxed, 0);
        }
        if (likely(ndigits == 0)) {
            return 0;
        }
#endif
    }
    // Not handled inline: defer to the out-of-line helper.
    return CPyLong_AsInt32_(o);
}

// Unbox a Python object to int16_t (i16).
//
// An int object holding zero, or a single positive digit below 0x8000, is
// decoded inline. Larger digits, other int layouts and non-int objects all
// go to CPyLong_AsInt16_().
static inline int16_t CPyLong_AsInt16(PyObject *o) {
    if (likely(PyLong_Check(o))) {
        PyLongObject *boxed = (PyLongObject *)o;
#if CPY_3_12_FEATURES
        size_t layout = CPY_LONG_TAG(boxed);
        if (likely(layout == (1 << CPY_NON_SIZE_BITS))) {
            digit low = CPY_LONG_DIGIT(boxed, 0);
            // Only return inline when the digit is in the positive i16 range.
            if (low < 0x8000) {
                return low;
            }
        } else if (likely(layout == CPY_SIGN_ZERO)) {
            return 0;
        }
#else
        Py_ssize_t ndigits = boxed->ob_base.ob_size;
        if (likely(ndigits == 1)) {
            digit low = boxed->ob_digit[0];
            // Only return inline when the digit is in the positive i16 range.
            if (low < 0x8000) {
                return low;
            }
        } else if (likely(ndigits == 0)) {
            return 0;
        }
#endif
    }
    // Not handled inline: defer to the out-of-line helper.
    return CPyLong_AsInt16_(o);
}

// Unbox a Python object to uint8_t (u8).
//
// An int object holding zero, or a single positive digit below 256, is
// decoded inline. Larger digits, other int layouts and non-int objects all
// go to CPyLong_AsUInt8_().
static inline uint8_t CPyLong_AsUInt8(PyObject *o) {
    if (likely(PyLong_Check(o))) {
        PyLongObject *boxed = (PyLongObject *)o;
#if CPY_3_12_FEATURES
        size_t layout = CPY_LONG_TAG(boxed);
        if (likely(layout == (1 << CPY_NON_SIZE_BITS))) {
            digit low = CPY_LONG_DIGIT(boxed, 0);
            // Only return inline when the digit fits in the u8 range.
            if (low < 256) {
                return low;
            }
        } else if (likely(layout == CPY_SIGN_ZERO)) {
            return 0;
        }
#else
        Py_ssize_t ndigits = boxed->ob_base.ob_size;
        if (likely(ndigits == 1)) {
            digit low = boxed->ob_digit[0];
            // Only return inline when the digit fits in the u8 range.
            if (low < 256) {
                return low;
            }
        } else if (likely(ndigits == 0)) {
            return 0;
        }
#endif
    }
    // Not handled inline: defer to the out-of-line helper.
    return CPyLong_AsUInt8_(o);
}

static inline CPyTagged CPyTagged_Negate(CPyTagged num) {
if (likely(CPyTagged_CheckShort(num)
&& num != (CPyTagged) ((Py_ssize_t)1 << (CPY_INT_BITS - 1)))) {
Expand Down
133 changes: 8 additions & 125 deletions mypyc/lib-rt/int_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,41 +44,6 @@ CPyTagged CPyTagged_FromInt64(int64_t value) {
}
}

// Convert a Python int object to a tagged integer without consuming the
// caller's reference to 'object'.
//
// When CPyLong_AsSsize_tAndOverflow() reports no overflow, the value is
// returned shifted left by one bit. Otherwise the object pointer itself is
// returned with CPY_INT_TAG set, after taking a new reference to the object.
CPyTagged CPyTagged_FromObject(PyObject *object) {
    int overflow;
    // The overflow check knows about CPyTagged's width
    Py_ssize_t value = CPyLong_AsSsize_tAndOverflow(object, &overflow);
    if (unlikely(overflow != 0)) {
        // The tagged result keeps pointing at the object, so it needs its
        // own reference.
        Py_INCREF(object);
        return ((CPyTagged)object) | CPY_INT_TAG;
    } else {
        // Convert before shifting: left-shifting a negative signed value is
        // undefined behavior in C. Assumes CPyTagged is an unsigned,
        // pointer-sized type -- confirm against its typedef.
        return (CPyTagged)value << 1;
    }
}

// Convert a Python int object to a tagged integer, consuming the caller's
// reference to 'object'.
//
// When CPyLong_AsSsize_tAndOverflow() reports overflow, the object pointer is
// returned with CPY_INT_TAG set and the reference is kept as-is. Otherwise
// the reference is released and the value is returned shifted left by one.
CPyTagged CPyTagged_StealFromObject(PyObject *object) {
    int overflow;
    // The overflow check knows about CPyTagged's width
    Py_ssize_t value = CPyLong_AsSsize_tAndOverflow(object, &overflow);
    if (unlikely(overflow != 0)) {
        return ((CPyTagged)object) | CPY_INT_TAG;
    } else {
        // The object is no longer needed once its value has been extracted.
        Py_DECREF(object);
        // Convert before shifting: left-shifting a negative signed value is
        // undefined behavior in C. Assumes CPyTagged is an unsigned,
        // pointer-sized type -- confirm against its typedef.
        return (CPyTagged)value << 1;
    }
}

// Convert a Python int object to a tagged integer without touching any
// reference counts.
//
// When CPyLong_AsSsize_tAndOverflow() reports overflow, the object pointer is
// returned with CPY_INT_TAG set; otherwise the value is returned shifted left
// by one bit.
CPyTagged CPyTagged_BorrowFromObject(PyObject *object) {
    int overflow;
    // The overflow check knows about CPyTagged's width
    Py_ssize_t value = CPyLong_AsSsize_tAndOverflow(object, &overflow);
    if (unlikely(overflow != 0)) {
        return ((CPyTagged)object) | CPY_INT_TAG;
    } else {
        // Convert before shifting: left-shifting a negative signed value is
        // undefined behavior in C. Assumes CPyTagged is an unsigned,
        // pointer-sized type -- confirm against its typedef.
        return (CPyTagged)value << 1;
    }
}

PyObject *CPyTagged_AsObject(CPyTagged x) {
PyObject *value;
if (unlikely(CPyTagged_CheckLong(x))) {
Expand Down Expand Up @@ -420,18 +385,8 @@ CPyTagged CPyTagged_Lshift_(CPyTagged left, CPyTagged right) {
return CPyTagged_StealFromObject(result);
}

int64_t CPyLong_AsInt64(PyObject *o) {
if (likely(PyLong_Check(o))) {
PyLongObject *lobj = (PyLongObject *)o;
Py_ssize_t size = Py_SIZE(lobj);
if (likely(size == 1)) {
// Fast path
return CPY_LONG_DIGIT(lobj, 0);
} else if (likely(size == 0)) {
return 0;
}
}
// Slow path
// i64 unboxing slow path
int64_t CPyLong_AsInt64_(PyObject *o) {
int overflow;
int64_t result = PyLong_AsLongLongAndOverflow(o, &overflow);
if (result == -1) {
Expand Down Expand Up @@ -479,29 +434,8 @@ int64_t CPyInt64_Remainder(int64_t x, int64_t y) {
return d;
}

int32_t CPyLong_AsInt32(PyObject *o) {
if (likely(PyLong_Check(o))) {
#if CPY_3_12_FEATURES
PyLongObject *lobj = (PyLongObject *)o;
size_t tag = CPY_LONG_TAG(lobj);
if (likely(tag == (1 << CPY_NON_SIZE_BITS))) {
// Fast path
return CPY_LONG_DIGIT(lobj, 0);
} else if (likely(tag == CPY_SIGN_ZERO)) {
return 0;
}
#else
PyLongObject *lobj = (PyLongObject *)o;
Py_ssize_t size = lobj->ob_base.ob_size;
if (likely(size == 1)) {
// Fast path
return CPY_LONG_DIGIT(lobj, 0);
} else if (likely(size == 0)) {
return 0;
}
#endif
}
// Slow path
// i32 unboxing slow path
int32_t CPyLong_AsInt32_(PyObject *o) {
int overflow;
long result = PyLong_AsLongAndOverflow(o, &overflow);
if (result > 0x7fffffffLL || result < -0x80000000LL) {
Expand Down Expand Up @@ -557,33 +491,8 @@ void CPyInt32_Overflow() {
PyErr_SetString(PyExc_OverflowError, "int too large to convert to i32");
}

int16_t CPyLong_AsInt16(PyObject *o) {
if (likely(PyLong_Check(o))) {
#if CPY_3_12_FEATURES
PyLongObject *lobj = (PyLongObject *)o;
size_t tag = CPY_LONG_TAG(lobj);
if (likely(tag == (1 << CPY_NON_SIZE_BITS))) {
// Fast path
digit x = CPY_LONG_DIGIT(lobj, 0);
if (x < 0x8000)
return x;
} else if (likely(tag == CPY_SIGN_ZERO)) {
return 0;
}
#else
PyLongObject *lobj = (PyLongObject *)o;
Py_ssize_t size = lobj->ob_base.ob_size;
if (likely(size == 1)) {
// Fast path
digit x = lobj->ob_digit[0];
if (x < 0x8000)
return x;
} else if (likely(size == 0)) {
return 0;
}
#endif
}
// Slow path
// i16 unboxing slow path
int16_t CPyLong_AsInt16_(PyObject *o) {
int overflow;
long result = PyLong_AsLongAndOverflow(o, &overflow);
if (result > 0x7fff || result < -0x8000) {
Expand Down Expand Up @@ -639,34 +548,8 @@ void CPyInt16_Overflow() {
PyErr_SetString(PyExc_OverflowError, "int too large to convert to i16");
}


uint8_t CPyLong_AsUInt8(PyObject *o) {
if (likely(PyLong_Check(o))) {
#if CPY_3_12_FEATURES
PyLongObject *lobj = (PyLongObject *)o;
size_t tag = CPY_LONG_TAG(lobj);
if (likely(tag == (1 << CPY_NON_SIZE_BITS))) {
// Fast path
digit x = CPY_LONG_DIGIT(lobj, 0);
if (x < 256)
return x;
} else if (likely(tag == CPY_SIGN_ZERO)) {
return 0;
}
#else
PyLongObject *lobj = (PyLongObject *)o;
Py_ssize_t size = lobj->ob_base.ob_size;
if (likely(size == 1)) {
// Fast path
digit x = lobj->ob_digit[0];
if (x < 256)
return x;
} else if (likely(size == 0)) {
return 0;
}
#endif
}
// Slow path
// u8 unboxing slow path
uint8_t CPyLong_AsUInt8_(PyObject *o) {
int overflow;
long result = PyLong_AsLongAndOverflow(o, &overflow);
if (result < 0 || result >= 256) {
Expand Down
Loading

0 comments on commit 31faa43

Please sign in to comment.