-
-
Notifications
You must be signed in to change notification settings - Fork 2.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[mypyc] Fix native tuple unboxing #15025
base: master
Are you sure you want to change the base?
Conversation
Here's the old and new C code for this test file: from typing import Any, Tuple
from mypy_extensions import i64, i32
class A: pass
def f(arg: Any) -> None:
t: Tuple[str, str] = arg
t2: Tuple[int, float, bool, None] = arg
t3: Tuple[i64, i32] = arg
t4: Tuple[A] = arg New code
char CPyDef_f(PyObject *cpy_r_arg) {
tuple_T2OO cpy_r_r0;
tuple_T2OO cpy_r_t;
tuple_T4IFCC cpy_r_r1;
tuple_T4IFCC cpy_r_t2;
tuple_T284 cpy_r_r2;
int64_t cpy_r_r3;
char cpy_r_r4;
tuple_T284 cpy_r_t3;
tuple_T1O cpy_r_r5;
PyObject *cpy_r_r6;
tuple_T1O cpy_r_t4;
char cpy_r_r7;
CPyL0: ;
if (!PyTuple_Check(cpy_r_arg) || PyTuple_Size(cpy_r_arg) != 2) {
CPy_TypeError("tuple[str, str]", cpy_r_arg); cpy_r_r0 = tuple_undefined_T2OO;
} else {
PyObject *__tmp3 = PyTuple_GET_ITEM(cpy_r_arg, 0);
if (likely(PyUnicode_Check(__tmp3)))
cpy_r_r0.f0 = __tmp3;
else {
goto __LL1;
}
PyObject *__tmp4 = PyTuple_GET_ITEM(cpy_r_arg, 1);
if (likely(PyUnicode_Check(__tmp4)))
cpy_r_r0.f1 = __tmp4;
else {
goto __LL1;
}
CPy_INCREF(cpy_r_r0.f0);
CPy_INCREF(cpy_r_r0.f1);
goto __LL2;
__LL1: ;
CPy_TypeError("tuple[str, str]", cpy_r_arg); cpy_r_r0 = tuple_undefined_T2OO;
}
__LL2: ;
if (unlikely(cpy_r_r0.f0 == NULL)) {
CPy_AddTraceback("temp/unbox.py", "f", 7, CPyStatic_globals);
goto CPyL6;
}
CPyL1: ;
cpy_r_t = cpy_r_r0;
CPy_DECREF(cpy_r_t.f0);
CPy_DECREF(cpy_r_t.f1);
if (!PyTuple_Check(cpy_r_arg) || PyTuple_Size(cpy_r_arg) != 4) {
CPy_TypeError("tuple[int, float, bool, None]", cpy_r_arg); cpy_r_r1 = tuple_undefined_T4IFCC;
} else {
PyObject *__tmp7 = PyTuple_GET_ITEM(cpy_r_arg, 0);
if (likely(PyLong_Check(__tmp7)))
cpy_r_r1.f0 = CPyTagged_BorrowFromObject(__tmp7);
else {
goto __LL5;
}
PyObject *__tmp8 = PyTuple_GET_ITEM(cpy_r_arg, 1);
cpy_r_r1.f1 = PyFloat_AsDouble(__tmp8);
if (cpy_r_r1.f1 == -1.0 && PyErr_Occurred()) {
goto __LL5;
}
PyObject *__tmp9 = PyTuple_GET_ITEM(cpy_r_arg, 2);
if (unlikely(!PyBool_Check(__tmp9))) {
goto __LL5;
} else
cpy_r_r1.f2 = __tmp9 == Py_True;
PyObject *__tmp10 = PyTuple_GET_ITEM(cpy_r_arg, 3);
if (unlikely(__tmp10 != Py_None)) {
goto __LL5;
} else
cpy_r_r1.f3 = 1;
CPyTagged_INCREF(cpy_r_r1.f0);
goto __LL6;
__LL5: ;
if (!PyErr_Occurred()) {
CPy_TypeError("tuple[int, float, bool, None]", cpy_r_arg);
}
cpy_r_r1 = tuple_undefined_T4IFCC;
}
__LL6: ;
if (unlikely(cpy_r_r1.f0 == CPY_INT_TAG)) {
CPy_AddTraceback("temp/unbox.py", "f", 8, CPyStatic_globals);
goto CPyL6;
}
CPyL2: ;
cpy_r_t2 = cpy_r_r1;
CPyTagged_DECREF(cpy_r_t2.f0);
if (!PyTuple_Check(cpy_r_arg) || PyTuple_Size(cpy_r_arg) != 2) {
CPy_TypeError("tuple[int64, int32]", cpy_r_arg); cpy_r_r2 = tuple_undefined_T284;
} else {
PyObject *__tmp13 = PyTuple_GET_ITEM(cpy_r_arg, 0);
cpy_r_r2.f0 = CPyLong_AsInt64(__tmp13);
if (cpy_r_r2.f0 == -113 && PyErr_Occurred()) {
goto __LL11;
}
PyObject *__tmp14 = PyTuple_GET_ITEM(cpy_r_arg, 1);
cpy_r_r2.f1 = CPyLong_AsInt32(__tmp14);
if (cpy_r_r2.f1 == -113 && PyErr_Occurred()) {
goto __LL11;
}
goto __LL12;
__LL11: ;
if (!PyErr_Occurred()) {
CPy_TypeError("tuple[int64, int32]", cpy_r_arg);
}
cpy_r_r2 = tuple_undefined_T284;
}
__LL12: ;
cpy_r_r3 = cpy_r_r2.f0;
cpy_r_r4 = cpy_r_r3 == -113;
if (unlikely(cpy_r_r4)) goto CPyL4;
CPyL3: ;
cpy_r_t3 = cpy_r_r2;
if (!PyTuple_Check(cpy_r_arg) || PyTuple_Size(cpy_r_arg) != 1) {
CPy_TypeError("tuple[unbox.A]", cpy_r_arg); cpy_r_r5 = tuple_undefined_T1O;
} else {
PyObject *__tmp17 = PyTuple_GET_ITEM(cpy_r_arg, 0);
if (likely(Py_TYPE(__tmp17) == CPyType_A))
cpy_r_r5.f0 = __tmp17;
else {
goto __LL15;
}
CPy_INCREF(cpy_r_r5.f0);
goto __LL16;
__LL15: ;
CPy_TypeError("tuple[unbox.A]", cpy_r_arg); cpy_r_r5 = tuple_undefined_T1O;
}
__LL16: ;
if (unlikely(cpy_r_r5.f0 == NULL)) {
CPy_AddTraceback("temp/unbox.py", "f", 10, CPyStatic_globals);
goto CPyL6;
} else
goto CPyL5;
CPyL4: ;
cpy_r_r6 = PyErr_Occurred();
if (unlikely(cpy_r_r6 != NULL)) {
CPy_AddTraceback("temp/unbox.py", "f", 9, CPyStatic_globals);
goto CPyL6;
} else
goto CPyL3;
CPyL5: ;
cpy_r_t4 = cpy_r_r5;
CPy_DECREF(cpy_r_t4.f0);
return 1;
CPyL6: ;
cpy_r_r7 = 2;
return cpy_r_r7;
} Old code
char CPyDef_f(PyObject *cpy_r_arg) {
tuple_T2OO cpy_r_r0;
tuple_T2OO cpy_r_t;
tuple_T4IFCC cpy_r_r1;
tuple_T4IFCC cpy_r_t2;
tuple_T284 cpy_r_r2;
int64_t cpy_r_r3;
char cpy_r_r4;
tuple_T284 cpy_r_t3;
tuple_T1O cpy_r_r5;
PyObject *cpy_r_r6;
tuple_T1O cpy_r_t4;
char cpy_r_r7;
CPyL0: ;
PyObject *__tmp1;
if (unlikely(!(PyTuple_Check(cpy_r_arg) && PyTuple_GET_SIZE(cpy_r_arg) == 2))) {
__tmp1 = NULL;
goto __LL2;
}
if (likely(PyUnicode_Check(PyTuple_GET_ITEM(cpy_r_arg, 0))))
__tmp1 = PyTuple_GET_ITEM(cpy_r_arg, 0);
else {
__tmp1 = NULL;
}
if (__tmp1 == NULL) goto __LL2;
if (likely(PyUnicode_Check(PyTuple_GET_ITEM(cpy_r_arg, 1))))
__tmp1 = PyTuple_GET_ITEM(cpy_r_arg, 1);
else {
__tmp1 = NULL;
}
if (__tmp1 == NULL) goto __LL2;
__tmp1 = cpy_r_arg;
__LL2: ;
if (unlikely(__tmp1 == NULL)) {
CPy_TypeError("tuple[str, str]", cpy_r_arg); cpy_r_r0 = tuple_undefined_T2OO;
} else {
PyObject *__tmp3 = PyTuple_GET_ITEM(cpy_r_arg, 0);
CPy_INCREF(__tmp3);
PyObject *__tmp4;
if (likely(PyUnicode_Check(__tmp3)))
__tmp4 = __tmp3;
else {
CPy_TypeError("str", __tmp3);
__tmp4 = NULL;
}
cpy_r_r0.f0 = __tmp4;
PyObject *__tmp5 = PyTuple_GET_ITEM(cpy_r_arg, 1);
CPy_INCREF(__tmp5);
PyObject *__tmp6;
if (likely(PyUnicode_Check(__tmp5)))
__tmp6 = __tmp5;
else {
CPy_TypeError("str", __tmp5);
__tmp6 = NULL;
}
cpy_r_r0.f1 = __tmp6;
}
if (unlikely(cpy_r_r0.f0 == NULL)) {
CPy_AddTraceback("temp/unbox.py", "f", 7, CPyStatic_globals);
goto CPyL6;
}
CPyL1: ;
cpy_r_t = cpy_r_r0;
CPy_DECREF(cpy_r_t.f0);
CPy_DECREF(cpy_r_t.f1);
PyObject *__tmp7;
if (unlikely(!(PyTuple_Check(cpy_r_arg) && PyTuple_GET_SIZE(cpy_r_arg) == 4))) {
__tmp7 = NULL;
goto __LL8;
}
if (likely(PyLong_Check(PyTuple_GET_ITEM(cpy_r_arg, 0))))
__tmp7 = PyTuple_GET_ITEM(cpy_r_arg, 0);
else {
__tmp7 = NULL;
}
if (__tmp7 == NULL) goto __LL8;
if (likely(CPyFloat_Check(PyTuple_GET_ITEM(cpy_r_arg, 1))))
__tmp7 = PyTuple_GET_ITEM(cpy_r_arg, 1);
else {
__tmp7 = NULL;
}
if (__tmp7 == NULL) goto __LL8;
if (likely(PyBool_Check(PyTuple_GET_ITEM(cpy_r_arg, 2))))
__tmp7 = PyTuple_GET_ITEM(cpy_r_arg, 2);
else {
__tmp7 = NULL;
}
if (__tmp7 == NULL) goto __LL8;
if (likely(PyTuple_GET_ITEM(cpy_r_arg, 3) == Py_None))
__tmp7 = PyTuple_GET_ITEM(cpy_r_arg, 3);
else {
__tmp7 = NULL;
}
if (__tmp7 == NULL) goto __LL8;
__tmp7 = cpy_r_arg;
__LL8: ;
if (unlikely(__tmp7 == NULL)) {
CPy_TypeError("tuple[int, float, bool, None]", cpy_r_arg); cpy_r_r1 = tuple_undefined_T4IFCC;
} else {
PyObject *__tmp9 = PyTuple_GET_ITEM(cpy_r_arg, 0);
CPyTagged __tmp10;
if (likely(PyLong_Check(__tmp9)))
__tmp10 = CPyTagged_FromObject(__tmp9);
else {
CPy_TypeError("int", __tmp9); __tmp10 = CPY_INT_TAG;
}
cpy_r_r1.f0 = __tmp10;
PyObject *__tmp11 = PyTuple_GET_ITEM(cpy_r_arg, 1);
double __tmp12;
__tmp12 = PyFloat_AsDouble(__tmp11);
if (__tmp12 == -1.0 && PyErr_Occurred()) {
__tmp12 = -113.0;
}
cpy_r_r1.f1 = __tmp12;
PyObject *__tmp13 = PyTuple_GET_ITEM(cpy_r_arg, 2);
char __tmp14;
if (unlikely(!PyBool_Check(__tmp13))) {
CPy_TypeError("bool", __tmp13); __tmp14 = 2;
} else
__tmp14 = __tmp13 == Py_True;
cpy_r_r1.f2 = __tmp14;
PyObject *__tmp15 = PyTuple_GET_ITEM(cpy_r_arg, 3);
char __tmp16;
if (unlikely(__tmp15 != Py_None)) {
CPy_TypeError("None", __tmp15); __tmp16 = 2;
} else
__tmp16 = 1;
cpy_r_r1.f3 = __tmp16;
}
if (unlikely(cpy_r_r1.f0 == CPY_INT_TAG)) {
CPy_AddTraceback("temp/unbox.py", "f", 8, CPyStatic_globals);
goto CPyL6;
}
CPyL2: ;
cpy_r_t2 = cpy_r_r1;
CPyTagged_DECREF(cpy_r_t2.f0);
PyObject *__tmp17;
if (unlikely(!(PyTuple_Check(cpy_r_arg) && PyTuple_GET_SIZE(cpy_r_arg) == 2))) {
__tmp17 = NULL;
goto __LL18;
}
if (likely(PyLong_Check(PyTuple_GET_ITEM(cpy_r_arg, 0))))
__tmp17 = PyTuple_GET_ITEM(cpy_r_arg, 0);
else {
__tmp17 = NULL;
}
if (__tmp17 == NULL) goto __LL18;
if (likely(PyLong_Check(PyTuple_GET_ITEM(cpy_r_arg, 1))))
__tmp17 = PyTuple_GET_ITEM(cpy_r_arg, 1);
else {
__tmp17 = NULL;
}
if (__tmp17 == NULL) goto __LL18;
__tmp17 = cpy_r_arg;
__LL18: ;
if (unlikely(__tmp17 == NULL)) {
CPy_TypeError("tuple[int64, int32]", cpy_r_arg); cpy_r_r2 = tuple_undefined_T284;
} else {
PyObject *__tmp19 = PyTuple_GET_ITEM(cpy_r_arg, 0);
int64_t __tmp20;
__tmp20 = CPyLong_AsInt64(__tmp19);
cpy_r_r2.f0 = __tmp20;
PyObject *__tmp21 = PyTuple_GET_ITEM(cpy_r_arg, 1);
int32_t __tmp22;
__tmp22 = CPyLong_AsInt32(__tmp21);
cpy_r_r2.f1 = __tmp22;
}
cpy_r_r3 = cpy_r_r2.f0;
cpy_r_r4 = cpy_r_r3 == -113;
if (unlikely(cpy_r_r4)) goto CPyL4;
CPyL3: ;
cpy_r_t3 = cpy_r_r2;
PyObject *__tmp23;
if (unlikely(!(PyTuple_Check(cpy_r_arg) && PyTuple_GET_SIZE(cpy_r_arg) == 1))) {
__tmp23 = NULL;
goto __LL24;
}
if (likely(Py_TYPE(PyTuple_GET_ITEM(cpy_r_arg, 0)) == CPyType_A))
__tmp23 = PyTuple_GET_ITEM(cpy_r_arg, 0);
else {
__tmp23 = NULL;
}
if (__tmp23 == NULL) goto __LL24;
__tmp23 = cpy_r_arg;
__LL24: ;
if (unlikely(__tmp23 == NULL)) {
CPy_TypeError("tuple[unbox.A]", cpy_r_arg); cpy_r_r5 = tuple_undefined_T1O;
} else {
PyObject *__tmp25 = PyTuple_GET_ITEM(cpy_r_arg, 0);
CPy_INCREF(__tmp25);
PyObject *__tmp26;
if (likely(Py_TYPE(__tmp25) == CPyType_A))
__tmp26 = __tmp25;
else {
CPy_TypeError("unbox.A", __tmp25);
__tmp26 = NULL;
}
cpy_r_r5.f0 = __tmp26;
}
if (unlikely(cpy_r_r5.f0 == NULL)) {
CPy_AddTraceback("temp/unbox.py", "f", 10, CPyStatic_globals);
goto CPyL6;
} else
goto CPyL5;
CPyL4: ;
cpy_r_r6 = PyErr_Occurred();
if (unlikely(cpy_r_r6 != NULL)) {
CPy_AddTraceback("temp/unbox.py", "f", 9, CPyStatic_globals);
goto CPyL6;
} else
goto CPyL3;
CPyL5: ;
cpy_r_t4 = cpy_r_r5;
CPy_DECREF(cpy_r_t4.f0);
return 1;
CPyL6: ;
cpy_r_r7 = 2;
return cpy_r_r7;
} |
Currently, unboxing tuples first involve doing a tuple *cast* to do error checks in advance. This hack was added because the error handling code for unboxing tuples is busted and would cause crashes when the unbox failed. (It only checks if the first item is uninitialized. If the second item is uninitialized while the first is not, no exception is raised, leading to crashes when it's accessed later.) This commit reworks the tuple unbox logic so the cast isn't needed anymore. Instead ... - Use emit_arg_check() to do a tuple type and length; and then - Modify the error handler for the item unboxes/casts to jump to an error block that uninitializes the whole tuple[^1] and sets an exception. I had to add support for custom error handlers to native integer and float unboxes so a jump to the tuple unbox error block is possible on failure. NOTE: this doesn't affect tuple->tuple coercions because those are optimized away: each tuple item are coerced individually instead. This only affects coercions where the source is either Any or a tuple of variable length. [^1]: While uninitializing only the first element would suffice, it’s simpler and probably safer to uninitialize the entire tuple.
9d982c7
to
a2cd463
Compare
Actually, I may as well double check the refcount management of this code while I'm fixing it. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the PR! Looks good overall.
I created a simple benchmark and it somewhat surprisingly seems about 20% slower with the new code (Python 3.11 and Apple M1 Max):
def f(n: int, o: list[tuple[int, str]]) -> None:
for i in range(n):
x, y = o[0]
from time import time
t0 = time()
f(1000 * 1000 * 1000, [(1, 'x')])
print(time() - t0)
I'm not sure what is going on, but I have a few guesses:
- It could be just random fluctuations and not real.
- The old code uses
PyTuple_GET_SIZE
while the new code usesPyTuple_Size
, and the latter may be slower if it can't be inlined. - The old code uses
CPyTagged_FromObject
which may be faster thanCPyTagged_StealFromObject
, since the latter requires an extra incref. - There's no
unlikely(...)
around!PyTuple_Check(cpy_r_r9) || PyTuple_Size(cpy_r_r9) != 2
in the new code.
Can you investigate this a bit?
Good catch! Adding |
... and rework the u8 unboxing code to follow this PR's changes.
I have a (not yet complete) patch that merges the unbox error branches within the unbox op that depends on this PR. @JukkaL I'd appreciate if this could be looked at soon (especially since this seems merge conflict prone). Thanks! --- build/__native.c 2023-07-20 12:47:55.132023572 -0400
+++ check-build/__native.c 2023-07-20 12:47:19.404279527 -0400
@@ -67,31 +67,22 @@
if (likely(PyLong_Check(cpy_r_arg)))
cpy_r_r0 = CPyTagged_FromObject(cpy_r_arg);
else {
- CPy_TypeError("int", cpy_r_arg); cpy_r_r0 = CPY_INT_TAG;
- }
- if (unlikely(cpy_r_r0 == CPY_INT_TAG)) {
- CPy_AddTraceback("temp/unbox_size.py", "main", 5, CPyStatic_globals);
+ CPy_TypeErrorTraceback("temp/unbox_size.py", "main", 5, CPyStatic_globals, "int", cpy_r_arg);
goto CPyL4;
}
cpy_r_integer = cpy_r_r0;
CPyTagged_DECREF(cpy_r_integer);
if (unlikely(!PyBool_Check(cpy_r_arg))) {
- CPy_TypeError("bool", cpy_r_arg); cpy_r_r1 = 2;
+ CPy_TypeErrorTraceback("temp/unbox_size.py", "main", 6, CPyStatic_globals, "bool", cpy_r_arg);
+ goto CPyL4;
} else
cpy_r_r1 = cpy_r_arg == Py_True;
- if (unlikely(cpy_r_r1 == 2)) {
- CPy_AddTraceback("temp/unbox_size.py", "main", 6, CPyStatic_globals);
- goto CPyL4;
- }
cpy_r_boolean = cpy_r_r1;
if (unlikely(cpy_r_arg != Py_None)) {
- CPy_TypeError("None", cpy_r_arg); cpy_r_r2 = 2;
+ CPy_TypeErrorTraceback("temp/unbox_size.py", "main", 7, CPyStatic_globals, "None", cpy_r_arg);
+ goto CPyL4;
} else
cpy_r_r2 = 1;
- if (unlikely(cpy_r_r2 == 2)) {
- CPy_AddTraceback("temp/unbox_size.py", "main", 7, CPyStatic_globals);
- goto CPyL4;
- }
cpy_r_none = cpy_r_r2;
return 1;
CPyL4: ; |
@JukkaL, been a while since I've last been around. I hope you're well. Could you take a look at this? |
Sorry for the slow response! It looks like the microbenchmark is still about 5% slower with this change (Applex M1 Max). I wonder if we can have something that is faster without too much extra complexity. |
Currently, unboxing tuples first involve doing a tuple cast to do
error checks in advance. This hack was added because the error handling
code for unboxing tuples is busted and would cause crashes when the
unbox failed. (It only checks if the first item is uninitialized. If
the second item is uninitialized while the first is not, no exception
is raised, leading to crashes when it's accessed later.)
This commit reworks the tuple unbox logic so the cast isn't needed
anymore. Instead ...
error block that uninitializes the whole tuple1 and sets an
exception.
I had to add support for custom error handlers to native integer and
float unboxes so a jump to the tuple unbox error block is possible on
failure.
NOTE: this doesn't affect tuple->tuple coercions because those are
optimized away: each tuple item are coerced individually instead. This
only affects coercions where the source is either Any or a tuple of
variable length.
Fixes mypyc/mypyc#451.
Footnotes
While uninitializing only the first element would suffice, it’s
simpler and probably safer to uninitialize the entire tuple. ↩