From 5dd775bed086909722ec7014a7c4f77a35f74a80 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 14 Dec 2024 01:21:46 +0900 Subject: [PATCH] gh-126024: unicodeobject: optimize find_first_nonascii (GH-127790) Remove 1 branch. --- Objects/unicodeobject.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 33c4747bbef488..b7aeb06d32bcec 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5077,21 +5077,24 @@ load_unaligned(const unsigned char *p, size_t size) static Py_ssize_t find_first_nonascii(const unsigned char *start, const unsigned char *end) { + // The search is done in `size_t` chunks. + // The start and end might not be aligned at `size_t` boundaries, + // so they're handled specially. + const unsigned char *p = start; if (end - start >= SIZEOF_SIZE_T) { - const unsigned char *p2 = _Py_ALIGN_UP(p, SIZEOF_SIZE_T); + // Avoid unaligned read. #if PY_LITTLE_ENDIAN && HAVE_CTZ - if (p < p2) { - size_t u; - memcpy(&u, p, sizeof(size_t)); - u &= ASCII_CHAR_MASK; - if (u) { - return (ctz(u) - 7) / 8; - } - p = p2; + size_t u; + memcpy(&u, p, sizeof(size_t)); + u &= ASCII_CHAR_MASK; + if (u) { + return (ctz(u) - 7) / 8; } + p = _Py_ALIGN_DOWN(p + SIZEOF_SIZE_T, SIZEOF_SIZE_T); #else /* PY_LITTLE_ENDIAN && HAVE_CTZ */ + const unsigned char *p2 = _Py_ALIGN_UP(p, SIZEOF_SIZE_T); while (p < p2) { if (*p & 0x80) { return p - start; @@ -5099,6 +5102,7 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end) p++; } #endif + const unsigned char *e = end - SIZEOF_SIZE_T; while (p <= e) { size_t u = (*(const size_t *)p) & ASCII_CHAR_MASK; @@ -5115,6 +5119,7 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end) } } #if PY_LITTLE_ENDIAN && HAVE_CTZ + assert((end - p) < SIZEOF_SIZE_T); // we can not use *(const size_t*)p to avoid buffer overrun. size_t u = load_unaligned(p, end - p) & ASCII_CHAR_MASK; if (u) {