Skip to content

Commit 10b7964

Browse files
committed
better string handling
1 parent 7e9df19 commit 10b7964

File tree

5 files changed

+370
-20
lines changed

5 files changed

+370
-20
lines changed

quaddtype/numpy_quaddtype/src/casts.cpp

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -203,34 +203,46 @@ static inline int
203203
unicode_to_quad_convert(const Py_UCS4 *ucs4_str, npy_intp unicode_size_chars,
204204
QuadBackendType backend, quad_value *out_val)
205205
{
206-
// Temporary buffer to convert UCS4 to null-terminated char string
207-
char temp_str[QUAD_STR_WIDTH + 1];
208-
npy_intp copy_len = unicode_size_chars < QUAD_STR_WIDTH ? unicode_size_chars : QUAD_STR_WIDTH;
206+
// Convert UCS4 to Python Unicode object then to UTF-8 bytes
207+
// This is more robust than manual UCS4→char conversion
208+
PyObject *unicode_obj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, ucs4_str, unicode_size_chars);
209+
if (unicode_obj == NULL) {
210+
return -1;
211+
}
209212

210-
// Convert UCS4 characters to ASCII/char
211-
npy_intp i;
212-
for (i = 0; i < copy_len; i++) {
213-
Py_UCS4 c = ucs4_str[i];
214-
215-
// reject non-ASCII characters
216-
if (c > 127) {
217-
PyErr_Format(PyExc_ValueError,
218-
"Cannot cast non-ASCII character '%c' to QuadPrecision", c);
219-
return -1;
220-
}
221-
222-
temp_str[i] = (char)c;
213+
// Convert to UTF-8 bytes
214+
const char *utf8_str = PyUnicode_AsUTF8(unicode_obj);
215+
if (utf8_str == NULL) {
216+
Py_DECREF(unicode_obj);
217+
return -1;
223218
}
224-
temp_str[i] = '\0';
225219

220+
// Use locale-independent parser
226221
char *endptr;
227-
int err = cstring_to_quad(temp_str, backend, out_val, &endptr, true);
222+
int err = NumPyOS_ascii_strtoq(utf8_str, backend, out_val, &endptr);
223+
224+
// Check for parse errors
228225
if (err < 0) {
229226
PyErr_Format(PyExc_ValueError,
230-
"could not convert string to QuadPrecision: np.str_('%s')", temp_str);
227+
"could not convert string to QuadPrecision: np.str_('%s')", utf8_str);
228+
Py_DECREF(unicode_obj);
229+
return -1;
230+
}
231+
232+
// Check that we parsed the entire string (skip trailing whitespace)
233+
while (*endptr == ' ' || *endptr == '\t' || *endptr == '\n' ||
234+
*endptr == '\r' || *endptr == '\f' || *endptr == '\v') {
235+
endptr++;
236+
}
237+
238+
if (*endptr != '\0') {
239+
PyErr_Format(PyExc_ValueError,
240+
"could not convert string to QuadPrecision: np.str_('%s')", utf8_str);
241+
Py_DECREF(unicode_obj);
231242
return -1;
232243
}
233244

245+
Py_DECREF(unicode_obj);
234246
return 0;
235247
}
236248

quaddtype/numpy_quaddtype/src/constants.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
#ifndef QUAD_CONSTANTS_HPP
22
#define QUAD_CONSTANTS_HPP
33

4+
#ifdef __cplusplus
5+
extern "C" {
6+
#endif
7+
48
#include <sleef.h>
59
#include <sleefquad.h>
610
#include <stdint.h>
@@ -130,4 +134,8 @@ static inline ConstantResult get_sleef_constant_by_name(const char* constant_nam
130134
return result;
131135
}
132136

137+
#ifdef __cplusplus
138+
}
139+
#endif
140+
133141
#endif // QUAD_CONSTANTS_HPP

quaddtype/numpy_quaddtype/src/utilities.c

Lines changed: 153 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,157 @@
1-
#include "utilities.h"
21
#include <stdlib.h>
2+
#include <string.h>
3+
#include <ctype.h>
4+
#include "utilities.h"
5+
#include "constants.hpp"
6+
7+
// Locale-independent ASCII character classification helpers
8+
/*
 * Return 1 when `c` is one of the six ASCII whitespace characters
 * (space, \t, \n, \v, \f, \r) and 0 otherwise.
 *
 * The check is a plain value comparison, so it does not consult the
 * current C locale.
 */
static int
ascii_isspace(int c)
{
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\v':
        case '\f':
        case '\r':
            return 1;
        default:
            return 0;
    }
}
13+
14+
/*
 * Return 1 when `c` is an ASCII letter ('A'-'Z' or 'a'-'z') and 0
 * otherwise. Only literal character ranges are compared, so the result
 * does not depend on the current C locale.
 */
static int
ascii_isalpha(char c)
{
    if (c >= 'a' && c <= 'z') {
        return 1;
    }
    if (c >= 'A' && c <= 'Z') {
        return 1;
    }
    return 0;
}
19+
20+
/*
 * Return 1 when `c` is an ASCII decimal digit ('0'-'9') and 0 otherwise.
 * Only a literal character range is compared, so the result does not
 * depend on the current C locale.
 */
static int
ascii_isdigit(char c)
{
    return !(c < '0' || c > '9');
}
25+
26+
/*
 * Return 1 when `c` is an ASCII decimal digit or an ASCII letter and 0
 * otherwise. The range checks are spelled out directly; only literal
 * character ranges are compared, so the result does not depend on the
 * current C locale.
 */
static int
ascii_isalnum(char c)
{
    const int is_digit = (c >= '0' && c <= '9');
    const int is_upper = (c >= 'A' && c <= 'Z');
    const int is_lower = (c >= 'a' && c <= 'z');

    return is_digit || is_upper || is_lower;
}
31+
32+
/*
 * Map an ASCII uppercase letter ('A'-'Z') to its lowercase counterpart.
 * Every other value is returned unchanged. Only a literal character
 * range is compared, so the result does not depend on the current C
 * locale.
 */
static int
ascii_tolower(int c)
{
    const int is_upper = (c >= 'A' && c <= 'Z');

    return is_upper ? c + ('a' - 'A') : c;
}
40+
41+
/*
 * Locale-independent, case-insensitive comparison of at most `n` bytes of
 * two NUL-terminated strings (inspired by NumPyOS_ascii_strncasecmp).
 *
 * Only ASCII letters are case-folded (via ascii_tolower); all other bytes
 * are compared verbatim.
 *
 * Returns 0 when the first `n` bytes compare equal (or both strings end
 * together before `n` bytes), a negative value when `s1` orders before
 * `s2`, and a positive value otherwise.
 *
 * Bytes are always compared as `unsigned char`, both inside the loop and
 * when one string ends early, so the sign of the result is the same on
 * signed-char and unsigned-char platforms even for bytes >= 0x80 (e.g.
 * UTF-8 continuation bytes).
 */
static int
ascii_strncasecmp(const char *s1, const char *s2, size_t n)
{
    while (n > 0 && *s1 != '\0' && *s2 != '\0') {
        int c1 = ascii_tolower((unsigned char)*s1);
        int c2 = ascii_tolower((unsigned char)*s2);
        int diff = c1 - c2;

        if (diff != 0) {
            return diff;
        }

        s1++;
        s2++;
        n--;
    }

    if (n > 0) {
        // At least one string ended before n bytes were compared; the
        // terminator takes part in the comparison like any other byte.
        return (int)(unsigned char)*s1 - (int)(unsigned char)*s2;
    }
    return 0;
}
64+
65+
/*
66+
* NumPyOS_ascii_strtoq:
67+
*
68+
* Locale-independent string to quad-precision parser.
69+
* Inspired by NumPyOS_ascii_strtold from NumPy.
70+
*
71+
* This function:
72+
* - Skips leading whitespace
73+
* - Recognizes inf/nan case-insensitively with optional signs and payloads
74+
* - Delegates to cstring_to_quad for numeric parsing
75+
*
76+
* Returns:
77+
* 0 on success
78+
* -1 on parse error
79+
*/
80+
int
81+
NumPyOS_ascii_strtoq(const char *s, QuadBackendType backend, quad_value *out_value, char **endptr)
82+
{
83+
const char *p;
84+
int sign;
85+
86+
// skip leading whitespace
87+
while (ascii_isspace(*s)) {
88+
s++;
89+
}
90+
91+
p = s;
92+
sign = 1;
93+
if (*p == '-') {
94+
sign = -1;
95+
++p;
96+
}
97+
else if (*p == '+') {
98+
++p;
99+
}
100+
101+
// Check for inf/infinity (case-insensitive)
102+
if (ascii_strncasecmp(p, "inf", 3) == 0) {
103+
p += 3;
104+
if (ascii_strncasecmp(p, "inity", 5) == 0) {
105+
p += 5;
106+
}
107+
108+
// Set infinity values with sign applied
109+
if (backend == BACKEND_SLEEF) {
110+
out_value->sleef_value = sign > 0 ? QUAD_PRECISION_INF : QUAD_PRECISION_NINF;
111+
}
112+
else {
113+
out_value->longdouble_value = sign > 0 ? strtold("inf", NULL) : strtold("-inf", NULL);
114+
}
115+
116+
if (endptr) {
117+
*endptr = (char *)p;
118+
}
119+
return 0;
120+
}
121+
122+
// Check for nan (case-insensitive) with optional payload
123+
if (ascii_strncasecmp(p, "nan", 3) == 0) {
124+
p += 3;
125+
126+
// Skip optional (payload)
127+
if (*p == '(') {
128+
++p;
129+
while (ascii_isalnum(*p) || *p == '_') {
130+
++p;
131+
}
132+
if (*p == ')') {
133+
++p;
134+
}
135+
}
136+
137+
// Set NaN value (sign is ignored for NaN)
138+
if (backend == BACKEND_SLEEF) {
139+
out_value->sleef_value = QUAD_PRECISION_NAN;
140+
}
141+
else {
142+
out_value->longdouble_value = nanl("");
143+
}
144+
145+
if (endptr) {
146+
*endptr = (char *)p;
147+
}
148+
return 0;
149+
}
150+
151+
// For numeric values, delegate to cstring_to_quad
152+
// Pass the original string position (after whitespace, includes sign if present)
153+
return cstring_to_quad(s, backend, out_value, endptr, false);
154+
}
3155

4156
int cstring_to_quad(const char *str, QuadBackendType backend, quad_value *out_value,
5157
char **endptr, bool require_full_parse)

quaddtype/numpy_quaddtype/src/utilities.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ extern "C" {
1212

1313
int cstring_to_quad(const char *str, QuadBackendType backend, quad_value *out_value, char **endptr, bool require_full_parse);
1414

15+
// Locale-independent ASCII string to quad parser (inspired by NumPyOS_ascii_strtold)
16+
int NumPyOS_ascii_strtoq(const char *s, QuadBackendType backend, quad_value *out_value, char **endptr);
17+
1518
// Helper function: Convert quad_value to Sleef_quad for Dragon4
1619
Sleef_quad
1720
quad_to_sleef_quad(const quad_value *in_val, QuadBackendType backend);

0 commit comments

Comments
 (0)