Skip to content

Commit f417754

Browse files
committed
BUG: parse floats outside of PyFloat_FromString for python 2.5
1 parent 128995c commit f417754

File tree

3 files changed

+239
-3
lines changed

3 files changed

+239
-3
lines changed

pandas/src/inference.pyx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values,
317317
convert to proper dtype array
318318
'''
319319
cdef:
320+
int status
320321
Py_ssize_t i, n
321322
ndarray[float64_t] floats
322323
ndarray[complex128_t] complexes
@@ -354,7 +355,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values,
354355
complexes[i] = val
355356
seen_complex = 1
356357
else:
357-
fval = util.floatify(val)
358+
status = util.floatify(val, &fval)
358359
floats[i] = fval
359360
if not seen_float:
360361
if '.' in val or fval == INF or fval == NEGINF:

pandas/src/numpy_helper.h

Lines changed: 236 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,17 +133,252 @@ char_to_string(char* data) {
133133
// return PyString_Check(obj);
134134
// #endif
135135

136-
PANDAS_INLINE PyObject* floatify(PyObject* str) {
136+
#include <errno.h>
137+
#include <float.h>
137138

139+
double PANDAS_INLINE xstrtod(const char *p, char **q, char decimal, char sci, int skip_trailing);
140+
141+
/*
 * Parse the whole of `item` as a double using xstrtod().
 *
 * item     NUL-terminated text to parse.
 * p_value  receives whatever xstrtod() returns, including on failure.
 * sci      exponent marker character forwarded to xstrtod().
 * decimal  decimal-point character forwarded to xstrtod().
 *
 * Returns 1 when xstrtod() left errno at 0 and stopped exactly at the
 * terminating NUL (trailing whitespace is skipped first); otherwise 0.
 */
int to_double(char *item, double *p_value, char sci, char decimal)
{
    char *tail;

    /* Last argument = 1: ask xstrtod() to skip trailing whitespace. */
    *p_value = xstrtod(item, &tail, decimal, sci, 1);

    /* Test errno first; `tail` is only inspected when no error was
       reported. */
    if (errno != 0) {
        return 0;
    }

    return *tail == '\0';
}
149+
150+
#if PY_VERSION_HEX < 0x02060000
151+
#define PyBytes_Check PyString_Check
152+
#define PyBytes_AS_STRING PyString_AS_STRING
153+
#endif
154+
155+
/*
 * Parse a Python bytes/str object as a C double.
 *
 * str     object to parse; must satisfy PyBytes_Check or PyUnicode_Check.
 * result  receives the parsed value on success.
 *
 * Returns 0 on success. Returns -1 with a Python exception set when:
 *   - `str` is neither bytes nor unicode (TypeError),
 *   - encoding a unicode object to UTF-8 fails (exception set by
 *     PyUnicode_AsUTF8String),
 *   - the text is not a number and is not exactly "inf" or "-inf"
 *     (ValueError).
 */
PANDAS_INLINE int floatify(PyObject* str, double *result) {
    int status;
    char *data;
    PyObject* tmp = NULL;
    const char sci = 'E';
    const char dec = '.';

    if (PyBytes_Check(str)) {
        data = PyBytes_AS_STRING(str);
    } else if (PyUnicode_Check(str)) {
        tmp = PyUnicode_AsUTF8String(str);
        if (tmp == NULL) {
            /* Encoding failed and the exception is already set; do not
               hand NULL to PyBytes_AS_STRING, which would dereference it. */
            return -1;
        }
        data = PyBytes_AS_STRING(tmp);
    } else {
        PyErr_SetString(PyExc_TypeError, "Invalid object type");
        return -1;
    }

    status = to_double(data, result, sci, dec);

    if (!status) {
        /* handle inf/-inf (exact, case-sensitive spellings only) */
        if (0 == strcmp(data, "-inf")) {
            *result = -HUGE_VAL;
        } else if (0 == strcmp(data, "inf")) {
            *result = HUGE_VAL;
        } else {
            PyErr_SetString(PyExc_ValueError, "Unable to parse string");
            /* `data` points into `tmp`, so release it only after the
               last use of `data`. */
            Py_XDECREF(tmp);
            return -1;
        }
    }

    Py_XDECREF(tmp);
    return 0;
}
145199

146200

201+
// ---------------------------------------------------------------------------
202+
// Implementation of xstrtod
203+
204+
//
205+
// strtod.c
206+
//
207+
// Convert string to double
208+
//
209+
// Copyright (C) 2002 Michael Ringgaard. All rights reserved.
210+
//
211+
// Redistribution and use in source and binary forms, with or without
212+
// modification, are permitted provided that the following conditions
213+
// are met:
214+
//
215+
// 1. Redistributions of source code must retain the above copyright
216+
// notice, this list of conditions and the following disclaimer.
217+
// 2. Redistributions in binary form must reproduce the above copyright
218+
// notice, this list of conditions and the following disclaimer in the
219+
// documentation and/or other materials provided with the distribution.
220+
// 3. Neither the name of the project nor the names of its contributors
221+
// may be used to endorse or promote products derived from this software
222+
// without specific prior written permission.
223+
//
224+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
225+
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
226+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
227+
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
228+
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
229+
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
230+
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
231+
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
232+
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
233+
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
234+
// SUCH DAMAGE.
235+
//
236+
// -----------------------------------------------------------------------
237+
// Modifications by Warren Weckesser, March 2011:
238+
// * Rename strtod() to xstrtod().
239+
// * Added decimal and sci arguments.
240+
// * Skip trailing spaces.
241+
// * Commented out the other functions.
242+
//
243+
244+
/* Convert the NUL-terminated string `p` to lower case, in place. */
PANDAS_INLINE void lowercase(char *p) {
    /* <ctype.h> functions require an argument representable as
       unsigned char (or EOF); a negative plain char is undefined
       behaviour, so convert before the call. */
    for ( ; *p; ++p) *p = (char) tolower((unsigned char) *p);
}
247+
248+
/* Convert the NUL-terminated string `p` to upper case, in place. */
PANDAS_INLINE void uppercase(char *p) {
    /* <ctype.h> functions require an argument representable as
       unsigned char (or EOF); a negative plain char is undefined
       behaviour, so convert before the call. */
    for ( ; *p; ++p) *p = (char) toupper((unsigned char) *p);
}
251+
252+
253+
/*
 * Convert the leading part of `str` to a double.
 *
 * str            NUL-terminated input text.
 * endptr         if non-NULL, receives the position where parsing stopped.
 *                When no digits were found it is set to `str`.
 * decimal        character accepted as the decimal point.
 * sci            character accepted (case-insensitively) as the exponent
 *                marker.
 * skip_trailing  when non-zero, whitespace after the number is consumed
 *                before `*endptr` is stored.
 *
 * errno is reset to 0 on entry and set to ERANGE when:
 *   - no digits were found (returns 0.0),
 *   - the decimal exponent lies outside [DBL_MIN_EXP, DBL_MAX_EXP]
 *     (returns HUGE_VAL),
 *   - the scaled result is +/- infinity (returns that value).
 *
 * NOTE(review): an exponent marker followed by no digits (e.g. "1e") is
 * consumed and treated as exponent 0; kept as in the original.
 */
double PANDAS_INLINE xstrtod(const char *str, char **endptr, char decimal,
                             char sci, int skip_trailing)
{
    double number;
    int exponent;
    int negative;
    char *p = (char *) str;
    double p10;
    int n;
    int num_digits;
    int num_decimals;

    errno = 0;

    // Skip leading whitespace. The <ctype.h> functions need a value
    // representable as unsigned char, hence the casts throughout.
    while (isspace((unsigned char) *p)) p++;

    // Handle optional sign
    negative = 0;
    switch (*p)
    {
        case '-':
            negative = 1;
            /* fall through to increment position */
        case '+':
            p++;
            break;
        default:
            break;
    }

    number = 0.;
    exponent = 0;
    num_digits = 0;
    num_decimals = 0;

    // Process string of digits
    while (isdigit((unsigned char) *p))
    {
        number = number * 10. + (*p - '0');
        p++;
        num_digits++;
    }

    // Process decimal part
    if (*p == decimal)
    {
        p++;

        while (isdigit((unsigned char) *p))
        {
            number = number * 10. + (*p - '0');
            p++;
            num_digits++;
            num_decimals++;
        }

        // Fraction digits were accumulated as an integer; compensate here.
        exponent -= num_decimals;
    }

    if (num_digits == 0)
    {
        errno = ERANGE;
        // Nothing was converted: report the start of the input, as
        // strtod() does, instead of leaving *endptr unset.
        if (endptr) *endptr = (char *) str;
        return 0.0;
    }

    // Correct for sign
    if (negative) number = -number;

    // Process an exponent string
    if (toupper((unsigned char) *p) == toupper((unsigned char) sci))
    {
        // Handle optional sign
        negative = 0;
        switch (*++p)
        {
            case '-':
                negative = 1;
                /* fall through to increment position */
            case '+':
                p++;
                break;
            default:
                break;
        }

        // Process string of digits
        n = 0;
        while (isdigit((unsigned char) *p))
        {
            // Stop accumulating once the exponent is already far outside
            // anything a double can hold; this keeps `n` (and the
            // addition below) clear of signed integer overflow while
            // still consuming the remaining digits.
            if (n < 100000000)
                n = n * 10 + (*p - '0');
            p++;
        }

        if (negative)
            exponent -= n;
        else
            exponent += n;
    }

    // NOTE(review): DBL_MIN_EXP/DBL_MAX_EXP are base-2 limits applied to
    // a base-10 exponent; kept as in the original since it only bounds
    // the scaling loop below.
    if (exponent < DBL_MIN_EXP || exponent > DBL_MAX_EXP)
    {
        errno = ERANGE;
        if (endptr) *endptr = p;
        return HUGE_VAL;
    }

    // Scale the result by 10^exponent using binary exponentiation:
    // p10 runs through 10, 10^2, 10^4, ... and is applied for each set
    // bit of |exponent|.
    p10 = 10.;
    n = exponent;
    if (n < 0) n = -n;
    while (n)
    {
        if (n & 1)
        {
            if (exponent < 0)
                number /= p10;
            else
                number *= p10;
        }
        n >>= 1;
        p10 *= p10;
    }

    // Overflow in either direction is a range error; the original only
    // caught the positive case, so "-1e400" slipped through as -inf.
    if (number == HUGE_VAL || number == -HUGE_VAL) {
        errno = ERANGE;
    }

    if (skip_trailing) {
        // Skip trailing whitespace
        while (isspace((unsigned char) *p)) p++;
    }

    if (endptr) *endptr = p;

    return number;
}
381+
147382
/*
 * Turn on the NPY_OWNDATA bit in the flags word of `ao`; all other flag
 * bits are left as they were.
 */
void set_array_owndata(PyArrayObject *ao) {
    ao->flags = ao->flags | NPY_OWNDATA;
}

pandas/src/util.pxd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ cdef extern from "numpy_helper.h":
1515
inline cnp.int64_t get_nat()
1616
inline object get_value_1d(ndarray, Py_ssize_t)
1717
inline char *get_c_string(object)
18-
inline object floatify(object)
18+
inline int floatify(object, double *result) except -1
1919
inline object char_to_string(char*)
2020

2121
cdef inline object get_value_at(ndarray arr, object loc):

0 commit comments

Comments
 (0)