From b1a1dadc2063b03d0d091176b8428f2502930e17 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 31 Jan 2018 10:47:58 -0800 Subject: [PATCH 01/10] use cythonize in setup --- pandas/_libs/lib.pyx | 2 +- pandas/_libs/tslibs/conversion.pyx | 6 +- pandas/_libs/tslibs/frequencies.pyx | 2 +- pandas/_libs/tslibs/nattype.pyx | 6 +- pandas/_libs/tslibs/offsets.pyx | 2 +- pandas/_libs/tslibs/period.pyx | 4 +- pandas/_libs/tslibs/resolution.pyx | 2 +- pandas/_libs/tslibs/strptime.pyx | 2 +- pandas/_libs/tslibs/timedeltas.pyx | 8 +-- pandas/_libs/tslibs/timestamps.pyx | 6 +- pandas/_libs/tslibs/timezones.pyx | 2 +- pandas/_libs/util.pxd | 65 +++++++++++++++++++ pandas/_libs/{src/util.pxd => util.pyx} | 82 ++++++++---------------- pandas/io/sas/sas7bdat.py | 2 +- setup.py | 84 ++++++------------------- 15 files changed, 132 insertions(+), 143 deletions(-) create mode 100644 pandas/_libs/util.pxd rename pandas/_libs/{src/util.pxd => util.pyx} (71%) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index c3a654b01022c..95b7ae5d3c24c 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1106,4 +1106,4 @@ def indices_fast(object index, ndarray[int64_t] labels, list keys, return result -include "inference.pyx" +include "src/inference.pyx" diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index a32bfc1f6836c..ce327de2e41e1 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -27,9 +27,9 @@ from np_datetime cimport (check_dts_bounds, get_datetime64_unit, get_datetime64_value, pydatetime_to_dt64) -from util cimport (is_string_object, - is_datetime64_object, - is_integer_object, is_float_object, is_array) +from ..util cimport (is_string_object, + is_datetime64_object, + is_integer_object, is_float_object, is_array) from timedeltas cimport cast_from_unit from timezones cimport (is_utc, is_tzlocal, is_fixed_offset, diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index abaf8cad09bdb..a977773d49cf9 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -8,7 +8,7 @@ cimport numpy as cnp from numpy cimport int64_t cnp.import_array() -from util cimport is_integer_object, is_string_object +from ..util cimport is_integer_object, is_string_object from ccalendar import MONTH_NUMBERS diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 9f4ef4e515058..c2d20e85af7a5 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -17,9 +17,9 @@ cimport numpy as cnp from numpy cimport int64_t cnp.import_array() -from util cimport (get_nat, - is_integer_object, is_float_object, - is_datetime64_object, is_timedelta64_object) +from ..util cimport (get_nat, + is_integer_object, is_float_object, + is_datetime64_object, is_timedelta64_object) # ---------------------------------------------------------------------- # Constants diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index e02818dd818df..46091ef357708 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -15,7 +15,7 @@ from numpy cimport int64_t cnp.import_array() -from util cimport is_string_object, is_integer_object +from ..util cimport is_string_object, is_integer_object from ccalendar import MONTHS, DAYS from conversion cimport tz_convert_single, pydt_to_i8 diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index e2caebe4c4afc..8af2e84cda340 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -24,8 +24,8 @@ PyDateTime_IMPORT from np_datetime cimport (pandas_datetimestruct, dtstruct_to_dt64, dt64_to_dtstruct, is_leapyear) -cimport util -from util cimport is_period_object, is_string_object, INT32_MIN +from ..cimport util +from ..util cimport is_period_object, is_string_object, INT32_MIN from pandas._libs.missing cimport is_null_datetimelike diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index b166babe5992c..f77cadbe9d930 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -8,7 +8,7 @@ cimport numpy as cnp from numpy cimport ndarray, int64_t cnp.import_array() -from util cimport is_string_object, get_nat +from ..util cimport is_string_object, get_nat from pandas._libs.khash cimport (khiter_t, kh_destroy_int64, kh_put_int64, diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index e7dabb94f8975..7b64468bc4a31 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -35,7 +35,7 @@ from cpython.datetime cimport datetime from np_datetime cimport (check_dts_bounds, dtstruct_to_dt64, pandas_datetimestruct) -from util cimport is_string_object +from ..util cimport is_string_object from nattype cimport checknull_with_nat, NPY_NAT from nattype import nat_strings diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 37693068e0974..c607f02158622 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -21,10 +21,10 @@ from cpython.datetime cimport (datetime, timedelta, PyDateTime_IMPORT -cimport util -from util cimport (is_timedelta64_object, is_datetime64_object, - is_integer_object, is_float_object, - is_string_object) +from ..cimport util +from ..util cimport (is_timedelta64_object, is_datetime64_object, + is_integer_object, is_float_object, + is_string_object) from np_datetime cimport (cmp_scalar, reverse_ops, td64_to_tdstruct, pandas_timedeltastruct) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index b9be9c16eb6c3..13621f0c0e010 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -16,9 +16,9 @@ from cpython.datetime cimport (datetime, PyDateTime_IMPORT) PyDateTime_IMPORT -from util cimport (is_datetime64_object, is_timedelta64_object, - is_integer_object, is_string_object, is_array, - INT64_MAX) +from ..util cimport (is_datetime64_object, is_timedelta64_object, + is_integer_object, is_string_object, is_array, + INT64_MAX) cimport ccalendar from conversion import tz_localize_to_utc, date_normalize diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 215ae9ce087ee..f2f9b3245a7d7 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -23,7 +23,7 @@ from numpy cimport ndarray, int64_t cnp.import_array() # ---------------------------------------------------------------------- -from util cimport is_string_object, is_integer_object, get_nat +from ..util cimport is_string_object, is_integer_object, get_nat cdef int64_t NPY_NAT = get_nat() diff --git a/pandas/_libs/util.pxd b/pandas/_libs/util.pxd new file mode 100644 index 0000000000000..5016362a6e37f --- /dev/null +++ b/pandas/_libs/util.pxd @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- + +from cython cimport Py_ssize_t + +cimport numpy as cnp +from numpy cimport ndarray + +cdef extern from "src/headers/stdint.h": + enum: UINT8_MAX + enum: UINT16_MAX + enum: UINT32_MAX + enum: UINT64_MAX + enum: INT8_MIN + enum: INT8_MAX + enum: INT16_MIN + enum: INT16_MAX + enum: INT32_MAX + enum: INT32_MIN + enum: INT64_MAX + enum: INT64_MIN + +ctypedef fused numeric: + cnp.int8_t + cnp.int16_t + cnp.int32_t + cnp.int64_t + + cnp.uint8_t + cnp.uint16_t + cnp.uint32_t + cnp.uint64_t + + cnp.float32_t + cnp.float64_t + + +cdef extern from "src/numpy_helper.h": + void set_array_not_contiguous(ndarray ao) + + int assign_value_1d(ndarray, Py_ssize_t, object) except -1 + cnp.int64_t get_nat() + object get_value_1d(ndarray, Py_ssize_t) + char *get_c_string(object) except NULL + object char_to_string(char*) + + +cdef bint is_string_object(object obj) nogil +cdef bint is_integer_object(object obj) nogil +cdef bint is_float_object(object obj) nogil +cdef bint is_complex_object(object obj) nogil +cdef bint is_bool_object(object obj) nogil +cdef bint is_timedelta64_object(object obj) nogil +cdef bint is_datetime64_object(object obj) nogil + +cdef bint is_array(object o) +cdef bint is_period_object(object val) + +cdef bint _checknull(object val) +cdef bint _checknan(object val) + +cdef object unbox_if_zerodim(object arr) + +cdef set_value_at(ndarray arr, object loc, object value) +cdef set_value_at_unsafe(ndarray arr, object loc, object value) +cdef object get_value_at(ndarray arr, object loc) diff --git a/pandas/_libs/src/util.pxd b/pandas/_libs/util.pyx similarity index 71% rename from pandas/_libs/src/util.pxd rename to pandas/_libs/util.pyx index cf23df1279f34..81d5d22fdfab3 100644 --- a/pandas/_libs/src/util.pxd +++ b/pandas/_libs/util.pyx @@ -1,7 +1,4 @@ -from numpy cimport ndarray, NPY_C_CONTIGUOUS, NPY_F_CONTIGUOUS -cimport numpy as cnp -cnp.import_array() - +# -*- coding: utf-8 -*- cimport cpython from cpython cimport PyTypeObject @@ -15,6 +12,9 @@ cdef extern from "Python.h": bint PyComplex_Check(object obj) nogil bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil +cimport numpy as cnp +from numpy cimport ndarray, NPY_C_CONTIGUOUS, NPY_F_CONTIGUOUS +cnp.import_array() cdef extern from "numpy/arrayobject.h": PyTypeObject PyFloatingArrType_Type @@ -31,76 +31,48 @@ cdef extern from "numpy/ndarrayobject.h": # -------------------------------------------------------------------- # Type Checking -cdef inline bint is_string_object(object obj) nogil: +cdef bint is_string_object(object obj) nogil: return PyString_Check(obj) or PyUnicode_Check(obj) -cdef inline bint is_integer_object(object obj) nogil: +cdef bint is_integer_object(object obj) nogil: return not PyBool_Check(obj) and PyArray_IsIntegerScalar(obj) -cdef inline bint is_float_object(object obj) nogil: +cdef bint is_float_object(object obj) nogil: return (PyFloat_Check(obj) or (PyObject_TypeCheck(obj, &PyFloatingArrType_Type))) -cdef inline bint is_complex_object(object obj) nogil: +cdef bint is_complex_object(object obj) nogil: return (PyComplex_Check(obj) or PyObject_TypeCheck(obj, &PyComplexFloatingArrType_Type)) -cdef inline bint is_bool_object(object obj) nogil: +cdef bint is_bool_object(object obj) nogil: return (PyBool_Check(obj) or PyObject_TypeCheck(obj, &PyBoolArrType_Type)) -cdef inline bint is_timedelta64_object(object obj) nogil: +cdef bint is_timedelta64_object(object obj) nogil: return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) -cdef inline bint is_datetime64_object(object obj) nogil: +cdef bint is_datetime64_object(object obj) nogil: return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) + +cdef bint is_array(object o): + return cnp.PyArray_Check(o) + + +cdef bint is_period_object(object val): + return getattr(val, '_typ', '_typ') == 'period' + # -------------------------------------------------------------------- -cdef extern from "numpy_helper.h": - void set_array_not_contiguous(ndarray ao) - - int assign_value_1d(ndarray, Py_ssize_t, object) except -1 - cnp.int64_t get_nat() - object get_value_1d(ndarray, Py_ssize_t) - char *get_c_string(object) except NULL - object char_to_string(char*) - -ctypedef fused numeric: - cnp.int8_t - cnp.int16_t - cnp.int32_t - cnp.int64_t - - cnp.uint8_t - cnp.uint16_t - cnp.uint32_t - cnp.uint64_t - - cnp.float32_t - cnp.float64_t - -cdef extern from "headers/stdint.h": - enum: UINT8_MAX - enum: UINT16_MAX - enum: UINT32_MAX - enum: UINT64_MAX - enum: INT8_MIN - enum: INT8_MAX - enum: INT16_MIN - enum: INT16_MAX - enum: INT32_MAX - enum: INT32_MIN - enum: INT64_MAX - enum: INT64_MIN - -cdef inline object get_value_at(ndarray arr, object loc): + +cdef object get_value_at(ndarray arr, object loc): cdef: Py_ssize_t i, sz int casted @@ -119,7 +91,8 @@ cdef inline object get_value_at(ndarray arr, object loc): return get_value_1d(arr, i) -cdef inline set_value_at_unsafe(ndarray arr, object loc, object value): + +cdef set_value_at_unsafe(ndarray arr, object loc, object value): """Sets a value into the array without checking the writeable flag. This should be used when setting values in a loop, check the writeable @@ -141,7 +114,7 @@ cdef inline set_value_at_unsafe(ndarray arr, object loc, object value): assign_value_1d(arr, i, value) -cdef inline set_value_at(ndarray arr, object loc, object value): +cdef set_value_at(ndarray arr, object loc, object value): """Sets a value into the array after checking that the array is mutable. """ if not cnp.PyArray_ISWRITEABLE(arr): @@ -150,21 +123,16 @@ cdef inline set_value_at(ndarray arr, object loc, object value): set_value_at_unsafe(arr, loc, value) -cdef inline is_array(object o): - return cnp.PyArray_Check(o) - cdef inline bint _checknull(object val): try: return val is None or (cpython.PyFloat_Check(val) and val != val) except ValueError: return False + cdef inline bint _checknan(object val): return not cnp.PyArray_Check(val) and val != val -cdef inline bint is_period_object(object val): - return getattr(val, '_typ', '_typ') == 'period' - cdef inline object unbox_if_zerodim(object arr): """ diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 26e39f0df8b29..42ac26e4b274e 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -21,7 +21,7 @@ import numpy as np import struct import pandas.io.sas.sas_constants as const -from pandas.io.sas._sas import Parser +from pandas.io.sas.sas import Parser class _subheader_pointer(object): diff --git a/setup.py b/setup.py index 27943a776c414..1462e8b70b09c 100755 --- a/setup.py +++ b/setup.py @@ -36,9 +36,11 @@ def is_platform_mac(): try: import Cython ver = Cython.__version__ + from Cython.Build import cythonize _CYTHON_INSTALLED = ver >= LooseVersion(min_cython_ver) except ImportError: _CYTHON_INSTALLED = False + cythonize = lambda x: x # dummy function min_numpy_ver = '1.9.0' @@ -103,8 +105,8 @@ def is_platform_mac(): class build_ext(_build_ext): - def build_extensions(self): - + @classmethod + def render_templates(cls): # if builing from c files, don't need to # generate template output if cython: @@ -125,6 +127,9 @@ def build_extensions(self): with open(outfile, "w") as f: f.write(pyxcontent) + def build_extensions(self): + self.render_templates() + numpy_incl = pkg_resources.resource_filename('numpy', 'core/include') for ext in self.extensions: @@ -328,6 +333,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/tslibs/frequencies.pyx', 'pandas/_libs/tslibs/resolution.pyx', 'pandas/_libs/tslibs/parsing.pyx', + 'pandas/_libs/util.pyx', 'pandas/io/sas/sas.pyx'] def initialize_options(self): @@ -460,22 +466,18 @@ def pxd(name): ext_data = { '_libs.algos': { 'pyxfile': '_libs/algos', - 'pxdfiles': ['_libs/src/util', '_libs/algos', '_libs/hashtable'], 'depends': _pxi_dep['algos']}, '_libs.groupby': { 'pyxfile': '_libs/groupby', - 'pxdfiles': ['_libs/src/util', '_libs/algos'], 'depends': _pxi_dep['groupby']}, '_libs.hashing': { 'pyxfile': '_libs/hashing'}, '_libs.hashtable': { 'pyxfile': '_libs/hashtable', - 'pxdfiles': ['_libs/hashtable', '_libs/missing', '_libs/khash'], 'depends': (['pandas/_libs/src/klib/khash_python.h'] + _pxi_dep['hashtable'])}, '_libs.index': { 'pyxfile': '_libs/index', - 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], 'depends': _pxi_dep['index'], 'sources': np_datetime_sources}, '_libs.indexing': { @@ -484,21 +486,15 @@ def pxd(name): 'pyxfile': '_libs/internals'}, '_libs.interval': { 'pyxfile': '_libs/interval', - 'pxdfiles': ['_libs/hashtable'], 'depends': _pxi_dep['interval']}, '_libs.join': { 'pyxfile': '_libs/join', - 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], 'depends': _pxi_dep['join']}, '_libs.lib': { 'pyxfile': '_libs/lib', - 'pxdfiles': ['_libs/src/util', - '_libs/missing', - '_libs/tslibs/conversion'], 'depends': lib_depends + tseries_depends}, '_libs.missing': { 'pyxfile': '_libs/missing', - 'pxdfiles': ['_libs/src/util'], 'depends': tseries_depends}, '_libs.parsers': { 'pyxfile': '_libs/parsers', @@ -508,15 +504,9 @@ def pxd(name): 'sources': ['pandas/_libs/src/parser/tokenizer.c', 'pandas/_libs/src/parser/io.c']}, '_libs.reduction': { - 'pyxfile': '_libs/reduction', - 'pxdfiles': ['_libs/src/util']}, + 'pyxfile': '_libs/reduction'}, '_libs.tslibs.period': { 'pyxfile': '_libs/tslibs/period', - 'pxdfiles': ['_libs/src/util', - '_libs/missing', - '_libs/tslibs/timedeltas', - '_libs/tslibs/timezones', - '_libs/tslibs/nattype'], 'depends': tseries_depends + ['pandas/_libs/src/period_helper.h'], 'sources': np_datetime_sources + ['pandas/_libs/src/period_helper.c']}, '_libs.properties': { @@ -533,99 +523,63 @@ def pxd(name): 'depends': _pxi_dep['sparse']}, '_libs.tslib': { 'pyxfile': '_libs/tslib', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/conversion', - '_libs/tslibs/timedeltas', - '_libs/tslibs/timestamps', - '_libs/tslibs/timezones', - '_libs/tslibs/nattype'], 'depends': tseries_depends, 'sources': np_datetime_sources}, '_libs.tslibs.ccalendar': { 'pyxfile': '_libs/tslibs/ccalendar'}, '_libs.tslibs.conversion': { 'pyxfile': '_libs/tslibs/conversion', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/nattype', - '_libs/tslibs/timezones', - '_libs/tslibs/timedeltas'], 'depends': tseries_depends, 'sources': np_datetime_sources}, '_libs.tslibs.fields': { 'pyxfile': '_libs/tslibs/fields', - 'pxdfiles': ['_libs/tslibs/ccalendar', - '_libs/tslibs/nattype'], 'depends': tseries_depends, 'sources': np_datetime_sources}, '_libs.tslibs.frequencies': { - 'pyxfile': '_libs/tslibs/frequencies', - 'pxdfiles': ['_libs/src/util']}, + 'pyxfile': '_libs/tslibs/frequencies'}, '_libs.tslibs.nattype': { - 'pyxfile': '_libs/tslibs/nattype', - 'pxdfiles': ['_libs/src/util']}, + 'pyxfile': '_libs/tslibs/nattype'}, '_libs.tslibs.np_datetime': { 'pyxfile': '_libs/tslibs/np_datetime', 'depends': np_datetime_headers, 'sources': np_datetime_sources}, '_libs.tslibs.offsets': { 'pyxfile': '_libs/tslibs/offsets', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/conversion', - '_libs/tslibs/frequencies', - '_libs/tslibs/nattype'], 'depends': tseries_depends, 'sources': np_datetime_sources}, '_libs.tslibs.parsing': { - 'pyxfile': '_libs/tslibs/parsing', - 'pxdfiles': ['_libs/src/util']}, + 'pyxfile': '_libs/tslibs/parsing'}, '_libs.tslibs.resolution': { 'pyxfile': '_libs/tslibs/resolution', - 'pxdfiles': ['_libs/src/util', - '_libs/khash', - '_libs/tslibs/frequencies', - '_libs/tslibs/timezones'], 'depends': tseries_depends, 'sources': np_datetime_sources}, '_libs.tslibs.strptime': { 'pyxfile': '_libs/tslibs/strptime', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/nattype'], 'depends': tseries_depends, 'sources': np_datetime_sources}, '_libs.tslibs.timedeltas': { 'pyxfile': '_libs/tslibs/timedeltas', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/nattype'], 'depends': np_datetime_headers, 'sources': np_datetime_sources}, '_libs.tslibs.timestamps': { 'pyxfile': '_libs/tslibs/timestamps', - 'pxdfiles': ['_libs/src/util', - '_libs/tslibs/ccalendar', - '_libs/tslibs/conversion', - '_libs/tslibs/nattype', - '_libs/tslibs/timedeltas', - '_libs/tslibs/timezones'], 'depends': tseries_depends, 'sources': np_datetime_sources}, '_libs.tslibs.timezones': { - 'pyxfile': '_libs/tslibs/timezones', - 'pxdfiles': ['_libs/src/util']}, + 'pyxfile': '_libs/tslibs/timezones'}, '_libs.testing': { 'pyxfile': '_libs/testing'}, + '_libs.util': { + 'pyxfile': '_libs/util'}, '_libs.window': { - 'pyxfile': '_libs/window', - 'pxdfiles': ['_libs/skiplist', '_libs/src/util']}, - 'io.sas._sas': { + 'pyxfile': '_libs/window'}, + 'io.sas.sas': { 'pyxfile': 'io/sas/sas'}} extensions = [] for name, data in ext_data.items(): sources = [srcpath(data['pyxfile'], suffix=suffix, subdir='')] - pxds = [pxd(x) for x in data.get('pxdfiles', [])] - if suffix == '.pyx' and pxds: - sources.extend(pxds) sources.extend(data.get('sources', [])) @@ -714,6 +668,8 @@ def pxd(name): # The build cache system does string matching below this point. # if you change something, be careful. +build_ext.render_templates() + setup(name=DISTNAME, maintainer=AUTHOR, version=versioneer.get_version(), @@ -723,7 +679,7 @@ def pxd(name): 'data/legacy_pickle/*/*.pickle', 'data/legacy_msgpack/*/*.msgpack', 'data/html_encoding/*.html']}, - ext_modules=extensions, + ext_modules=cythonize(extensions), maintainer_email=EMAIL, description=DESCRIPTION, license=LICENSE, From 351a2bdc972848de0926bbb3e046cd1134a57fe5 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 31 Jan 2018 17:03:56 -0800 Subject: [PATCH 02/10] use cythonize for build --- pandas/_libs/internals.pyx | 2 +- pandas/_libs/khash.pxd | 2 +- pandas/_libs/parsers.pyx | 6 +- pandas/_libs/src/parser/tokenizer.h | 2 +- pandas/_libs/tslib.pyx | 7 +-- pandas/_libs/tslibs/np_datetime.pxd | 1 - pandas/_libs/tslibs/offsets.pyx | 10 +-- pandas/_libs/tslibs/period.pyx | 5 +- pandas/_libs/window.pyx | 2 +- setup.py | 98 ++++++++++++----------------- 10 files changed, 56 insertions(+), 79 deletions(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index b46a05a0842c3..dff8d18c2de4e 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -12,7 +12,7 @@ cdef extern from "Python.h": import numpy as np from numpy cimport int64_t -cdef extern from "compat_helper.h": +cdef extern from "src/compat_helper.h": cdef int slice_get_indices(PyObject* s, Py_ssize_t length, Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step, diff --git a/pandas/_libs/khash.pxd b/pandas/_libs/khash.pxd index b1d965c3618cd..ba09721c6bd35 100644 --- a/pandas/_libs/khash.pxd +++ b/pandas/_libs/khash.pxd @@ -3,7 +3,7 @@ from cpython cimport PyObject from numpy cimport int64_t, uint64_t, int32_t, uint32_t, float64_t -cdef extern from "khash_python.h": +cdef extern from "src/klib/khash_python.h": ctypedef uint32_t khint_t ctypedef khint_t khiter_t diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index efe61716d0831..bcf095121d921 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -72,7 +72,7 @@ cdef double NEGINF = -INF cdef extern from "errno.h": int errno -cdef extern from "headers/portable.h": +cdef extern from "src/headers/portable.h": # I *think* this is here so that strcasecmp is defined on Windows # so we don't get # `parsers.obj : error LNK2001: unresolved external symbol strcasecmp` @@ -87,7 +87,7 @@ except NameError: basestring = str -cdef extern from "parser/tokenizer.h": +cdef extern from "src/parser/tokenizer.h": ctypedef enum ParserState: START_RECORD @@ -243,7 +243,7 @@ cdef extern from "parser/tokenizer.h": int to_boolean(const char *item, uint8_t *val) nogil -cdef extern from "parser/io.h": +cdef extern from "src/parser/io.h": void *new_mmap(char *fname) int del_mmap(void *src) void* buffer_mmap_bytes(void *source, size_t nbytes, diff --git a/pandas/_libs/src/parser/tokenizer.h b/pandas/_libs/src/parser/tokenizer.h index 63baf91e3c136..90907978763f2 100644 --- a/pandas/_libs/src/parser/tokenizer.h +++ b/pandas/_libs/src/parser/tokenizer.h @@ -28,7 +28,7 @@ See LICENSE for the license #include "../headers/stdint.h" -#include "khash.h" +#include "../klib/khash.h" #define CHUNKSIZE 1024 * 256 #define KB 1024 diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 81df7981096ba..0c4acca679dd1 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -25,9 +25,7 @@ from tslibs.np_datetime cimport (check_dts_bounds, _string_to_dts, dt64_to_dtstruct, dtstruct_to_dt64, pydatetime_to_dt64, pydate_to_dt64, - get_datetime64_value, - days_per_month_table, - dayofweek, is_leapyear) + get_datetime64_value) from tslibs.np_datetime import OutOfBoundsDatetime from tslibs.parsing import parse_datetime_string @@ -40,6 +38,7 @@ import pytz UTC = pytz.utc +from tslibs.ccalendar cimport get_days_in_month, dayofweek from tslibs.timedeltas cimport cast_from_unit from tslibs.timedeltas import Timedelta from tslibs.timezones cimport (is_utc, is_tzlocal, is_fixed_offset, @@ -749,7 +748,7 @@ def monthrange(int64_t year, int64_t month): if month < 1 or month > 12: raise ValueError("bad month number 0; must be 1-12") - days = days_per_month_table[is_leapyear(year)][month - 1] + days = get_days_in_month(year, month) return (dayofweek(year, month, 1), days) diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 33b8b32bcf2dc..e30dcce1160de 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -54,7 +54,6 @@ cdef extern from "../src/datetime/np_datetime.h": PANDAS_DATETIMEUNIT fr, pandas_datetimestruct *result) nogil - int days_per_month_table[2][12] int dayofweek(int y, int m, int d) nogil int is_leapyear(int64_t year) nogil diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 46091ef357708..11d49b38e0d72 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -18,12 +18,12 @@ cnp.import_array() from ..util cimport is_string_object, is_integer_object from ccalendar import MONTHS, DAYS +from ccalendar cimport get_days_in_month, dayofweek from conversion cimport tz_convert_single, pydt_to_i8 from frequencies cimport get_freq_code from nattype cimport NPY_NAT from np_datetime cimport (pandas_datetimestruct, - dtstruct_to_dt64, dt64_to_dtstruct, - is_leapyear, days_per_month_table, dayofweek) + dtstruct_to_dt64, dt64_to_dtstruct) # --------------------------------------------------------------------- # Constants @@ -390,12 +390,6 @@ class BaseOffset(_BaseOffset): # ---------------------------------------------------------------------- # RelativeDelta Arithmetic -@cython.wraparound(False) -@cython.boundscheck(False) -cdef inline int get_days_in_month(int year, int month) nogil: - return days_per_month_table[is_leapyear(year)][month - 1] - - cdef inline int year_add_months(pandas_datetimestruct dts, int months) nogil: """new year number after shifting pandas_datetimestruct number of months""" return dts.year + (dts.month + months - 1) / 12 diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 8af2e84cda340..c190b951b4376 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -22,7 +22,7 @@ from cpython.datetime cimport PyDateTime_Check, PyDateTime_IMPORT PyDateTime_IMPORT from np_datetime cimport (pandas_datetimestruct, dtstruct_to_dt64, - dt64_to_dtstruct, is_leapyear) + dt64_to_dtstruct) from ..cimport util from ..util cimport is_period_object, is_string_object, INT32_MIN @@ -34,6 +34,7 @@ from timezones cimport is_utc, is_tzlocal, get_utcoffset, get_dst_info from timedeltas cimport delta_to_nanoseconds from ccalendar import MONTH_NUMBERS +from ccalendar cimport is_leapyear from frequencies cimport (get_freq_code, get_base_alias, get_to_timestamp_base, get_freq_str, get_rule_month) @@ -46,7 +47,7 @@ from pandas.tseries import offsets from pandas.tseries import frequencies -cdef extern from "period_helper.h": +cdef extern from "../src/period_helper.h": ctypedef struct date_info: int64_t absdate double abstime diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index cacb073da581c..c18b03c506a26 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -12,7 +12,7 @@ from numpy cimport ndarray, double_t, int64_t, float64_t cnp.import_array() -cdef extern from "../src/headers/math.h": +cdef extern from "src/headers/math.h": int signbit(double) nogil double sqrt(double x) nogil diff --git a/setup.py b/setup.py index 1462e8b70b09c..cd43277345e1c 100755 --- a/setup.py +++ b/setup.py @@ -139,6 +139,20 @@ def build_extensions(self): _build_ext.build_extensions(self) +def _cythonize(*args, **kwargs): + """ + Render tempita templates before calling cythonize + + Avoid running cythonize on `python setup.py clean` + See https://github.com/cython/cython/issues/1495 + """ + if len(sys.argv) > 1 and 'clean' in sys.argv: + return + + build_ext.render_templates() + cythonize(*args, **kwargs) + + DESCRIPTION = ("Powerful data structures for data analysis, time series," "and statistics") LONG_DESCRIPTION = """ @@ -458,113 +472,84 @@ def pxd(name): 'pandas/_libs/src/datetime/np_datetime_strings.h'] np_datetime_sources = ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c'] -tseries_depends = np_datetime_headers + ['pandas/_libs/tslibs/np_datetime.pxd'] # some linux distros require it libraries = ['m'] if not is_platform_windows() else [] ext_data = { '_libs.algos': { - 'pyxfile': '_libs/algos', - 'depends': _pxi_dep['algos']}, + 'pyxfile': '_libs/algos'}, '_libs.groupby': { - 'pyxfile': '_libs/groupby', - 'depends': _pxi_dep['groupby']}, + 'pyxfile': '_libs/groupby'}, '_libs.hashing': { 'pyxfile': '_libs/hashing'}, '_libs.hashtable': { - 'pyxfile': '_libs/hashtable', - 'depends': (['pandas/_libs/src/klib/khash_python.h'] + - _pxi_dep['hashtable'])}, + 'pyxfile': '_libs/hashtable'}, '_libs.index': { - 'pyxfile': '_libs/index', - 'depends': _pxi_dep['index'], - 'sources': np_datetime_sources}, + 'pyxfile': '_libs/index'}, '_libs.indexing': { 'pyxfile': '_libs/indexing'}, '_libs.internals': { 'pyxfile': '_libs/internals'}, '_libs.interval': { - 'pyxfile': '_libs/interval', - 'depends': _pxi_dep['interval']}, + 'pyxfile': '_libs/interval'}, '_libs.join': { - 'pyxfile': '_libs/join', - 'depends': _pxi_dep['join']}, + 'pyxfile': '_libs/join'}, '_libs.lib': { - 'pyxfile': '_libs/lib', - 'depends': lib_depends + tseries_depends}, + 'pyxfile': '_libs/lib'}, '_libs.missing': { - 'pyxfile': '_libs/missing', - 'depends': tseries_depends}, + 'pyxfile': '_libs/missing'}, '_libs.parsers': { 'pyxfile': '_libs/parsers', 'depends': ['pandas/_libs/src/parser/tokenizer.h', - 'pandas/_libs/src/parser/io.h', - 'pandas/_libs/src/numpy_helper.h'], + 'pandas/_libs/src/parser/io.h'], 'sources': ['pandas/_libs/src/parser/tokenizer.c', 'pandas/_libs/src/parser/io.c']}, '_libs.reduction': { 'pyxfile': '_libs/reduction'}, '_libs.tslibs.period': { 'pyxfile': '_libs/tslibs/period', - 'depends': tseries_depends + ['pandas/_libs/src/period_helper.h'], - 'sources': np_datetime_sources + ['pandas/_libs/src/period_helper.c']}, + 'include': ['pandas/_libs/src/period_helper.h'], + 'sources': ['pandas/_libs/src/period_helper.c']}, '_libs.properties': { 'pyxfile': '_libs/properties', 'include': []}, '_libs.reshape': { - 'pyxfile': '_libs/reshape', - 'depends': _pxi_dep['reshape']}, + 'pyxfile': '_libs/reshape'}, '_libs.skiplist': { - 'pyxfile': '_libs/skiplist', - 'depends': ['pandas/_libs/src/skiplist.h']}, + 'pyxfile': '_libs/skiplist'}, '_libs.sparse': { - 'pyxfile': '_libs/sparse', - 'depends': _pxi_dep['sparse']}, + 'pyxfile': '_libs/sparse'}, '_libs.tslib': { - 'pyxfile': '_libs/tslib', - 'depends': tseries_depends, - 'sources': np_datetime_sources}, + 'pyxfile': '_libs/tslib'}, '_libs.tslibs.ccalendar': { 'pyxfile': '_libs/tslibs/ccalendar'}, '_libs.tslibs.conversion': { 'pyxfile': '_libs/tslibs/conversion', - 'depends': tseries_depends, - 'sources': np_datetime_sources}, + 'sources': np_datetime_sources, + 'include': np_datetime_headers}, '_libs.tslibs.fields': { - 'pyxfile': '_libs/tslibs/fields', - 'depends': tseries_depends, - 'sources': np_datetime_sources}, + 'pyxfile': '_libs/tslibs/fields'}, '_libs.tslibs.frequencies': { 'pyxfile': '_libs/tslibs/frequencies'}, '_libs.tslibs.nattype': { 'pyxfile': '_libs/tslibs/nattype'}, '_libs.tslibs.np_datetime': { 'pyxfile': '_libs/tslibs/np_datetime', - 'depends': np_datetime_headers, - 'sources': np_datetime_sources}, + 'sources': np_datetime_sources, + 'include': np_datetime_headers}, '_libs.tslibs.offsets': { - 'pyxfile': '_libs/tslibs/offsets', - 'depends': tseries_depends, - 'sources': np_datetime_sources}, + 'pyxfile': '_libs/tslibs/offsets'}, '_libs.tslibs.parsing': { 'pyxfile': '_libs/tslibs/parsing'}, '_libs.tslibs.resolution': { - 'pyxfile': '_libs/tslibs/resolution', - 'depends': tseries_depends, - 'sources': np_datetime_sources}, + 'pyxfile': '_libs/tslibs/resolution'}, '_libs.tslibs.strptime': { - 'pyxfile': '_libs/tslibs/strptime', - 'depends': tseries_depends, - 'sources': np_datetime_sources}, + 'pyxfile': '_libs/tslibs/strptime'}, '_libs.tslibs.timedeltas': { - 'pyxfile': '_libs/tslibs/timedeltas', - 'depends': np_datetime_headers, - 'sources': np_datetime_sources}, + 'pyxfile': '_libs/tslibs/timedeltas'}, '_libs.tslibs.timestamps': { - 'pyxfile': '_libs/tslibs/timestamps', - 'depends': tseries_depends, - 'sources': np_datetime_sources}, + 'pyxfile': '_libs/tslibs/timestamps'}, '_libs.tslibs.timezones': { 'pyxfile': '_libs/tslibs/timezones'}, '_libs.testing': { @@ -583,7 +568,7 @@ def pxd(name): sources.extend(data.get('sources', [])) - include = data.get('include', common_include) + include = data.get('include', []) obj = Extension('pandas.{name}'.format(name=name), sources=sources, @@ -668,7 +653,6 @@ def pxd(name): # The build cache system does string matching below this point. # if you change something, be careful. -build_ext.render_templates() setup(name=DISTNAME, maintainer=AUTHOR, @@ -679,7 +663,7 @@ def pxd(name): 'data/legacy_pickle/*/*.pickle', 'data/legacy_msgpack/*/*.msgpack', 'data/html_encoding/*.html']}, - ext_modules=cythonize(extensions), + ext_modules=_cythonize(extensions), maintainer_email=EMAIL, description=DESCRIPTION, license=LICENSE, From 70652497801a3738c2a7f2b26ef3f3e7f24e6dba Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 31 Jan 2018 19:19:22 -0800 Subject: [PATCH 03/10] fix missing return --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index cd43277345e1c..e7b4ba06f5b18 100755 --- a/setup.py +++ b/setup.py @@ -150,7 +150,7 @@ def _cythonize(*args, **kwargs): return build_ext.render_templates() - cythonize(*args, **kwargs) + return cythonize(*args, **kwargs) DESCRIPTION = ("Powerful data structures for data analysis, time series," From 2b8ab2f7bddd71bbcdca7deeea59bb0a63e0197c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 31 Jan 2018 21:34:15 -0800 Subject: [PATCH 04/10] update cimport --- pandas/_libs/tslibs/timestamps.pyx | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 13621f0c0e010..476db096a5aaa 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -29,8 +29,7 @@ from nattype import NaT from nattype cimport NPY_NAT from np_datetime import OutOfBoundsDatetime from np_datetime cimport (reverse_ops, cmp_scalar, check_dts_bounds, - pandas_datetimestruct, dt64_to_dtstruct, - is_leapyear) + pandas_datetimestruct, dt64_to_dtstruct) from timedeltas import Timedelta from timedeltas cimport delta_to_nanoseconds from timezones cimport ( @@ -764,7 +763,7 @@ class Timestamp(_Timestamp): @property def is_leap_year(self): - return bool(is_leapyear(self.year)) + return bool(ccalendar.is_leapyear(self.year)) def tz_localize(self, tz, ambiguous='raise', errors='raise'): """ From c8c2c95861ca77d4bcb33d163ee95ea5fc930d4c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 1 Feb 2018 10:27:49 -0800 Subject: [PATCH 05/10] troubleshoot stdint.h complaint by removing khash dependency --- pandas/_libs/tslibs/resolution.pyx | 41 +++--------------------------- 1 file changed, 4 insertions(+), 37 deletions(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index f77cadbe9d930..4050d520784fe 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -10,11 +10,6 @@ cnp.import_array() from ..util cimport is_string_object, get_nat -from pandas._libs.khash cimport (khiter_t, - kh_destroy_int64, kh_put_int64, - kh_init_int64, kh_int64_t, - kh_resize_int64, kh_get_int64) - from cpython.datetime cimport datetime from np_datetime cimport pandas_datetimestruct, dt64_to_dtstruct @@ -346,34 +341,6 @@ class Resolution(object): # ---------------------------------------------------------------------- # Frequency Inference - -# TODO: this is non performant logic here (and duplicative) and this -# simply should call unique_1d directly -# plus no reason to depend on khash directly -cdef unique_deltas(ndarray[int64_t] arr): - cdef: - Py_ssize_t i, n = len(arr) - int64_t val - khiter_t k - kh_int64_t *table - int ret = 0 - list uniques = [] - - table = kh_init_int64() - kh_resize_int64(table, 10) - for i in range(n - 1): - val = arr[i + 1] - arr[i] - k = kh_get_int64(table, val) - if k == table.n_buckets: - kh_put_int64(table, val, &ret) - uniques.append(val) - kh_destroy_int64(table) - - result = np.array(uniques, dtype=np.int64) - result.sort() - return result - - def _is_multiple(us, mult): return us % mult == 0 @@ -410,11 +377,11 @@ class _FrequencyInferer(object): @cache_readonly def deltas(self): - return unique_deltas(self.values) + return unique(np.diff(self.values)) @cache_readonly def deltas_asi8(self): - return unique_deltas(self.index.asi8) + return unique(np.diff(self.index.asi8)) @cache_readonly def is_unique(self): @@ -520,11 +487,11 @@ class _FrequencyInferer(object): @cache_readonly def mdiffs(self): nmonths = self.fields['Y'] * 12 + self.fields['M'] - return unique_deltas(nmonths.astype('i8')) + return unique(np.diff(nmonths.astype('i8'))) @cache_readonly def ydiffs(self): - return unique_deltas(self.fields['Y'].astype('i8')) + return unique(np.diff(self.fields['Y'].astype('i8'))) def _infer_daily_rule(self): annual_rule = self._get_annual_rule() From 4ede1fe46178d02258156025692fcbeaf111c0b0 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 1 Feb 2018 11:08:27 -0800 Subject: [PATCH 06/10] lean in to the troubleshooting kludge --- pandas/_libs/tslibs/resolution.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 4050d520784fe..8b412ced483af 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -377,11 +377,11 @@ class _FrequencyInferer(object): @cache_readonly def deltas(self): - return unique(np.diff(self.values)) + return np.sort(unique(np.diff(self.values))) @cache_readonly def deltas_asi8(self): - return unique(np.diff(self.index.asi8)) + return np.sort(unique(np.diff(self.index.asi8))) @cache_readonly def is_unique(self): @@ -487,11 +487,11 @@ class _FrequencyInferer(object): @cache_readonly def mdiffs(self): nmonths = self.fields['Y'] * 12 + self.fields['M'] - return unique(np.diff(nmonths.astype('i8'))) + return np.sort(unique(np.diff(nmonths.astype('i8')))) @cache_readonly def ydiffs(self): - return unique(np.diff(self.fields['Y'].astype('i8'))) + return np.sort(unique(np.diff(self.fields['Y'].astype('i8')))) def _infer_daily_rule(self): annual_rule = self._get_annual_rule() From 5b24944f921032b2d7281056f785abb867cc9af1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 3 Feb 2018 15:51:27 -0800 Subject: [PATCH 07/10] troubleshoot --- setup.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index be998a85e13be..37e2d4ada7838 100755 --- a/setup.py +++ b/setup.py @@ -103,6 +103,7 @@ def is_platform_mac(): _pxifiles.extend(pxi_files) _pxi_dep[module] = pxi_files +numpy_incl = pkg_resources.resource_filename('numpy', 'core/include') class build_ext(_build_ext): @classmethod @@ -130,8 +131,6 @@ def render_templates(cls): def build_extensions(self): self.render_templates() - numpy_incl = pkg_resources.resource_filename('numpy', 'core/include') - for ext in self.extensions: if (hasattr(ext, 'include_dirs') and numpy_incl not in ext.include_dirs): @@ -139,7 +138,7 @@ def build_extensions(self): _build_ext.build_extensions(self) -def _cythonize(*args, **kwargs): +def _cythonize(extensions, *args, **kwargs): """ Render tempita templates before calling cythonize @@ -149,8 +148,13 @@ def _cythonize(*args, **kwargs): if len(sys.argv) > 1 and 'clean' in sys.argv: return + for ext in extensions: + if (hasattr(ext, 'include_dirs') and + numpy_incl not in ext.include_dirs): + ext.include_dirs.append(numpy_incl) + build_ext.render_templates() - return cythonize(*args, **kwargs) + return cythonize(extensions, *args, **kwargs) DESCRIPTION = ("Powerful data structures for data analysis, time series," @@ -571,7 +575,7 @@ def pxd(name): sources.extend(data.get('sources', [])) - include = data.get('include', []) + include = data.get('include', common_include) obj = Extension('pandas.{name}'.format(name=name), sources=sources, From d7cd52194648ab60ef91c7de7eae19bde7c16264 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 3 Feb 2018 17:12:30 -0800 Subject: [PATCH 08/10] whitespace cleanup --- setup.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 37e2d4ada7838..e54720f5bf4b5 100755 --- a/setup.py +++ b/setup.py @@ -105,6 +105,7 @@ def is_platform_mac(): numpy_incl = pkg_resources.resource_filename('numpy', 'core/include') + class build_ext(_build_ext): @classmethod def render_templates(cls): @@ -150,8 +151,8 @@ def _cythonize(extensions, *args, **kwargs): for ext in extensions: if (hasattr(ext, 'include_dirs') and - numpy_incl not in ext.include_dirs): - ext.include_dirs.append(numpy_incl) + numpy_incl not in ext.include_dirs): + ext.include_dirs.append(numpy_incl) build_ext.render_templates() return cythonize(extensions, *args, **kwargs) From 74e8f358582c7fe23971a9ed11ddb5acbc4131dc Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 5 Feb 2018 17:47:56 -0800 Subject: [PATCH 09/10] Implement tslibs.util for compat with cython<=0.24 --- pandas/_libs/tslibs/conversion.pyx | 6 +- pandas/_libs/tslibs/frequencies.pyx | 2 +- pandas/_libs/tslibs/nattype.pyx | 6 +- pandas/_libs/tslibs/offsets.pyx | 2 +- pandas/_libs/tslibs/period.pyx | 7 ++- pandas/_libs/tslibs/resolution.pyx | 2 +- pandas/_libs/tslibs/strptime.pyx | 2 +- pandas/_libs/tslibs/timedeltas.pyx | 8 +-- pandas/_libs/tslibs/timestamps.pyx | 7 +-- pandas/_libs/tslibs/timezones.pyx | 2 +- pandas/_libs/tslibs/util.pxd | 22 +++++++ pandas/_libs/tslibs/util.pyx | 89 +++++++++++++++++++++++++++++ pandas/_libs/util.pxd | 18 ++---- pandas/_libs/util.pyx | 77 +------------------------ setup.py | 3 + 15 files changed, 144 insertions(+), 109 deletions(-) create mode 100644 pandas/_libs/tslibs/util.pxd create mode 100644 pandas/_libs/tslibs/util.pyx diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index ce327de2e41e1..a32bfc1f6836c 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -27,9 +27,9 @@ from np_datetime cimport (check_dts_bounds, get_datetime64_unit, get_datetime64_value, pydatetime_to_dt64) -from ..util cimport (is_string_object, - is_datetime64_object, - is_integer_object, is_float_object, is_array) +from util cimport (is_string_object, + is_datetime64_object, + is_integer_object, is_float_object, is_array) from timedeltas cimport cast_from_unit from timezones cimport (is_utc, is_tzlocal, is_fixed_offset, diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index a977773d49cf9..abaf8cad09bdb 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -8,7 +8,7 @@ cimport numpy as cnp from numpy cimport int64_t cnp.import_array() -from ..util cimport is_integer_object, is_string_object +from util cimport is_integer_object, is_string_object from ccalendar import MONTH_NUMBERS diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index c2d20e85af7a5..9f4ef4e515058 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -17,9 +17,9 @@ cimport numpy as cnp from numpy cimport int64_t cnp.import_array() -from ..util cimport (get_nat, - is_integer_object, is_float_object, - is_datetime64_object, is_timedelta64_object) +from util cimport (get_nat, + is_integer_object, is_float_object, + is_datetime64_object, is_timedelta64_object) # ---------------------------------------------------------------------- # Constants diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 11d49b38e0d72..998225faf5888 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -15,7 +15,7 @@ from numpy cimport int64_t cnp.import_array() -from ..util cimport is_string_object, is_integer_object +from util cimport is_string_object, is_integer_object from ccalendar import MONTHS, DAYS from ccalendar cimport get_days_in_month, dayofweek diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index c190b951b4376..55df4b6505953 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -24,8 +24,8 @@ PyDateTime_IMPORT from np_datetime cimport (pandas_datetimestruct, dtstruct_to_dt64, dt64_to_dtstruct) -from ..cimport util -from ..util cimport is_period_object, is_string_object, INT32_MIN +cimport util +from util cimport is_period_object, is_string_object from pandas._libs.missing cimport is_null_datetimelike @@ -47,6 +47,9 @@ from pandas.tseries import offsets from pandas.tseries import frequencies +cdef extern from "../src/headers/stdint.h": + enum: INT32_MIN + cdef extern from "../src/period_helper.h": ctypedef struct date_info: int64_t absdate diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 8b412ced483af..ed5ba2d78cf77 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -8,7 +8,7 @@ cimport numpy as cnp from numpy cimport ndarray, int64_t cnp.import_array() -from ..util cimport is_string_object, get_nat +from util cimport is_string_object, get_nat from cpython.datetime cimport datetime diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 7b64468bc4a31..e7dabb94f8975 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -35,7 +35,7 @@ from cpython.datetime cimport datetime from np_datetime cimport (check_dts_bounds, dtstruct_to_dt64, pandas_datetimestruct) -from ..util cimport is_string_object +from util cimport is_string_object from nattype cimport checknull_with_nat, NPY_NAT from nattype import nat_strings diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index c607f02158622..37693068e0974 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -21,10 +21,10 @@ from cpython.datetime cimport (datetime, timedelta, PyDateTime_IMPORT -from ..cimport util -from ..util cimport (is_timedelta64_object, is_datetime64_object, - is_integer_object, is_float_object, - is_string_object) +cimport util +from util cimport (is_timedelta64_object, is_datetime64_object, + is_integer_object, is_float_object, + is_string_object) from np_datetime cimport (cmp_scalar, reverse_ops, td64_to_tdstruct, pandas_timedeltastruct) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 476db096a5aaa..6f9cda047af18 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -16,9 +16,8 @@ from cpython.datetime cimport (datetime, PyDateTime_IMPORT) PyDateTime_IMPORT -from ..util cimport (is_datetime64_object, is_timedelta64_object, - is_integer_object, is_string_object, is_array, - INT64_MAX) +from util cimport (is_datetime64_object, is_timedelta64_object, + is_integer_object, is_string_object, is_array) cimport ccalendar from conversion import tz_localize_to_utc, date_normalize @@ -1005,7 +1004,7 @@ class Timestamp(_Timestamp): # Add the min and max fields at the class level -cdef int64_t _NS_UPPER_BOUND = INT64_MAX +cdef int64_t _NS_UPPER_BOUND = np.iinfo(np.int64).max # the smallest value we could actually represent is # INT64_MIN + 1 == -9223372036854775807 # but to allow overflow free conversion with a microsecond resolution diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index f2f9b3245a7d7..215ae9ce087ee 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -23,7 +23,7 @@ from numpy cimport ndarray, int64_t cnp.import_array() # ---------------------------------------------------------------------- -from ..util cimport is_string_object, is_integer_object, get_nat +from util cimport is_string_object, is_integer_object, get_nat cdef int64_t NPY_NAT = get_nat() diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd new file mode 100644 index 0000000000000..34375bd18f73b --- /dev/null +++ b/pandas/_libs/tslibs/util.pxd @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- + +from cython cimport Py_ssize_t + +cimport numpy as cnp +from numpy cimport ndarray, int64_t + + +cdef bint is_string_object(object obj) nogil +cdef bint is_integer_object(object obj) nogil +cdef bint is_float_object(object obj) nogil +cdef bint is_complex_object(object obj) nogil +cdef bint is_bool_object(object obj) nogil +cdef bint is_timedelta64_object(object obj) nogil +cdef bint is_datetime64_object(object obj) nogil +cdef bint is_array(object o) +cdef bint is_period_object(object val) + +cdef bint _checknull(object val) +cdef bint _checknan(object val) + +cdef int64_t get_nat() diff --git a/pandas/_libs/tslibs/util.pyx b/pandas/_libs/tslibs/util.pyx new file mode 100644 index 0000000000000..83f679c6f573c --- /dev/null +++ b/pandas/_libs/tslibs/util.pyx @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +cimport cpython +from cpython cimport PyTypeObject + +cdef extern from "Python.h": + # Note: importing extern-style allows us to declare these as nogil + # functions, whereas `from cpython cimport` does not. + bint PyUnicode_Check(object obj) nogil + bint PyString_Check(object obj) nogil + bint PyBool_Check(object obj) nogil + bint PyFloat_Check(object obj) nogil + bint PyComplex_Check(object obj) nogil + bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil + +import numpy as np +cimport numpy as cnp +from numpy cimport ndarray, NPY_C_CONTIGUOUS, NPY_F_CONTIGUOUS +cnp.import_array() + +cdef extern from "numpy/arrayobject.h": + PyTypeObject PyFloatingArrType_Type + +cdef extern from "numpy/ndarrayobject.h": + PyTypeObject PyTimedeltaArrType_Type + PyTypeObject PyDatetimeArrType_Type + PyTypeObject PyComplexFloatingArrType_Type + PyTypeObject PyBoolArrType_Type + + bint PyArray_IsIntegerScalar(obj) nogil + bint PyArray_Check(obj) nogil + + +cdef int64_t get_nat(): + return np.datetime64('NaT').astype(np.int64) + +# -------------------------------------------------------------------- +# Type Checking + +cdef bint is_string_object(object obj) nogil: + return PyString_Check(obj) or PyUnicode_Check(obj) + + +cdef bint is_integer_object(object obj) nogil: + return not PyBool_Check(obj) and PyArray_IsIntegerScalar(obj) + + +cdef bint is_float_object(object obj) nogil: + return (PyFloat_Check(obj) or + (PyObject_TypeCheck(obj, &PyFloatingArrType_Type))) + + +cdef bint is_complex_object(object obj) nogil: + return (PyComplex_Check(obj) or + PyObject_TypeCheck(obj, &PyComplexFloatingArrType_Type)) + + +cdef bint is_bool_object(object obj) nogil: + return (PyBool_Check(obj) or + PyObject_TypeCheck(obj, &PyBoolArrType_Type)) + + +cdef bint is_timedelta64_object(object obj) nogil: + return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) + + +cdef bint is_datetime64_object(object obj) nogil: + return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) + + +cdef bint is_array(object o): + return cnp.PyArray_Check(o) + + +cdef bint is_period_object(object val): + return getattr(val, '_typ', '_typ') == 'period' + + +# -------------------------------------------------------------------- + + +cdef inline bint _checknull(object val): + try: + return val is None or (cpython.PyFloat_Check(val) and val != val) + except ValueError: + return False + + +cdef inline bint _checknan(object val): + return not cnp.PyArray_Check(val) and val != val diff --git a/pandas/_libs/util.pxd b/pandas/_libs/util.pxd index 5016362a6e37f..f485bc1514a81 100644 --- a/pandas/_libs/util.pxd +++ b/pandas/_libs/util.pxd @@ -44,19 +44,13 @@ cdef extern from "src/numpy_helper.h": object char_to_string(char*) -cdef bint is_string_object(object obj) nogil -cdef bint is_integer_object(object obj) nogil -cdef bint is_float_object(object obj) nogil -cdef bint is_complex_object(object obj) nogil -cdef bint is_bool_object(object obj) nogil -cdef bint is_timedelta64_object(object obj) nogil -cdef bint is_datetime64_object(object obj) nogil +from tslibs.util cimport (is_string_object, + is_integer_object, is_float_object, + is_complex_object, is_bool_object, + is_timedelta64_object, is_datetime64_object, + is_array, + is_period_object, _checknull, _checknan) -cdef bint is_array(object o) -cdef bint is_period_object(object val) - -cdef bint _checknull(object val) -cdef bint _checknan(object val) cdef object unbox_if_zerodim(object arr) diff --git a/pandas/_libs/util.pyx b/pandas/_libs/util.pyx index 81d5d22fdfab3..bc1bd92964567 100644 --- a/pandas/_libs/util.pyx +++ b/pandas/_libs/util.pyx @@ -2,75 +2,10 @@ cimport cpython from cpython cimport PyTypeObject -cdef extern from "Python.h": - # Note: importing extern-style allows us to declare these as nogil - # functions, whereas `from cpython cimport` does not. - bint PyUnicode_Check(object obj) nogil - bint PyString_Check(object obj) nogil - bint PyBool_Check(object obj) nogil - bint PyFloat_Check(object obj) nogil - bint PyComplex_Check(object obj) nogil - bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil - cimport numpy as cnp from numpy cimport ndarray, NPY_C_CONTIGUOUS, NPY_F_CONTIGUOUS cnp.import_array() -cdef extern from "numpy/arrayobject.h": - PyTypeObject PyFloatingArrType_Type - -cdef extern from "numpy/ndarrayobject.h": - PyTypeObject PyTimedeltaArrType_Type - PyTypeObject PyDatetimeArrType_Type - PyTypeObject PyComplexFloatingArrType_Type - PyTypeObject PyBoolArrType_Type - - bint PyArray_IsIntegerScalar(obj) nogil - bint PyArray_Check(obj) nogil - -# -------------------------------------------------------------------- -# Type Checking - -cdef bint is_string_object(object obj) nogil: - return PyString_Check(obj) or PyUnicode_Check(obj) - - -cdef bint is_integer_object(object obj) nogil: - return not PyBool_Check(obj) and PyArray_IsIntegerScalar(obj) - - -cdef bint is_float_object(object obj) nogil: - return (PyFloat_Check(obj) or - (PyObject_TypeCheck(obj, &PyFloatingArrType_Type))) - - -cdef bint is_complex_object(object obj) nogil: - return (PyComplex_Check(obj) or - PyObject_TypeCheck(obj, &PyComplexFloatingArrType_Type)) - - -cdef bint is_bool_object(object obj) nogil: - return (PyBool_Check(obj) or - PyObject_TypeCheck(obj, &PyBoolArrType_Type)) - - -cdef bint is_timedelta64_object(object obj) nogil: - return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) - - -cdef bint is_datetime64_object(object obj) nogil: - return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) - - -cdef bint is_array(object o): - return cnp.PyArray_Check(o) - - -cdef bint is_period_object(object val): - return getattr(val, '_typ', '_typ') == 'period' - -# -------------------------------------------------------------------- - cdef object get_value_at(ndarray arr, object loc): cdef: @@ -114,6 +49,7 @@ cdef set_value_at_unsafe(ndarray arr, object loc, object value): assign_value_1d(arr, i, value) + cdef set_value_at(ndarray arr, object loc, object value): """Sets a value into the array after checking that the array is mutable. """ @@ -123,17 +59,6 @@ cdef set_value_at(ndarray arr, object loc, object value): set_value_at_unsafe(arr, loc, value) -cdef inline bint _checknull(object val): - try: - return val is None or (cpython.PyFloat_Check(val) and val != val) - except ValueError: - return False - - -cdef inline bint _checknan(object val): - return not cnp.PyArray_Check(val) and val != val - - cdef inline object unbox_if_zerodim(object arr): """ If arr is zerodim array, return a proper array scalar (e.g. np.int64). diff --git a/setup.py b/setup.py index e54720f5bf4b5..401ecfc75476f 100755 --- a/setup.py +++ b/setup.py @@ -352,6 +352,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/tslibs/frequencies.pyx', 'pandas/_libs/tslibs/resolution.pyx', 'pandas/_libs/tslibs/parsing.pyx', + 'pandas/_libs/tslibs/util.pyx', 'pandas/_libs/util.pyx', 'pandas/_libs/writers.pyx', 'pandas/io/sas/sas.pyx'] @@ -558,6 +559,8 @@ def pxd(name): 'pyxfile': '_libs/tslibs/timestamps'}, '_libs.tslibs.timezones': { 'pyxfile': '_libs/tslibs/timezones'}, + '_libs.tslibs.util': { + 'pyxfile': '_libs/tslibs/util'}, '_libs.testing': { 'pyxfile': '_libs/testing'}, '_libs.util': { From f09bee78797a20a2e1bd6ebdccf8a91ff00f01d9 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 5 Feb 2018 22:47:35 -0800 Subject: [PATCH 10/10] fix cimport --- pandas/_libs/tslibs/period.pyx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 55df4b6505953..ee95fc503595b 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -47,6 +47,9 @@ from pandas.tseries import offsets from pandas.tseries import frequencies +cdef extern from "../src/numpy_helper.h": + object char_to_string(char*) + cdef extern from "../src/headers/stdint.h": enum: INT32_MIN @@ -343,7 +346,7 @@ cdef object _period_strftime(int64_t value, int freq, object fmt): formatted = c_strftime(&dinfo, fmt) - result = util.char_to_string(formatted) + result = char_to_string(formatted) free(formatted) for i in range(len(extra_fmts)):