diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 705336dfadf90..5e3eb1f00b18c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -5,32 +5,23 @@ # distutils: define_macros=CYTHON_TRACE_NOGIL=0 cimport numpy as np -from numpy cimport (int32_t, int64_t, import_array, ndarray, - float64_t) +from numpy cimport int64_t, import_array, ndarray, float64_t import numpy as np -import sys -cdef bint PY3 = (sys.version_info[0] >= 3) from cpython cimport ( PyTypeObject, PyFloat_Check, - PyComplex_Check, - PyObject_RichCompareBool, - PyObject_RichCompare, - Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE, - PyUnicode_Check) + PyComplex_Check) cdef extern from "Python.h": cdef PyTypeObject *Py_TYPE(object) from util cimport (is_integer_object, is_float_object, is_string_object, - is_datetime64_object, is_timedelta64_object, - INT64_MAX) + is_datetime64_object, is_timedelta64_object) cimport util -from cpython.datetime cimport (PyDelta_Check, PyTZInfo_Check, - PyDateTime_Check, PyDate_Check, +from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, PyDateTime_IMPORT, timedelta, datetime) # import datetime C API @@ -38,13 +29,8 @@ PyDateTime_IMPORT # this is our datetime.pxd from datetime cimport _string_to_dts -# stdlib datetime imports -from datetime import time as datetime_time - from tslibs.np_datetime cimport (check_dts_bounds, - reverse_ops, - cmp_scalar, pandas_datetimestruct, dt64_to_dtstruct, dtstruct_to_dt64, pydatetime_to_dt64, pydate_to_dt64, @@ -67,38 +53,24 @@ UTC = pytz.utc import_array() -from tslibs.timedeltas cimport cast_from_unit, delta_to_nanoseconds +from tslibs.timedeltas cimport cast_from_unit from tslibs.timedeltas import Timedelta from tslibs.timezones cimport ( is_utc, is_tzlocal, is_fixed_offset, treat_tz_as_pytz, - get_timezone, maybe_get_tz, + get_timezone, get_dst_info) -from tslibs.fields import get_start_end_field, get_date_field from tslibs.conversion cimport (tz_convert_single, _TSObject, - convert_to_tsobject, 
convert_datetime_to_tsobject, get_datetime64_nanos) -from tslibs.conversion import (tz_localize_to_utc, - tz_convert_single, date_normalize) +from tslibs.conversion import tz_convert_single from tslibs.nattype import NaT, nat_strings, iNaT from tslibs.nattype cimport _checknull_with_nat, NPY_NAT - -cdef inline object create_timestamp_from_ts( - int64_t value, pandas_datetimestruct dts, - object tz, object freq): - """ convenience routine to construct a Timestamp from its parts """ - cdef _Timestamp ts_base - ts_base = _Timestamp.__new__(Timestamp, dts.year, dts.month, - dts.day, dts.hour, dts.min, - dts.sec, dts.us, tz) - ts_base.value = value - ts_base.freq = freq - ts_base.nanosecond = dts.ps / 1000 - - return ts_base +from tslibs.timestamps cimport (create_timestamp_from_ts, + _NS_UPPER_BOUND, _NS_LOWER_BOUND) +from tslibs.timestamps import Timestamp cdef inline object create_datetime_from_ts( @@ -210,625 +182,6 @@ def ints_to_pytimedelta(ndarray[int64_t] arr, box=False): return result -_zero_time = datetime_time(0, 0) -_no_input = object() - -# Python front end to C extension type _Timestamp -# This serves as the box for datetime64 - - -class Timestamp(_Timestamp): - """Pandas replacement for datetime.datetime - - TimeStamp is the pandas equivalent of python's Datetime - and is interchangable with it in most cases. It's the type used - for the entries that make up a DatetimeIndex, and other timeseries - oriented data structures in pandas. - - Parameters - ---------- - ts_input : datetime-like, str, int, float - Value to be converted to Timestamp - freq : str, DateOffset - Offset which Timestamp will have - tz : string, pytz.timezone, dateutil.tz.tzfile or None - Time zone for time which Timestamp will have. - unit : string - numpy unit used for conversion, if ts_input is int or float - offset : str, DateOffset - Deprecated, use freq - - year, month, day : int - .. versionadded:: 0.19.0 - hour, minute, second, microsecond : int, optional, default 0 - .. 
versionadded:: 0.19.0 - tzinfo : datetime.tzinfo, optional, default None - .. versionadded:: 0.19.0 - - Notes - ----- - There are essentially three calling conventions for the constructor. The - primary form accepts four parameters. They can be passed by position or - keyword. - - The other two forms mimic the parameters from ``datetime.datetime``. They - can be passed by either position or keyword, but not both mixed together. - - Examples - -------- - >>> pd.Timestamp('2017-01-01T12') - Timestamp('2017-01-01 12:00:00') - - >>> pd.Timestamp(2017, 1, 1, 12) - Timestamp('2017-01-01 12:00:00') - - >>> pd.Timestamp(year=2017, month=1, day=1, hour=12) - Timestamp('2017-01-01 12:00:00') - """ - - @classmethod - def fromordinal(cls, ordinal, freq=None, tz=None, offset=None): - """ - Timestamp.fromordinal(ordinal, freq=None, tz=None, offset=None) - - passed an ordinal, translate and convert to a ts - note: by definition there cannot be any tz info on the ordinal itself - - Parameters - ---------- - ordinal : int - date corresponding to a proleptic Gregorian ordinal - freq : str, DateOffset - Offset which Timestamp will have - tz : string, pytz.timezone, dateutil.tz.tzfile or None - Time zone for time which Timestamp will have. - offset : str, DateOffset - Deprecated, use freq - """ - return cls(datetime.fromordinal(ordinal), - freq=freq, tz=tz, offset=offset) - - @classmethod - def now(cls, tz=None): - """ - Timestamp.now(tz=None) - - Returns new Timestamp object representing current time local to - tz. - - Parameters - ---------- - tz : string / timezone object, default None - Timezone to localize to - """ - if is_string_object(tz): - tz = maybe_get_tz(tz) - return cls(datetime.now(tz)) - - @classmethod - def today(cls, tz=None): - """ - Timestamp.today(cls, tz=None) - - Return the current time in the local timezone. This differs - from datetime.today() in that it can be localized to a - passed timezone. 
- - Parameters - ---------- - tz : string / timezone object, default None - Timezone to localize to - """ - return cls.now(tz) - - @classmethod - def utcnow(cls): - """ - Timestamp.utcnow() - - Return a new Timestamp representing UTC day and time. - """ - return cls.now('UTC') - - @classmethod - def utcfromtimestamp(cls, ts): - """ - Timestamp.utcfromtimestamp(ts) - - Construct a naive UTC datetime from a POSIX timestamp. - """ - return cls(datetime.utcfromtimestamp(ts)) - - @classmethod - def fromtimestamp(cls, ts): - """ - Timestamp.fromtimestamp(ts) - - timestamp[, tz] -> tz's local time from POSIX timestamp. - """ - return cls(datetime.fromtimestamp(ts)) - - @classmethod - def combine(cls, date, time): - """ - Timsetamp.combine(date, time) - - date, time -> datetime with same date and time fields - """ - return cls(datetime.combine(date, time)) - - def __new__(cls, object ts_input=_no_input, - object freq=None, tz=None, unit=None, - year=None, month=None, day=None, - hour=None, minute=None, second=None, microsecond=None, - tzinfo=None, - object offset=None): - # The parameter list folds together legacy parameter names (the first - # four) and positional and keyword parameter names from pydatetime. - # - # There are three calling forms: - # - # - In the legacy form, the first parameter, ts_input, is required - # and may be datetime-like, str, int, or float. The second - # parameter, offset, is optional and may be str or DateOffset. - # - # - ints in the first, second, and third arguments indicate - # pydatetime positional arguments. Only the first 8 arguments - # (standing in for year, month, day, hour, minute, second, - # microsecond, tzinfo) may be non-None. As a shortcut, we just - # check that the second argument is an int. - # - # - Nones for the first four (legacy) arguments indicate pydatetime - # keyword arguments. year, month, and day are required. As a - # shortcut, we just check that the first argument was not passed. 
- # - # Mixing pydatetime positional and keyword arguments is forbidden! - - cdef _TSObject ts - - if offset is not None: - # deprecate offset kwd in 0.19.0, GH13593 - if freq is not None: - msg = "Can only specify freq or offset, not both" - raise TypeError(msg) - warnings.warn("offset is deprecated. Use freq instead", - FutureWarning) - freq = offset - - if tzinfo is not None: - if not PyTZInfo_Check(tzinfo): - # tzinfo must be a datetime.tzinfo object, GH#17690 - raise TypeError('tzinfo must be a datetime.tzinfo object, ' - 'not %s' % type(tzinfo)) - elif tz is not None: - raise ValueError('Can provide at most one of tz, tzinfo') - - if ts_input is _no_input: - # User passed keyword arguments. - if tz is None: - # Handle the case where the user passes `tz` and not `tzinfo` - tz = tzinfo - return Timestamp(datetime(year, month, day, hour or 0, - minute or 0, second or 0, - microsecond or 0, tzinfo), - tz=tz) - elif is_integer_object(freq): - # User passed positional arguments: - # Timestamp(year, month, day[, hour[, minute[, second[, - # microsecond[, tzinfo]]]]]) - return Timestamp(datetime(ts_input, freq, tz, unit or 0, - year or 0, month or 0, day or 0, - hour), tz=hour) - - if tzinfo is not None: - # User passed tzinfo instead of tz; avoid silently ignoring - tz, tzinfo = tzinfo, None - - ts = convert_to_tsobject(ts_input, tz, unit, 0, 0) - - if ts.value == NPY_NAT: - return NaT - - if is_string_object(freq): - from pandas.tseries.frequencies import to_offset - freq = to_offset(freq) - - return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq) - - def _round(self, freq, rounder): - - cdef: - int64_t unit, r, value, buff = 1000000 - object result - - from pandas.tseries.frequencies import to_offset - unit = to_offset(freq).nanos - if self.tz is not None: - value = self.tz_localize(None).value - else: - value = self.value - if unit < 1000 and unit % 1000 != 0: - # for nano rounding, work with the last 6 digits separately - # due to float precision - r 
= (buff * (value // buff) + unit * - (rounder((value % buff) / float(unit))).astype('i8')) - elif unit >= 1000 and unit % 1000 != 0: - msg = 'Precision will be lost using frequency: {}' - warnings.warn(msg.format(freq)) - r = (unit * rounder(value / float(unit)).astype('i8')) - else: - r = (unit * rounder(value / float(unit)).astype('i8')) - result = Timestamp(r, unit='ns') - if self.tz is not None: - result = result.tz_localize(self.tz) - return result - - def round(self, freq): - """ - Round the Timestamp to the specified resolution - - Returns - ------- - a new Timestamp rounded to the given resolution of `freq` - - Parameters - ---------- - freq : a freq string indicating the rounding resolution - - Raises - ------ - ValueError if the freq cannot be converted - """ - return self._round(freq, np.round) - - def floor(self, freq): - """ - return a new Timestamp floored to this resolution - - Parameters - ---------- - freq : a freq string indicating the flooring resolution - """ - return self._round(freq, np.floor) - - def ceil(self, freq): - """ - return a new Timestamp ceiled to this resolution - - Parameters - ---------- - freq : a freq string indicating the ceiling resolution - """ - return self._round(freq, np.ceil) - - @property - def tz(self): - """ - Alias for tzinfo - """ - return self.tzinfo - - @property - def offset(self): - warnings.warn(".offset is deprecated. Use .freq instead", - FutureWarning) - return self.freq - - def __setstate__(self, state): - self.value = state[0] - self.freq = state[1] - self.tzinfo = state[2] - - def __reduce__(self): - object_state = self.value, self.freq, self.tzinfo - return (Timestamp, object_state) - - def to_period(self, freq=None): - """ - Return an period of which this timestamp is an observation. 
- """ - from pandas import Period - - if freq is None: - freq = self.freq - - return Period(self, freq=freq) - - @property - def dayofweek(self): - return self.weekday() - - @property - def weekday_name(self): - cdef dict wdays = {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', - 3: 'Thursday', 4: 'Friday', 5: 'Saturday', - 6: 'Sunday'} - return wdays[self.weekday()] - - @property - def dayofyear(self): - return self._get_field('doy') - - @property - def week(self): - return self._get_field('woy') - - weekofyear = week - - @property - def quarter(self): - return self._get_field('q') - - @property - def days_in_month(self): - return self._get_field('dim') - - daysinmonth = days_in_month - - @property - def freqstr(self): - return getattr(self.freq, 'freqstr', self.freq) - - @property - def is_month_start(self): - return self._get_start_end_field('is_month_start') - - @property - def is_month_end(self): - return self._get_start_end_field('is_month_end') - - @property - def is_quarter_start(self): - return self._get_start_end_field('is_quarter_start') - - @property - def is_quarter_end(self): - return self._get_start_end_field('is_quarter_end') - - @property - def is_year_start(self): - return self._get_start_end_field('is_year_start') - - @property - def is_year_end(self): - return self._get_start_end_field('is_year_end') - - @property - def is_leap_year(self): - return bool(is_leapyear(self.year)) - - def tz_localize(self, tz, ambiguous='raise', errors='raise'): - """ - Convert naive Timestamp to local time zone, or remove - timezone from tz-aware Timestamp. - - Parameters - ---------- - tz : string, pytz.timezone, dateutil.tz.tzfile or None - Time zone for time which Timestamp will be converted to. - None will remove timezone holding local time. 
- - ambiguous : bool, 'NaT', default 'raise' - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time - - errors : 'raise', 'coerce', default 'raise' - - 'raise' will raise a NonExistentTimeError if a timestamp is not - valid in the specified timezone (e.g. due to a transition from - or to DST time) - - 'coerce' will return NaT if the timestamp can not be converted - into the specified timezone - - .. versionadded:: 0.19.0 - - Returns - ------- - localized : Timestamp - - Raises - ------ - TypeError - If the Timestamp is tz-aware and tz is not None. - """ - if ambiguous == 'infer': - raise ValueError('Cannot infer offset with only one time.') - - if self.tzinfo is None: - # tz naive, localize - tz = maybe_get_tz(tz) - if not is_string_object(ambiguous): - ambiguous = [ambiguous] - value = tz_localize_to_utc(np.array([self.value], dtype='i8'), tz, - ambiguous=ambiguous, errors=errors)[0] - return Timestamp(value, tz=tz) - else: - if tz is None: - # reset tz - value = tz_convert_single(self.value, 'UTC', self.tz) - return Timestamp(value, tz=None) - else: - raise TypeError('Cannot localize tz-aware Timestamp, use ' - 'tz_convert for conversions') - - def tz_convert(self, tz): - """ - Convert tz-aware Timestamp to another time zone. - - Parameters - ---------- - tz : string, pytz.timezone, dateutil.tz.tzfile or None - Time zone for time which Timestamp will be converted to. - None will remove timezone holding UTC time. - - Returns - ------- - converted : Timestamp - - Raises - ------ - TypeError - If Timestamp is tz-naive. 
- """ - if self.tzinfo is None: - # tz naive, use tz_localize - raise TypeError('Cannot convert tz-naive Timestamp, use ' - 'tz_localize to localize') - else: - # Same UTC timestamp, different time zone - return Timestamp(self.value, tz=tz) - - astimezone = tz_convert - - def replace(self, year=None, month=None, day=None, - hour=None, minute=None, second=None, microsecond=None, - nanosecond=None, tzinfo=object, fold=0): - """ - implements datetime.replace, handles nanoseconds - - Parameters - ---------- - year : int, optional - month : int, optional - day : int, optional - hour : int, optional - minute : int, optional - second : int, optional - microsecond : int, optional - nanosecond: int, optional - tzinfo : tz-convertible, optional - fold : int, optional, default is 0 - added in 3.6, NotImplemented - - Returns - ------- - Timestamp with fields replaced - """ - - cdef: - pandas_datetimestruct dts - int64_t value, value_tz, offset - object _tzinfo, result, k, v - datetime ts_input - - # set to naive if needed - _tzinfo = self.tzinfo - value = self.value - if _tzinfo is not None: - value_tz = tz_convert_single(value, _tzinfo, 'UTC') - value += value - value_tz - - # setup components - dt64_to_dtstruct(value, &dts) - dts.ps = self.nanosecond * 1000 - - # replace - def validate(k, v): - """ validate integers """ - if not is_integer_object(v): - raise ValueError("value must be an integer, received " - "{v} for {k}".format(v=type(v), k=k)) - return v - - if year is not None: - dts.year = validate('year', year) - if month is not None: - dts.month = validate('month', month) - if day is not None: - dts.day = validate('day', day) - if hour is not None: - dts.hour = validate('hour', hour) - if minute is not None: - dts.min = validate('minute', minute) - if second is not None: - dts.sec = validate('second', second) - if microsecond is not None: - dts.us = validate('microsecond', microsecond) - if nanosecond is not None: - dts.ps = validate('nanosecond', nanosecond) * 1000 - 
if tzinfo is not object: - _tzinfo = tzinfo - - # reconstruct & check bounds - ts_input = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, - dts.sec, dts.us, tzinfo=_tzinfo) - ts = convert_datetime_to_tsobject(ts_input, _tzinfo) - value = ts.value + (dts.ps // 1000) - if value != NPY_NAT: - check_dts_bounds(&dts) - - return create_timestamp_from_ts(value, dts, _tzinfo, self.freq) - - def isoformat(self, sep='T'): - base = super(_Timestamp, self).isoformat(sep=sep) - if self.nanosecond == 0: - return base - - if self.tzinfo is not None: - base1, base2 = base[:-6], base[-6:] - else: - base1, base2 = base, "" - - if self.microsecond != 0: - base1 += "%.3d" % self.nanosecond - else: - base1 += ".%.9d" % self.nanosecond - - return base1 + base2 - - def _has_time_component(self): - """ - Returns if the Timestamp has a time component - in addition to the date part - """ - return (self.time() != _zero_time - or self.tzinfo is not None - or self.nanosecond != 0) - - def to_julian_date(self): - """ - Convert TimeStamp to a Julian Date. - 0 Julian date is noon January 1, 4713 BC. - """ - year = self.year - month = self.month - day = self.day - if month <= 2: - year -= 1 - month += 12 - return (day + - np.fix((153 * month - 457) / 5) + - 365 * year + - np.floor(year / 4) - - np.floor(year / 100) + - np.floor(year / 400) + - 1721118.5 + - (self.hour + - self.minute / 60.0 + - self.second / 3600.0 + - self.microsecond / 3600.0 / 1e+6 + - self.nanosecond / 3600.0 / 1e+9 - ) / 24.0) - - def normalize(self): - """ - Normalize Timestamp to midnight, preserving - tz information. 
- """ - normalized_value = date_normalize( - np.array([self.value], dtype='i8'), tz=self.tz)[0] - return Timestamp(normalized_value).tz_localize(self.tz) - - def __radd__(self, other): - # __radd__ on cython extension types like _Timestamp is not used, so - # define it here instead - return self + other - - -# ---------------------------------------------------------------------- - - cdef inline bint _check_all_nulls(object val): """ utility to check if a value is any type of null """ cdef bint res @@ -847,331 +200,6 @@ cdef inline bint _check_all_nulls(object val): return res -# Add the min and max fields at the class level -cdef int64_t _NS_UPPER_BOUND = INT64_MAX -# the smallest value we could actually represent is -# INT64_MIN + 1 == -9223372036854775807 -# but to allow overflow free conversion with a microsecond resolution -# use the smallest value with a 0 nanosecond unit (0s in last 3 digits) -cdef int64_t _NS_LOWER_BOUND = -9223372036854775000 - -# Resolution is in nanoseconds -Timestamp.min = Timestamp(_NS_LOWER_BOUND) -Timestamp.max = Timestamp(_NS_UPPER_BOUND) - - -cdef str _NDIM_STRING = "ndim" - -# This is PITA. Because we inherit from datetime, which has very specific -# construction requirements, we need to do object instantiation in python -# (see Timestamp class above). This will serve as a C extension type that -# shadows the python class, where we do any heavy lifting. 
-cdef class _Timestamp(datetime): - - cdef readonly: - int64_t value, nanosecond - object freq # frequency reference - - def __hash__(_Timestamp self): - if self.nanosecond: - return hash(self.value) - return datetime.__hash__(self) - - def __richcmp__(_Timestamp self, object other, int op): - cdef: - _Timestamp ots - int ndim - - if isinstance(other, _Timestamp): - ots = other - elif other is NaT: - return op == Py_NE - elif PyDateTime_Check(other): - if self.nanosecond == 0: - val = self.to_pydatetime() - return PyObject_RichCompareBool(val, other, op) - - try: - ots = Timestamp(other) - except ValueError: - return self._compare_outside_nanorange(other, op) - else: - ndim = getattr(other, _NDIM_STRING, -1) - - if ndim != -1: - if ndim == 0: - if is_datetime64_object(other): - other = Timestamp(other) - else: - if op == Py_EQ: - return False - elif op == Py_NE: - return True - - # only allow ==, != ops - raise TypeError('Cannot compare type %r with type %r' % - (type(self).__name__, - type(other).__name__)) - return PyObject_RichCompare(other, self, reverse_ops[op]) - else: - if op == Py_EQ: - return False - elif op == Py_NE: - return True - raise TypeError('Cannot compare type %r with type %r' % - (type(self).__name__, type(other).__name__)) - - self._assert_tzawareness_compat(other) - return cmp_scalar(self.value, ots.value, op) - - def __reduce_ex__(self, protocol): - # python 3.6 compat - # http://bugs.python.org/issue28730 - # now __reduce_ex__ is defined and higher priority than __reduce__ - return self.__reduce__() - - def __repr__(self): - stamp = self._repr_base - zone = None - - try: - stamp += self.strftime('%z') - if self.tzinfo: - zone = get_timezone(self.tzinfo) - except ValueError: - year2000 = self.replace(year=2000) - stamp += year2000.strftime('%z') - if self.tzinfo: - zone = get_timezone(self.tzinfo) - - try: - stamp += zone.strftime(' %%Z') - except: - pass - - tz = ", tz='{0}'".format(zone) if zone is not None else "" - freq = "" if self.freq 
is None else ", freq='{0}'".format(self.freqstr) - - return "Timestamp('{stamp}'{tz}{freq})".format(stamp=stamp, - tz=tz, freq=freq) - - cdef bint _compare_outside_nanorange(_Timestamp self, datetime other, - int op) except -1: - cdef datetime dtval = self.to_pydatetime() - - self._assert_tzawareness_compat(other) - - if self.nanosecond == 0: - return PyObject_RichCompareBool(dtval, other, op) - else: - if op == Py_EQ: - return False - elif op == Py_NE: - return True - elif op == Py_LT: - return dtval < other - elif op == Py_LE: - return dtval < other - elif op == Py_GT: - return dtval >= other - elif op == Py_GE: - return dtval >= other - - cdef int _assert_tzawareness_compat(_Timestamp self, - object other) except -1: - if self.tzinfo is None: - if other.tzinfo is not None: - raise TypeError('Cannot compare tz-naive and tz-aware ' - 'timestamps') - elif other.tzinfo is None: - raise TypeError('Cannot compare tz-naive and tz-aware timestamps') - - cpdef datetime to_datetime(_Timestamp self): - """ - DEPRECATED: use :meth:`to_pydatetime` instead. - - Convert a Timestamp object to a native Python datetime object. - """ - warnings.warn("to_datetime is deprecated. Use self.to_pydatetime()", - FutureWarning, stacklevel=2) - return self.to_pydatetime(warn=False) - - cpdef datetime to_pydatetime(_Timestamp self, warn=True): - """ - Convert a Timestamp object to a native Python datetime object. - - If warn=True, issue a warning if nanoseconds is nonzero. 
- """ - if self.nanosecond != 0 and warn: - warnings.warn("Discarding nonzero nanoseconds in conversion", - UserWarning, stacklevel=2) - - return datetime(self.year, self.month, self.day, - self.hour, self.minute, self.second, - self.microsecond, self.tzinfo) - - cpdef to_datetime64(self): - """ Returns a numpy.datetime64 object with 'ns' precision """ - return np.datetime64(self.value, 'ns') - - def __add__(self, other): - cdef int64_t other_int, nanos - - if is_timedelta64_object(other): - other_int = other.astype('timedelta64[ns]').view('i8') - return Timestamp(self.value + other_int, - tz=self.tzinfo, freq=self.freq) - - elif is_integer_object(other): - if self is NaT: - # to be compat with Period - return NaT - elif self.freq is None: - raise ValueError("Cannot add integral value to Timestamp " - "without freq.") - return Timestamp((self.freq * other).apply(self), freq=self.freq) - - elif PyDelta_Check(other) or hasattr(other, 'delta'): - # delta --> offsets.Tick - nanos = delta_to_nanoseconds(other) - result = Timestamp(self.value + nanos, - tz=self.tzinfo, freq=self.freq) - if getattr(other, 'normalize', False): - # DateOffset - result = result.normalize() - return result - - # index/series like - elif hasattr(other, '_typ'): - return NotImplemented - - result = datetime.__add__(self, other) - if PyDateTime_Check(result): - result = Timestamp(result) - result.nanosecond = self.nanosecond - return result - - def __sub__(self, other): - if (is_timedelta64_object(other) or is_integer_object(other) or - PyDelta_Check(other) or hasattr(other, 'delta')): - # `delta` attribute is for offsets.Tick or offsets.Week obj - neg_other = -other - return self + neg_other - - # a Timestamp-DatetimeIndex -> yields a negative TimedeltaIndex - elif getattr(other, '_typ', None) == 'datetimeindex': - # timezone comparison is performed in DatetimeIndex._sub_datelike - return -other.__sub__(self) - - # a Timestamp-TimedeltaIndex -> yields a negative TimedeltaIndex - elif 
getattr(other, '_typ', None) == 'timedeltaindex': - return (-other).__add__(self) - - elif other is NaT: - return NaT - - # coerce if necessary if we are a Timestamp-like - if (PyDateTime_Check(self) - and (PyDateTime_Check(other) or is_datetime64_object(other))): - self = Timestamp(self) - other = Timestamp(other) - - # validate tz's - if get_timezone(self.tzinfo) != get_timezone(other.tzinfo): - raise TypeError("Timestamp subtraction must have the " - "same timezones or no timezones") - - # scalar Timestamp/datetime - Timestamp/datetime -> yields a - # Timedelta - try: - return Timedelta(self.value - other.value) - except (OverflowError, OutOfBoundsDatetime): - pass - - # scalar Timestamp/datetime - Timedelta -> yields a Timestamp (with - # same timezone if specified) - return datetime.__sub__(self, other) - - cdef int64_t _maybe_convert_value_to_local(self): - """Convert UTC i8 value to local i8 value if tz exists""" - cdef: - int64_t val - val = self.value - if self.tz is not None and not is_utc(self.tz): - val = tz_convert_single(self.value, 'UTC', self.tz) - return val - - cpdef int _get_field(self, field): - cdef: - int64_t val - ndarray[int32_t] out - val = self._maybe_convert_value_to_local() - out = get_date_field(np.array([val], dtype=np.int64), field) - return int(out[0]) - - cpdef _get_start_end_field(self, field): - cdef: - int64_t val - dict kwds - - freq = self.freq - if freq: - kwds = freq.kwds - month_kw = kwds.get('startingMonth', kwds.get('month', 12)) - freqstr = self.freqstr - else: - month_kw = 12 - freqstr = None - - val = self._maybe_convert_value_to_local() - out = get_start_end_field(np.array([val], dtype=np.int64), - field, freqstr, month_kw) - return out[0] - - @property - def _repr_base(self): - return '{date} {time}'.format(date=self._date_repr, - time=self._time_repr) - - @property - def _date_repr(self): - # Ideal here would be self.strftime("%Y-%m-%d"), but - # the datetime strftime() methods require year >= 1900 - return 
'%d-%.2d-%.2d' % (self.year, self.month, self.day) - - @property - def _time_repr(self): - result = '%.2d:%.2d:%.2d' % (self.hour, self.minute, self.second) - - if self.nanosecond != 0: - result += '.%.9d' % (self.nanosecond + 1000 * self.microsecond) - elif self.microsecond != 0: - result += '.%.6d' % self.microsecond - - return result - - @property - def _short_repr(self): - # format a Timestamp with only _date_repr if possible - # otherwise _repr_base - if (self.hour == 0 and - self.minute == 0 and - self.second == 0 and - self.microsecond == 0 and - self.nanosecond == 0): - return self._date_repr - return self._repr_base - - @property - def asm8(self): - return np.datetime64(self.value, 'ns') - - def timestamp(self): - """Return POSIX timestamp as float.""" - # py27 compat, see GH#17329 - return round(self.value / 1e9, 6) - - cdef PyTypeObject* ts_type = Timestamp diff --git a/pandas/_libs/tslibs/timestamps.pxd b/pandas/_libs/tslibs/timestamps.pxd new file mode 100644 index 0000000000000..8e7380b37209e --- /dev/null +++ b/pandas/_libs/tslibs/timestamps.pxd @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- +# cython: profile=False + +from numpy cimport int64_t +from np_datetime cimport pandas_datetimestruct + +cdef object create_timestamp_from_ts(int64_t value, + pandas_datetimestruct dts, + object tz, object freq) + +cdef int64_t _NS_UPPER_BOUND, _NS_LOWER_BOUND diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx new file mode 100644 index 0000000000000..8fdded0bcb07a --- /dev/null +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -0,0 +1,995 @@ +# -*- coding: utf-8 -*- +# cython: profile=False +import warnings + +from cpython cimport (PyObject_RichCompareBool, PyObject_RichCompare, + Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE) + +import numpy as np +cimport numpy as np +from numpy cimport int64_t, int32_t, ndarray +np.import_array() + +from datetime import time as datetime_time +from cpython.datetime cimport (datetime, + PyDateTime_Check, 
PyDelta_Check, PyTZInfo_Check, + PyDateTime_IMPORT) +PyDateTime_IMPORT + +from util cimport (is_datetime64_object, is_timedelta64_object, + is_integer_object, is_string_object, + INT64_MAX) + +from conversion import tz_localize_to_utc, date_normalize +from conversion cimport (tz_convert_single, _TSObject, + convert_to_tsobject, convert_datetime_to_tsobject) +from fields import get_date_field, get_start_end_field +from nattype import NaT +from nattype cimport NPY_NAT +from np_datetime import OutOfBoundsDatetime +from np_datetime cimport (reverse_ops, cmp_scalar, check_dts_bounds, + pandas_datetimestruct, dt64_to_dtstruct, + is_leapyear) +from timedeltas import Timedelta +from timedeltas cimport delta_to_nanoseconds +from timezones cimport get_timezone, is_utc, maybe_get_tz + +# ---------------------------------------------------------------------- +# Constants +_zero_time = datetime_time(0, 0) +_no_input = object() + +# ---------------------------------------------------------------------- + + +cdef inline object create_timestamp_from_ts(int64_t value, + pandas_datetimestruct dts, + object tz, object freq): + """ convenience routine to construct a Timestamp from its parts """ + cdef _Timestamp ts_base + ts_base = _Timestamp.__new__(Timestamp, dts.year, dts.month, + dts.day, dts.hour, dts.min, + dts.sec, dts.us, tz) + ts_base.value = value + ts_base.freq = freq + ts_base.nanosecond = dts.ps / 1000 + + return ts_base + + +# This is PITA. Because we inherit from datetime, which has very specific +# construction requirements, we need to do object instantiation in python +# (see Timestamp class above). This will serve as a C extension type that +# shadows the python class, where we do any heavy lifting. 
+cdef class _Timestamp(datetime):
+
+    cdef readonly:
+        int64_t value, nanosecond
+        object freq       # frequency reference
+
+    def __hash__(_Timestamp self):
+        # With a nonzero nanosecond component, hash the i8 value; otherwise
+        # defer to datetime's own hash.
+        if self.nanosecond:
+            return hash(self.value)
+        return datetime.__hash__(self)
+
+    def __richcmp__(_Timestamp self, object other, int op):
+        cdef:
+            _Timestamp ots
+            int ndim
+
+        if isinstance(other, _Timestamp):
+            ots = other
+        elif other is NaT:
+            # every comparison with NaT is False except !=
+            return op == Py_NE
+        elif PyDateTime_Check(other):
+            if self.nanosecond == 0:
+                # no sub-microsecond part: compare as plain datetimes
+                val = self.to_pydatetime()
+                return PyObject_RichCompareBool(val, other, op)
+
+            try:
+                ots = Timestamp(other)
+            except ValueError:
+                # other could not be converted to a Timestamp
+                return self._compare_outside_nanorange(other, op)
+        else:
+            ndim = getattr(other, "ndim", -1)
+
+            if ndim != -1:
+                if ndim == 0:
+                    if is_datetime64_object(other):
+                        other = Timestamp(other)
+                    else:
+                        if op == Py_EQ:
+                            return False
+                        elif op == Py_NE:
+                            return True
+
+                        # only allow ==, != ops
+                        raise TypeError('Cannot compare type %r with type %r' %
+                                        (type(self).__name__,
+                                         type(other).__name__))
+                # hand the comparison to ``other`` with the operator reversed
+                return PyObject_RichCompare(other, self, reverse_ops[op])
+            else:
+                if op == Py_EQ:
+                    return False
+                elif op == Py_NE:
+                    return True
+                raise TypeError('Cannot compare type %r with type %r' %
+                                (type(self).__name__, type(other).__name__))
+
+        self._assert_tzawareness_compat(other)
+        return cmp_scalar(self.value, ots.value, op)
+
+    def __reduce_ex__(self, protocol):
+        # python 3.6 compat
+        # http://bugs.python.org/issue28730
+        # now __reduce_ex__ is defined and higher priority than __reduce__
+        return self.__reduce__()
+
+    def __repr__(self):
+        stamp = self._repr_base
+        zone = None
+
+        try:
+            stamp += self.strftime('%z')
+            if self.tzinfo:
+                zone = get_timezone(self.tzinfo)
+        except ValueError:
+            # retry the offset formatting with the year swapped to 2000
+            year2000 = self.replace(year=2000)
+            stamp += year2000.strftime('%z')
+            if self.tzinfo:
+                zone = get_timezone(self.tzinfo)
+
+        try:
+            stamp += zone.strftime(' %%Z')
+        except Exception:
+            # best effort only; a bare ``except:`` here would also swallow
+            # KeyboardInterrupt/SystemExit
+            pass
+
+        tz = ", tz='{0}'".format(zone) if zone is not None else ""
+        freq = "" if self.freq is None else ", freq='{0}'".format(self.freqstr)
+
+        return "Timestamp('{stamp}'{tz}{freq})".format(stamp=stamp,
+                                                       tz=tz, freq=freq)
+
+    cdef bint _compare_outside_nanorange(_Timestamp self, datetime other,
+                                         int op) except -1:
+        # Compare against a datetime using the microsecond-resolution
+        # datetime built from self.
+        cdef datetime dtval = self.to_pydatetime()
+
+        self._assert_tzawareness_compat(other)
+
+        if self.nanosecond == 0:
+            return PyObject_RichCompareBool(dtval, other, op)
+        else:
+            # dtval drops self's nonzero nanoseconds, so self is strictly
+            # later than dtval: "<=" reduces to dtval < other and ">" to
+            # dtval >= other (presumably relying on ``other`` carrying no
+            # sub-microsecond component of its own).
+            if op == Py_EQ:
+                return False
+            elif op == Py_NE:
+                return True
+            elif op == Py_LT:
+                return dtval < other
+            elif op == Py_LE:
+                return dtval < other
+            elif op == Py_GT:
+                return dtval >= other
+            elif op == Py_GE:
+                return dtval >= other
+
+    cdef int _assert_tzawareness_compat(_Timestamp self,
+                                        object other) except -1:
+        # Raise TypeError unless self and other are both tz-naive or both
+        # tz-aware.
+        if self.tzinfo is None:
+            if other.tzinfo is not None:
+                raise TypeError('Cannot compare tz-naive and tz-aware '
+                                'timestamps')
+        elif other.tzinfo is None:
+            raise TypeError('Cannot compare tz-naive and tz-aware timestamps')
+
+    cpdef datetime to_datetime(_Timestamp self):
+        """
+        DEPRECATED: use :meth:`to_pydatetime` instead.
+
+        Convert a Timestamp object to a native Python datetime object.
+        """
+        warnings.warn("to_datetime is deprecated. Use self.to_pydatetime()",
+                      FutureWarning, stacklevel=2)
+        return self.to_pydatetime(warn=False)
+
+    cpdef datetime to_pydatetime(_Timestamp self, warn=True):
+        """
+        Convert a Timestamp object to a native Python datetime object.
+
+        If warn=True, issue a warning if nanoseconds is nonzero.
+        """
+        if self.nanosecond != 0 and warn:
+            warnings.warn("Discarding nonzero nanoseconds in conversion",
+                          UserWarning, stacklevel=2)
+
+        return datetime(self.year, self.month, self.day,
+                        self.hour, self.minute, self.second,
+                        self.microsecond, self.tzinfo)
+
+    cpdef to_datetime64(self):
+        """ Returns a numpy.datetime64 object with 'ns' precision """
+        return np.datetime64(self.value, 'ns')
+
+    def __add__(self, other):
+        cdef int64_t other_int, nanos
+
+        if is_timedelta64_object(other):
+            other_int = other.astype('timedelta64[ns]').view('i8')
+            return Timestamp(self.value + other_int,
+                             tz=self.tzinfo, freq=self.freq)
+
+        elif is_integer_object(other):
+            if self is NaT:
+                # to be compat with Period
+                return NaT
+            elif self.freq is None:
+                raise ValueError("Cannot add integral value to Timestamp "
+                                 "without freq.")
+            return Timestamp((self.freq * other).apply(self), freq=self.freq)
+
+        elif PyDelta_Check(other) or hasattr(other, 'delta'):
+            # delta --> offsets.Tick
+            nanos = delta_to_nanoseconds(other)
+            result = Timestamp(self.value + nanos,
+                               tz=self.tzinfo, freq=self.freq)
+            if getattr(other, 'normalize', False):
+                # DateOffset
+                result = result.normalize()
+            return result
+
+        # index/series like
+        elif hasattr(other, '_typ'):
+            return NotImplemented
+
+        result = datetime.__add__(self, other)
+        if PyDateTime_Check(result):
+            result = Timestamp(result)
+            result.nanosecond = self.nanosecond
+        return result
+
+    def __sub__(self, other):
+        if (is_timedelta64_object(other) or is_integer_object(other) or
+                PyDelta_Check(other) or hasattr(other, 'delta')):
+            # `delta` attribute is for offsets.Tick or offsets.Week obj
+            neg_other = -other
+            return self + neg_other
+
+        # a Timestamp-DatetimeIndex -> yields a negative TimedeltaIndex
+        elif getattr(other, '_typ', None) == 'datetimeindex':
+            # timezone comparison is performed in DatetimeIndex._sub_datelike
+            return -other.__sub__(self)
+
+        # a Timestamp-TimedeltaIndex -> yields a negative TimedeltaIndex
+        elif getattr(other, '_typ', None) == 'timedeltaindex':
+            return (-other).__add__(self)
+
+        elif other is NaT:
+            return NaT
+
+        # coerce if necessary if we are a Timestamp-like
+        if (PyDateTime_Check(self)
+                and (PyDateTime_Check(other) or is_datetime64_object(other))):
+            self = Timestamp(self)
+            other = Timestamp(other)
+
+            # validate tz's
+            if get_timezone(self.tzinfo) != get_timezone(other.tzinfo):
+                raise TypeError("Timestamp subtraction must have the "
+                                "same timezones or no timezones")
+
+            # scalar Timestamp/datetime - Timestamp/datetime -> yields a
+            # Timedelta
+            try:
+                return Timedelta(self.value - other.value)
+            except (OverflowError, OutOfBoundsDatetime):
+                # fall through to datetime.__sub__ below
+                pass
+
+        # scalar Timestamp/datetime - Timedelta -> yields a Timestamp (with
+        # same timezone if specified)
+        return datetime.__sub__(self, other)
+
+    cdef int64_t _maybe_convert_value_to_local(self):
+        """Convert UTC i8 value to local i8 value if tz exists"""
+        cdef:
+            int64_t val
+        val = self.value
+        if self.tz is not None and not is_utc(self.tz):
+            val = tz_convert_single(self.value, 'UTC', self.tz)
+        return val
+
+    cpdef int _get_field(self, field):
+        # Look up one date field for this scalar by running the array
+        # routine get_date_field on a length-1 array of the local value.
+        cdef:
+            int64_t val
+            ndarray[int32_t] out
+        val = self._maybe_convert_value_to_local()
+        out = get_date_field(np.array([val], dtype=np.int64), field)
+        return int(out[0])
+
+    cpdef _get_start_end_field(self, field):
+        # Scalar wrapper around get_start_end_field; the month keyword is
+        # taken from freq.kwds ('startingMonth', then 'month') and falls
+        # back to 12.
+        cdef:
+            int64_t val
+            dict kwds
+
+        freq = self.freq
+        if freq:
+            kwds = freq.kwds
+            month_kw = kwds.get('startingMonth', kwds.get('month', 12))
+            freqstr = self.freqstr
+        else:
+            month_kw = 12
+            freqstr = None
+
+        val = self._maybe_convert_value_to_local()
+        out = get_start_end_field(np.array([val], dtype=np.int64),
+                                  field, freqstr, month_kw)
+        return out[0]
+
+    @property
+    def _repr_base(self):
+        return '{date} {time}'.format(date=self._date_repr,
+                                      time=self._time_repr)
+
+    @property
+    def _date_repr(self):
+        # Ideal here would be self.strftime("%Y-%m-%d"), but
+        # the datetime strftime() methods require year >= 1900
+        return '%d-%.2d-%.2d' % (self.year, self.month, self.day)
+
+    @property
+    def _time_repr(self):
+        result = '%.2d:%.2d:%.2d' % (self.hour, self.minute, self.second)
+
+        if self.nanosecond != 0:
+            # nine fractional digits: microseconds scaled to ns plus the
+            # nanosecond remainder
+            result += '.%.9d' % (self.nanosecond + 1000 * self.microsecond)
+        elif self.microsecond != 0:
+            result += '.%.6d' % self.microsecond
+
+        return result
+
+    @property
+    def _short_repr(self):
+        # format a Timestamp with only _date_repr if possible
+        # otherwise _repr_base
+        if (self.hour == 0 and
+                self.minute == 0 and
+                self.second == 0 and
+                self.microsecond == 0 and
+                self.nanosecond == 0):
+            return self._date_repr
+        return self._repr_base
+
+    @property
+    def asm8(self):
+        return np.datetime64(self.value, 'ns')
+
+    def timestamp(self):
+        """Return POSIX timestamp as float."""
+        # py27 compat, see GH#17329
+        return round(self.value / 1e9, 6)
+
+
+# ----------------------------------------------------------------------
+
+# Python front end to C extension type _Timestamp
+# This serves as the box for datetime64
+
+
+class Timestamp(_Timestamp):
+    """Pandas replacement for datetime.datetime
+
+    Timestamp is the pandas equivalent of python's datetime
+    and is interchangeable with it in most cases. It's the type used
+    for the entries that make up a DatetimeIndex, and other timeseries
+    oriented data structures in pandas.
+
+    Parameters
+    ----------
+    ts_input : datetime-like, str, int, float
+        Value to be converted to Timestamp
+    freq : str, DateOffset
+        Offset which Timestamp will have
+    tz : string, pytz.timezone, dateutil.tz.tzfile or None
+        Time zone for time which Timestamp will have.
+    unit : string
+        numpy unit used for conversion, if ts_input is int or float
+    offset : str, DateOffset
+        Deprecated, use freq
+
+    year, month, day : int
+        .. versionadded:: 0.19.0
+    hour, minute, second, microsecond : int, optional, default 0
+        .. versionadded:: 0.19.0
+    tzinfo : datetime.tzinfo, optional, default None
+        .. versionadded:: 0.19.0
+
+    Notes
+    -----
+    There are essentially three calling conventions for the constructor. The
+    primary form accepts four parameters. They can be passed by position or
+    keyword.
+
+    The other two forms mimic the parameters from ``datetime.datetime``. They
+    can be passed by either position or keyword, but not both mixed together.
+
+    Examples
+    --------
+    >>> pd.Timestamp('2017-01-01T12')
+    Timestamp('2017-01-01 12:00:00')
+
+    >>> pd.Timestamp(2017, 1, 1, 12)
+    Timestamp('2017-01-01 12:00:00')
+
+    >>> pd.Timestamp(year=2017, month=1, day=1, hour=12)
+    Timestamp('2017-01-01 12:00:00')
+    """
+
+    @classmethod
+    def fromordinal(cls, ordinal, freq=None, tz=None, offset=None):
+        """
+        Timestamp.fromordinal(ordinal, freq=None, tz=None, offset=None)
+
+        passed an ordinal, translate and convert to a ts
+        note: by definition there cannot be any tz info on the ordinal itself
+
+        Parameters
+        ----------
+        ordinal : int
+            date corresponding to a proleptic Gregorian ordinal
+        freq : str, DateOffset
+            Offset which Timestamp will have
+        tz : string, pytz.timezone, dateutil.tz.tzfile or None
+            Time zone for time which Timestamp will have.
+        offset : str, DateOffset
+            Deprecated, use freq
+        """
+        return cls(datetime.fromordinal(ordinal),
+                   freq=freq, tz=tz, offset=offset)
+
+    @classmethod
+    def now(cls, tz=None):
+        """
+        Timestamp.now(tz=None)
+
+        Returns new Timestamp object representing current time local to
+        tz.
+
+        Parameters
+        ----------
+        tz : string / timezone object, default None
+            Timezone to localize to
+        """
+        if is_string_object(tz):
+            tz = maybe_get_tz(tz)
+        return cls(datetime.now(tz))
+
+    @classmethod
+    def today(cls, tz=None):
+        """
+        Timestamp.today(cls, tz=None)
+
+        Return the current time in the local timezone.  This differs
+        from datetime.today() in that it can be localized to a
+        passed timezone.
+
+        Parameters
+        ----------
+        tz : string / timezone object, default None
+            Timezone to localize to
+        """
+        return cls.now(tz)
+
+    @classmethod
+    def utcnow(cls):
+        """
+        Timestamp.utcnow()
+
+        Return a new Timestamp representing UTC day and time.
+        """
+        return cls.now('UTC')
+
+    @classmethod
+    def utcfromtimestamp(cls, ts):
+        """
+        Timestamp.utcfromtimestamp(ts)
+
+        Construct a naive UTC datetime from a POSIX timestamp.
+        """
+        return cls(datetime.utcfromtimestamp(ts))
+
+    @classmethod
+    def fromtimestamp(cls, ts):
+        """
+        Timestamp.fromtimestamp(ts)
+
+        timestamp[, tz] -> tz's local time from POSIX timestamp.
+        """
+        return cls(datetime.fromtimestamp(ts))
+
+    @classmethod
+    def combine(cls, date, time):
+        """
+        Timestamp.combine(date, time)
+
+        date, time -> datetime with same date and time fields
+        """
+        return cls(datetime.combine(date, time))
+
+    def __new__(cls, object ts_input=_no_input,
+                object freq=None, tz=None, unit=None,
+                year=None, month=None, day=None,
+                hour=None, minute=None, second=None, microsecond=None,
+                tzinfo=None,
+                object offset=None):
+        # The parameter list folds together legacy parameter names (the first
+        # four) and positional and keyword parameter names from pydatetime.
+        #
+        # There are three calling forms:
+        #
+        # - In the legacy form, the first parameter, ts_input, is required
+        #   and may be datetime-like, str, int, or float. The second
+        #   parameter, freq, is optional and may be str or DateOffset.
+        #
+        # - ints in the first, second, and third arguments indicate
+        #   pydatetime positional arguments. Only the first 8 arguments
+        #   (standing in for year, month, day, hour, minute, second,
+        #   microsecond, tzinfo) may be non-None. As a shortcut, we just
+        #   check that the second argument is an int.
+        #
+        # - Nones for the first four (legacy) arguments indicate pydatetime
+        #   keyword arguments. year, month, and day are required. As a
+        #   shortcut, we just check that the first argument was not passed.
+        #
+        # Mixing pydatetime positional and keyword arguments is forbidden!
+
+        cdef _TSObject ts
+
+        if offset is not None:
+            # deprecate offset kwd in 0.19.0, GH13593
+            if freq is not None:
+                msg = "Can only specify freq or offset, not both"
+                raise TypeError(msg)
+            warnings.warn("offset is deprecated. Use freq instead",
+                          FutureWarning)
+            freq = offset
+
+        if tzinfo is not None:
+            if not PyTZInfo_Check(tzinfo):
+                # tzinfo must be a datetime.tzinfo object, GH#17690
+                raise TypeError('tzinfo must be a datetime.tzinfo object, '
+                                'not %s' % type(tzinfo))
+            elif tz is not None:
+                raise ValueError('Can provide at most one of tz, tzinfo')
+
+        if ts_input is _no_input:
+            # User passed keyword arguments.
+            if tz is None:
+                # Handle the case where the user passes `tz` and not `tzinfo`
+                tz = tzinfo
+            return Timestamp(datetime(year, month, day, hour or 0,
+                                      minute or 0, second or 0,
+                                      microsecond or 0, tzinfo),
+                             tz=tz)
+        elif is_integer_object(freq):
+            # User passed positional arguments:
+            # Timestamp(year, month, day[, hour[, minute[, second[,
+            # microsecond[, tzinfo]]]]])
+            # The names are shifted here: ts_input/freq/tz/unit hold
+            # year/month/day/hour, year/month/day hold
+            # minute/second/microsecond, and hour holds tzinfo.
+            return Timestamp(datetime(ts_input, freq, tz, unit or 0,
+                                      year or 0, month or 0, day or 0,
+                                      hour), tz=hour)
+
+        if tzinfo is not None:
+            # User passed tzinfo instead of tz; avoid silently ignoring
+            tz, tzinfo = tzinfo, None
+
+        ts = convert_to_tsobject(ts_input, tz, unit, 0, 0)
+
+        if ts.value == NPY_NAT:
+            return NaT
+
+        if is_string_object(freq):
+            from pandas.tseries.frequencies import to_offset
+            freq = to_offset(freq)
+
+        return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq)
+
+    def _round(self, freq, rounder):
+        # Shared implementation of round/floor/ceil: ``rounder`` is the
+        # numpy function applied to value / unit, where unit is the
+        # frequency's length in nanoseconds. tz-aware values are rounded
+        # on their tz-naive value and re-localized afterwards.
+
+        cdef:
+            int64_t unit, r, value, buff = 1000000
+            object result
+
+        from pandas.tseries.frequencies import to_offset
+        unit = to_offset(freq).nanos
+        if self.tz is not None:
+            value = self.tz_localize(None).value
+        else:
+            value = self.value
+        if unit < 1000 and unit % 1000 != 0:
+            # for nano rounding, work with the last 6 digits separately
+            # due to float precision
+            r = (buff * (value // buff) + unit *
+                 (rounder((value % buff) / float(unit))).astype('i8'))
+        elif unit >= 1000 and unit % 1000 != 0:
+            msg = 'Precision will be lost using frequency: {}'
+            warnings.warn(msg.format(freq))
+            r = (unit * rounder(value / float(unit)).astype('i8'))
+        else:
+            r = (unit * rounder(value / float(unit)).astype('i8'))
+        result = Timestamp(r, unit='ns')
+        if self.tz is not None:
+            result = result.tz_localize(self.tz)
+        return result
+
+    def round(self, freq):
+        """
+        Round the Timestamp to the specified resolution
+
+        Returns
+        -------
+        a new Timestamp rounded to the given resolution of `freq`
+
+        Parameters
+        ----------
+        freq : a freq string indicating the rounding resolution
+
+        Raises
+        ------
+        ValueError if the freq cannot be converted
+        """
+        return self._round(freq, np.round)
+
+    def floor(self, freq):
+        """
+        return a new Timestamp floored to this resolution
+
+        Parameters
+        ----------
+        freq : a freq string indicating the flooring resolution
+        """
+        return self._round(freq, np.floor)
+
+    def ceil(self, freq):
+        """
+        return a new Timestamp ceiled to this resolution
+
+        Parameters
+        ----------
+        freq : a freq string indicating the ceiling resolution
+        """
+        return self._round(freq, np.ceil)
+
+    @property
+    def tz(self):
+        """
+        Alias for tzinfo
+        """
+        return self.tzinfo
+
+    @property
+    def offset(self):
+        warnings.warn(".offset is deprecated. Use .freq instead",
+                      FutureWarning)
+        return self.freq
+
+    def __setstate__(self, state):
+        self.value = state[0]
+        self.freq = state[1]
+        self.tzinfo = state[2]
+
+    def __reduce__(self):
+        object_state = self.value, self.freq, self.tzinfo
+        return (Timestamp, object_state)
+
+    def to_period(self, freq=None):
+        """
+        Return a period of which this timestamp is an observation.
+        """
+        from pandas import Period
+
+        if freq is None:
+            freq = self.freq
+
+        return Period(self, freq=freq)
+
+    @property
+    def dayofweek(self):
+        return self.weekday()
+
+    @property
+    def weekday_name(self):
+        cdef dict wdays = {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday',
+                           3: 'Thursday', 4: 'Friday', 5: 'Saturday',
+                           6: 'Sunday'}
+        return wdays[self.weekday()]
+
+    @property
+    def dayofyear(self):
+        return self._get_field('doy')
+
+    @property
+    def week(self):
+        return self._get_field('woy')
+
+    weekofyear = week
+
+    @property
+    def quarter(self):
+        return self._get_field('q')
+
+    @property
+    def days_in_month(self):
+        return self._get_field('dim')
+
+    daysinmonth = days_in_month
+
+    @property
+    def freqstr(self):
+        return getattr(self.freq, 'freqstr', self.freq)
+
+    @property
+    def is_month_start(self):
+        return self._get_start_end_field('is_month_start')
+
+    @property
+    def is_month_end(self):
+        return self._get_start_end_field('is_month_end')
+
+    @property
+    def is_quarter_start(self):
+        return self._get_start_end_field('is_quarter_start')
+
+    @property
+    def is_quarter_end(self):
+        return self._get_start_end_field('is_quarter_end')
+
+    @property
+    def is_year_start(self):
+        return self._get_start_end_field('is_year_start')
+
+    @property
+    def is_year_end(self):
+        return self._get_start_end_field('is_year_end')
+
+    @property
+    def is_leap_year(self):
+        return bool(is_leapyear(self.year))
+
+    def tz_localize(self, tz, ambiguous='raise', errors='raise'):
+        """
+        Convert naive Timestamp to local time zone, or remove
+        timezone from tz-aware Timestamp.
+
+        Parameters
+        ----------
+        tz : string, pytz.timezone, dateutil.tz.tzfile or None
+            Time zone for time which Timestamp will be converted to.
+            None will remove timezone holding local time.
+
+        ambiguous : bool, 'NaT', default 'raise'
+            - bool contains flags to determine if time is dst or not (note
+              that this flag is only applicable for ambiguous fall dst dates)
+            - 'NaT' will return NaT for an ambiguous time
+            - 'raise' will raise an AmbiguousTimeError for an ambiguous time
+
+        errors : 'raise', 'coerce', default 'raise'
+            - 'raise' will raise a NonExistentTimeError if a timestamp is not
+               valid in the specified timezone (e.g. due to a transition from
+               or to DST time)
+            - 'coerce' will return NaT if the timestamp can not be converted
+              into the specified timezone
+
+              .. versionadded:: 0.19.0
+
+        Returns
+        -------
+        localized : Timestamp
+
+        Raises
+        ------
+        TypeError
+            If the Timestamp is tz-aware and tz is not None.
+        """
+        if ambiguous == 'infer':
+            raise ValueError('Cannot infer offset with only one time.')
+
+        if self.tzinfo is None:
+            # tz naive, localize
+            tz = maybe_get_tz(tz)
+            if not is_string_object(ambiguous):
+                # tz_localize_to_utc is given a length-1 array, so wrap a
+                # scalar flag in a list to match
+                ambiguous = [ambiguous]
+            value = tz_localize_to_utc(np.array([self.value], dtype='i8'), tz,
+                                       ambiguous=ambiguous, errors=errors)[0]
+            return Timestamp(value, tz=tz)
+        else:
+            if tz is None:
+                # reset tz
+                value = tz_convert_single(self.value, 'UTC', self.tz)
+                return Timestamp(value, tz=None)
+            else:
+                raise TypeError('Cannot localize tz-aware Timestamp, use '
+                                'tz_convert for conversions')
+
+    def tz_convert(self, tz):
+        """
+        Convert tz-aware Timestamp to another time zone.
+
+        Parameters
+        ----------
+        tz : string, pytz.timezone, dateutil.tz.tzfile or None
+            Time zone for time which Timestamp will be converted to.
+            None will remove timezone holding UTC time.
+
+        Returns
+        -------
+        converted : Timestamp
+
+        Raises
+        ------
+        TypeError
+            If Timestamp is tz-naive.
+        """
+        if self.tzinfo is None:
+            # tz naive, use tz_localize
+            raise TypeError('Cannot convert tz-naive Timestamp, use '
+                            'tz_localize to localize')
+        else:
+            # Same UTC timestamp, different time zone
+            return Timestamp(self.value, tz=tz)
+
+    astimezone = tz_convert
+
+    def replace(self, year=None, month=None, day=None,
+                hour=None, minute=None, second=None, microsecond=None,
+                nanosecond=None, tzinfo=object, fold=0):
+        """
+        implements datetime.replace, handles nanoseconds
+
+        Parameters
+        ----------
+        year : int, optional
+        month : int, optional
+        day : int, optional
+        hour : int, optional
+        minute : int, optional
+        second : int, optional
+        microsecond : int, optional
+        nanosecond: int, optional
+        tzinfo : tz-convertible, optional
+        fold : int, optional, default is 0
+            added in 3.6, NotImplemented
+
+        Returns
+        -------
+        Timestamp with fields replaced
+        """
+
+        cdef:
+            pandas_datetimestruct dts
+            int64_t value, value_tz, offset
+            object _tzinfo, result, k, v
+            datetime ts_input
+
+        # set to naive if needed
+        _tzinfo = self.tzinfo
+        value = self.value
+        if _tzinfo is not None:
+            value_tz = tz_convert_single(value, _tzinfo, 'UTC')
+            value += value - value_tz
+
+        # setup components
+        dt64_to_dtstruct(value, &dts)
+        dts.ps = self.nanosecond * 1000
+
+        # replace
+        def validate(k, v):
+            """ validate integers """
+            if not is_integer_object(v):
+                raise ValueError("value must be an integer, received "
+                                 "{v} for {k}".format(v=type(v), k=k))
+            return v
+
+        if year is not None:
+            dts.year = validate('year', year)
+        if month is not None:
+            dts.month = validate('month', month)
+        if day is not None:
+            dts.day = validate('day', day)
+        if hour is not None:
+            dts.hour = validate('hour', hour)
+        if minute is not None:
+            dts.min = validate('minute', minute)
+        if second is not None:
+            dts.sec = validate('second', second)
+        if microsecond is not None:
+            dts.us = validate('microsecond', microsecond)
+        if nanosecond is not None:
+            dts.ps = validate('nanosecond', nanosecond) * 1000
+        # ``object`` is the "not passed" sentinel so that tzinfo=None can
+        # be used to drop the timezone
+        if tzinfo is not object:
+            _tzinfo = tzinfo
+
+        # reconstruct & check bounds
+        ts_input = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min,
+                            dts.sec, dts.us, tzinfo=_tzinfo)
+        ts = convert_datetime_to_tsobject(ts_input, _tzinfo)
+        value = ts.value + (dts.ps // 1000)
+        if value != NPY_NAT:
+            check_dts_bounds(&dts)
+
+        return create_timestamp_from_ts(value, dts, _tzinfo, self.freq)
+
+    def isoformat(self, sep='T'):
+        base = super(_Timestamp, self).isoformat(sep=sep)
+        if self.nanosecond == 0:
+            return base
+
+        # split off the trailing 6-character UTC offset, if any, so the
+        # nanosecond digits can be inserted before it
+        if self.tzinfo is not None:
+            base1, base2 = base[:-6], base[-6:]
+        else:
+            base1, base2 = base, ""
+
+        if self.microsecond != 0:
+            # base already ends in the microsecond digits; append 3 more
+            base1 += "%.3d" % self.nanosecond
+        else:
+            base1 += ".%.9d" % self.nanosecond
+
+        return base1 + base2
+
+    def _has_time_component(self):
+        """
+        Returns if the Timestamp has a time component
+        in addition to the date part
+        """
+        return (self.time() != _zero_time
+                or self.tzinfo is not None
+                or self.nanosecond != 0)
+
+    def to_julian_date(self):
+        """
+        Convert Timestamp to a Julian Date.
+        0 Julian date is noon January 1, 4713 BC.
+        """
+        year = self.year
+        month = self.month
+        day = self.day
+        if month <= 2:
+            year -= 1
+            month += 12
+        return (day +
+                np.fix((153 * month - 457) / 5) +
+                365 * year +
+                np.floor(year / 4) -
+                np.floor(year / 100) +
+                np.floor(year / 400) +
+                1721118.5 +
+                (self.hour +
+                 self.minute / 60.0 +
+                 self.second / 3600.0 +
+                 self.microsecond / 3600.0 / 1e+6 +
+                 self.nanosecond / 3600.0 / 1e+9
+                 ) / 24.0)
+
+    def normalize(self):
+        """
+        Normalize Timestamp to midnight, preserving
+        tz information.
+        """
+        normalized_value = date_normalize(
+            np.array([self.value], dtype='i8'), tz=self.tz)[0]
+        return Timestamp(normalized_value).tz_localize(self.tz)
+
+    def __radd__(self, other):
+        # __radd__ on cython extension types like _Timestamp is not used, so
+        # define it here instead
+        return self + other
+
+
+# Add the min and max fields at the class level
+cdef int64_t _NS_UPPER_BOUND = INT64_MAX
+# the smallest value we could actually represent is
+#   INT64_MIN + 1 == -9223372036854775807
+# but to allow overflow free conversion with a microsecond resolution
+# use the smallest value with a 0 nanosecond unit (0s in last 3 digits)
+cdef int64_t _NS_LOWER_BOUND = -9223372036854775000
+
+# Resolution is in nanoseconds
+Timestamp.min = Timestamp(_NS_LOWER_BOUND)
+Timestamp.max = Timestamp(_NS_UPPER_BOUND)
diff --git a/setup.py b/setup.py
index 44e7de1665bf0..c5ec33b8d5064 100755
--- a/setup.py
+++ b/setup.py
@@ -345,6 +345,7 @@ class CheckSDist(sdist_class):
                  'pandas/_libs/tslibs/strptime.pyx',
                  'pandas/_libs/tslibs/np_datetime.pyx',
                  'pandas/_libs/tslibs/timedeltas.pyx',
+                 'pandas/_libs/tslibs/timestamps.pyx',
                  'pandas/_libs/tslibs/timezones.pyx',
                  'pandas/_libs/tslibs/conversion.pyx',
                  'pandas/_libs/tslibs/fields.pyx',
@@ -546,6 +547,7 @@ def pxd(name):
                      '_libs/src/khash',
                      '_libs/tslibs/conversion',
                      '_libs/tslibs/timedeltas',
+                     '_libs/tslibs/timestamps',
                      '_libs/tslibs/timezones',
                      '_libs/tslibs/nattype'],
         'depends': tseries_depends,
@@ -591,7 +593,8 @@ def pxd(name):
                      '_libs/src/khash',
                      '_libs/tslibs/frequencies',
                      '_libs/tslibs/timezones'],
-        'depends': tseries_depends},
+        'depends': tseries_depends,
+        'sources': np_datetime_sources},
     '_libs.tslibs.strptime': {
         'pyxfile': '_libs/tslibs/strptime',
         'pxdfiles': ['_libs/src/util',
@@ -604,6 +607,15 @@ def pxd(name):
                      '_libs/tslibs/nattype'],
         'depends': np_datetime_headers,
         'sources': np_datetime_sources},
+    '_libs.tslibs.timestamps': {
+        'pyxfile': '_libs/tslibs/timestamps',
+        'pxdfiles': ['_libs/src/util',
+                     '_libs/tslibs/conversion',
+                     '_libs/tslibs/nattype',
+                     '_libs/tslibs/timedeltas',
+                     '_libs/tslibs/timezones'],
+        'depends': tseries_depends,
+        'sources': np_datetime_sources},
     '_libs.tslibs.timezones': {
         'pyxfile': '_libs/tslibs/timezones',
         'pxdfiles': ['_libs/src/util']},