Merge branch 'master' of https://github.com/pandas-dev/pandas into error-on-non-naive-datetime-strings

1kastner · 1kastner · commit 931b7f9b5c48 · 2017-11-17T08:44:14.000+01:00
diff --git a/ci/lint.sh b/ci/lint.sh
@@ -72,7 +72,13 @@ if [ "$LINT" ]; then
     echo "Linting *.c and *.h DONE"
 
     echo "Check for invalid testing"
-    grep -r -E --include '*.py' --exclude testing.py '(numpy|np)\.testing' pandas
+
+    # Check for the following code in testing:
+    #
+    # np.testing
+    # np.array_equal
+    grep -r -E --include '*.py' --exclude testing.py '(numpy|np)(\.testing|\.array_equal)' pandas/tests/
+
     if [ $? = "0" ]; then
         RET=1
     fi
diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
@@ -46,13 +46,15 @@ Other API Changes
 - :class:`Timestamp` will no longer silently ignore invalid ``freq`` arguments (:issue:`5168`)
 - :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`)
 - `tseries.frequencies.get_freq_group()` and `tseries.frequencies.DAYS` are removed from the public API (:issue:`18034`)
+- :func:`Series.truncate` and :func:`DataFrame.truncate` will now raise a ``ValueError`` if the index is not sorted, instead of an unhelpful ``KeyError`` (:issue:`17935`)
+
 
 .. _whatsnew_0220.deprecations:
 
 Deprecations
 ~~~~~~~~~~~~
 
--
+- ``Series.from_array`` and ``SparseSeries.from_array`` are deprecated. Use the normal constructors ``Series(..)`` and ``SparseSeries(..)`` instead (:issue:`18213`).
 -
 -
 
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
@@ -22,14 +22,11 @@ from pandas._libs.tslib import Timestamp, Timedelta
 from datetime import datetime, timedelta, date
 
 from cpython cimport PyTuple_Check, PyList_Check
+from cpython.slice cimport PySlice_Check
 
 cdef int64_t iNaT = util.get_nat()
 
 
-cdef extern from "Python.h":
-    int PySlice_Check(object)
-
-
 cdef inline is_definitely_invalid_key(object val):
     if PyTuple_Check(val):
         try:
diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 # cython: profile=False
 from datetime import datetime, date, timedelta
-import operator
 
 from cpython cimport (
     PyUnicode_Check,
@@ -201,7 +200,7 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end):
         Py_ssize_t i, n
         freq_conv_func func
         asfreq_info finfo
-        int64_t val, ordinal
+        int64_t val
         char relation
 
     n = len(arr)
@@ -236,9 +235,6 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end):
 
 def period_ordinal(int y, int m, int d, int h, int min,
                    int s, int us, int ps, int freq):
-    cdef:
-        int64_t ordinal
-
     return get_period_ordinal(y, m, d, h, min, s, us, ps, freq)
 
 
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
@@ -5,7 +5,7 @@
 # distutils: define_macros=CYTHON_TRACE_NOGIL=0
 
 cimport numpy as np
-from numpy cimport (int8_t, int32_t, int64_t, import_array, ndarray,
+from numpy cimport (int32_t, int64_t, import_array, ndarray,
                     float64_t, NPY_DATETIME, NPY_TIMEDELTA)
 import numpy as np
 
@@ -24,8 +24,6 @@ from cpython cimport (
 cdef extern from "Python.h":
     cdef PyTypeObject *Py_TYPE(object)
 
-from libc.stdlib cimport free
-
 from util cimport (is_integer_object, is_float_object, is_string_object,
                    is_datetime64_object, is_timedelta64_object,
                    INT64_MAX)
@@ -51,7 +49,6 @@ from tslibs.np_datetime cimport (check_dts_bounds,
                                  PANDAS_DATETIMEUNIT, PANDAS_FR_ns,
                                  dt64_to_dtstruct, dtstruct_to_dt64,
                                  pydatetime_to_dt64, pydate_to_dt64,
-                                 npy_datetime,
                                  get_datetime64_unit, get_datetime64_value,
                                  get_timedelta64_value,
                                  days_per_month_table,
@@ -75,12 +72,10 @@ from tslibs.timedeltas cimport cast_from_unit, delta_to_nanoseconds
 from tslibs.timedeltas import Timedelta
 from tslibs.timezones cimport (
     is_utc, is_tzlocal, is_fixed_offset,
-    treat_tz_as_dateutil, treat_tz_as_pytz,
-    get_timezone, get_utcoffset, maybe_get_tz,
+    treat_tz_as_pytz,
+    get_timezone, maybe_get_tz,
     get_dst_info)
-from tslibs.fields import (
-    get_date_name_field, get_start_end_field, get_date_field,
-    build_field_sarray)
+from tslibs.fields import get_start_end_field, get_date_field
 from tslibs.conversion cimport (tz_convert_single, _TSObject,
                                 convert_to_tsobject,
                                 convert_datetime_to_tsobject,
@@ -1763,13 +1758,6 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
         return oresult
 
 
-cdef PyTypeObject* td_type = <PyTypeObject*> Timedelta
-
-
-cdef inline bint is_timedelta(object o):
-    return Py_TYPE(o) == td_type  # isinstance(o, Timedelta)
-
-
 # ----------------------------------------------------------------------
 # Conversion routines
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2099,10 +2099,8 @@ def _ixs(self, i, axis=0):
 
                 if index_len and not len(values):
                     values = np.array([np.nan] * index_len, dtype=object)
-                result = self._constructor_sliced.from_array(values,
-                                                             index=self.index,
-                                                             name=label,
-                                                             fastpath=True)
+                result = self._constructor_sliced._from_array(
+                    values, index=self.index, name=label, fastpath=True)
 
                 # this is a cached value, mark it so
                 result._set_as_cached(label, self)
@@ -2497,8 +2495,8 @@ def _box_item_values(self, key, values):
 
     def _box_col_values(self, values, items):
         """ provide boxed values for a column """
-        return self._constructor_sliced.from_array(values, index=self.index,
-                                                   name=items, fastpath=True)
+        return self._constructor_sliced._from_array(values, index=self.index,
+                                                    name=items, fastpath=True)
 
     def __setitem__(self, key, value):
         key = com._apply_if_callable(key, self)
@@ -4939,8 +4937,8 @@ def _apply_standard(self, func, axis, ignore_failures=False, reduce=True):
             res_index = self.index
             res_columns = self.columns
             values = self.values
-            series_gen = (Series.from_array(arr, index=res_columns, name=name,
-                                            dtype=dtype)
+            series_gen = (Series._from_array(arr, index=res_columns, name=name,
+                                             dtype=dtype)
                           for i, (arr, name) in enumerate(zip(values,
                                                               res_index)))
         else:  # pragma : no cover
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -6338,6 +6338,11 @@ def truncate(self, before=None, after=None, axis=None, copy=True):
         axis = self._get_axis_number(axis)
         ax = self._get_axis(axis)
 
+        # GH 17935
+        # Check that index is sorted
+        if not ax.is_monotonic_increasing and not ax.is_monotonic_decreasing:
+            raise ValueError("truncate requires a sorted index")
+
         # if we have a date index, convert to dates, otherwise
         # treat like a slice
         if ax.is_all_dates:
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -55,7 +55,7 @@
 from pandas._libs import (lib, index as libindex, tslib as libts,
                           algos as libalgos, join as libjoin,
                           Timestamp, period as libperiod)
-from pandas._libs.tslibs import timezones, conversion
+from pandas._libs.tslibs import timezones, conversion, fields
 
 # -------- some conversion wrapper functions
 
@@ -75,20 +75,20 @@ def f(self):
                                                self.freq.kwds.get('month', 12))
                             if self.freq else 12)
 
-                result = libts.get_start_end_field(values, field, self.freqstr,
-                                                   month_kw)
+                result = fields.get_start_end_field(values, field,
+                                                    self.freqstr, month_kw)
             else:
-                result = libts.get_date_field(values, field)
+                result = fields.get_date_field(values, field)
 
             # these return a boolean by-definition
             return result
 
         if field in self._object_ops:
-            result = libts.get_date_name_field(values, field)
+            result = fields.get_date_name_field(values, field)
             result = self._maybe_mask_results(result)
 
         else:
-            result = libts.get_date_field(values, field)
+            result = fields.get_date_field(values, field)
             result = self._maybe_mask_results(result, convert='float64')
 
         return Index(result, name=self.name)
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -273,6 +273,25 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
     @classmethod
     def from_array(cls, arr, index=None, name=None, dtype=None, copy=False,
                    fastpath=False):
+        """
+        DEPRECATED: use the pd.Series(..) constructor instead.
+
+        """
+        warnings.warn("'from_array' is deprecated and will be removed in a "
+                      "future version. Please use the pd.Series(..) "
+                      "constructor instead.", FutureWarning, stacklevel=2)
+        return cls._from_array(arr, index=index, name=name, dtype=dtype,
+                               copy=copy, fastpath=fastpath)
+
+    @classmethod
+    def _from_array(cls, arr, index=None, name=None, dtype=None, copy=False,
+                    fastpath=False):
+        """
+        Internal method used in DataFrame.__setitem__/__getitem__.
+        Unlike Series(..), this method checks whether a sparse array is
+        passed.
+
+        """
         # return a sparse series here
         if isinstance(arr, ABCSparseArray):
             from pandas.core.sparse.series import SparseSeries
diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py
@@ -256,8 +256,18 @@ def npoints(self):
     def from_array(cls, arr, index=None, name=None, copy=False,
                    fill_value=None, fastpath=False):
         """
-        Simplified alternate constructor
+        DEPRECATED: use the pd.SparseSeries(..) constructor instead.
+
         """
+        warnings.warn("'from_array' is deprecated and will be removed in a "
+                      "future version. Please use the pd.SparseSeries(..) "
+                      "constructor instead.", FutureWarning, stacklevel=2)
+        return cls._from_array(arr, index=index, name=name, copy=copy,
+                               fill_value=fill_value, fastpath=fastpath)
+
+    @classmethod
+    def _from_array(cls, arr, index=None, name=None, copy=False,
+                    fill_value=None, fastpath=False):
         return cls(arr, index=index, name=name, copy=copy,
                    fill_value=fill_value, fastpath=fastpath)
 
diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py
@@ -377,6 +377,33 @@ def test_truncate_copy(self):
         truncated.values[:] = 5.
         assert not (self.tsframe.values[5:11] == 5).any()
 
+    def test_truncate_nonsortedindex(self):
+        # GH 17935
+
+        df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e']},
+                          index=[5, 3, 2, 9, 0])
+        with tm.assert_raises_regex(ValueError,
+                                    'truncate requires a sorted index'):
+            df.truncate(before=3, after=9)
+
+        rng = pd.date_range('2011-01-01', '2012-01-01', freq='W')
+        ts = pd.DataFrame({'A': np.random.randn(len(rng)),
+                           'B': np.random.randn(len(rng))},
+                          index=rng)
+        with tm.assert_raises_regex(ValueError,
+                                    'truncate requires a sorted index'):
+            ts.sort_values('A', ascending=False).truncate(before='2011-11',
+                                                          after='2011-12')
+
+        df = pd.DataFrame({3: np.random.randn(5),
+                           20: np.random.randn(5),
+                           2: np.random.randn(5),
+                           0: np.random.randn(5)},
+                          columns=[3, 20, 2, 0])
+        with tm.assert_raises_regex(ValueError,
+                                    'truncate requires a sorted index'):
+            df.truncate(before=2, after=20, axis=1)
+
     def test_asfreq(self):
         offset_monthly = self.tsframe.asfreq(offsets.BMonthEnd())
         rule_monthly = self.tsframe.asfreq('BM')
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
@@ -286,29 +286,6 @@ def test_delete(self):
         with pytest.raises(Exception):
             newb.delete(3)
 
-    def test_split_block_at(self):
-
-        # with dup column support this method was taken out
-        # GH3679
-        pytest.skip("skipping for now")
-
-        bs = list(self.fblock.split_block_at('a'))
-        assert len(bs) == 1
-        assert np.array_equal(bs[0].items, ['c', 'e'])
-
-        bs = list(self.fblock.split_block_at('c'))
-        assert len(bs) == 2
-        assert np.array_equal(bs[0].items, ['a'])
-        assert np.array_equal(bs[1].items, ['e'])
-
-        bs = list(self.fblock.split_block_at('e'))
-        assert len(bs) == 1
-        assert np.array_equal(bs[0].items, ['a', 'c'])
-
-        # bblock = get_bool_ex(['f'])
-        # bs = list(bblock.split_block_at('f'))
-        # assert len(bs), 0)
-
 
 class TestDatetimeBlock(object):
 
diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py
@@ -195,6 +195,11 @@ def test_constructor_dict_timedelta_index(self):
         )
         self._assert_series_equal(result, expected)
 
+    def test_from_array_deprecated(self):
+
+        with tm.assert_produces_warning(FutureWarning):
+            self.series_klass.from_array([1, 2, 3])
+
 
 class TestSeriesMisc(TestData, SharedWithSparse):
 
diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py
@@ -272,10 +272,9 @@ def test_truncate(self):
             pd.Period('2017-09-03')
         ])
         series2 = pd.Series([1, 2, 3], index=idx2)
-        result2 = series2.truncate(after='2017-09-02')
+        result2 = series2.sort_index().truncate(after='2017-09-02')
 
         expected_idx2 = pd.PeriodIndex([
-            pd.Period('2017-09-03'),
             pd.Period('2017-09-02')
         ])
-        tm.assert_series_equal(result2, pd.Series([1, 2], index=expected_idx2))
+        tm.assert_series_equal(result2, pd.Series([2], index=expected_idx2))
diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py
@@ -236,6 +236,22 @@ def test_truncate(self):
                       before=self.ts.index[-1] + offset,
                       after=self.ts.index[0] - offset)
 
+    def test_truncate_nonsortedindex(self):
+        # GH 17935
+
+        s = pd.Series(['a', 'b', 'c', 'd', 'e'],
+                      index=[5, 3, 2, 9, 0])
+        with tm.assert_raises_regex(ValueError,
+                                    'truncate requires a sorted index'):
+            s.truncate(before=3, after=9)
+
+        rng = pd.date_range('2011-01-01', '2012-01-01', freq='W')
+        ts = pd.Series(np.random.randn(len(rng)), index=rng)
+        with tm.assert_raises_regex(ValueError,
+                                    'truncate requires a sorted index'):
+            ts.sort_values(ascending=False).truncate(before='2011-11',
+                                                     after='2011-12')
+
     def test_asfreq(self):
         ts = Series([0., 1., 2.], index=[datetime(2009, 10, 30), datetime(
             2009, 11, 30), datetime(2009, 12, 31)])
@@ -919,8 +935,9 @@ def test_from_M8_structured(self):
         assert isinstance(s[0], Timestamp)
         assert s[0] == dates[0][0]
 
-        s = Series.from_array(arr['Date'], Index([0]))
-        assert s[0] == dates[0][0]
+        with pytest.warns(FutureWarning):
+            s = Series.from_array(arr['Date'], Index([0]))
+            assert s[0] == dates[0][0]
 
     def test_get_level_values_box(self):
         from pandas import MultiIndex

Original file line number	Diff line number	Diff line change
`@@ -46,13 +46,15 @@ Other API Changes`
`46`	`46`	- :class:`Timestamp` will no longer silently ignore invalid ``freq`` arguments (:issue:`5168`)
`47`	`47`	- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`)
`48`	`48`	- `tseries.frequencies.get_freq_group()` and `tseries.frequencies.DAYS` are removed from the public API (:issue:`18034`)
	`49`	+- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`)
	`50`	`+`
`49`	`51`
`50`	`52`	`.. _whatsnew_0220.deprecations:`
`51`	`53`
`52`	`54`	`Deprecations`
`53`	`55`	`~~~~~~~~~~~~`
`54`	`56`
`55`		`--`
	`57`	+- ``Series.from_array`` and ``SparseSeries.from_array`` are deprecated. Use the normal constructor ``Series(..)`` and ``SparseSeries(..)`` instead (:issue:`18213`).
`56`	`58`	`-`
`57`	`59`	`-`
`58`	`60`
Original file line number	Diff line number	Diff line change
`@@ -195,6 +195,11 @@ def test_constructor_dict_timedelta_index(self):`
`195`	`195`	`)`
`196`	`196`	`self._assert_series_equal(result, expected)`
`197`	`197`
	`198`	`+ def test_from_array_deprecated(self):`
	`199`	`+`
	`200`	`+ with tm.assert_produces_warning(FutureWarning):`
	`201`	`+ self.series_klass.from_array([1, 2, 3])`
	`202`	`+`
`198`	`203`
`199`	`204`	`class TestSeriesMisc(TestData, SharedWithSparse):`
`200`	`205`