Merge branch 'master' of https://github.com/pandas-dev/pandas

TanyaaCJain · TanyaaCJain · commit 359189493939 · 2019-08-22T18:32:35.000+05:30
diff --git a/ci/run_tests.sh b/ci/run_tests.sh
@@ -50,9 +50,10 @@ do
     # if no tests are found (the case of "single and slow"), pytest exits with code 5, and would make the script fail, if not for the below code
     sh -c "$PYTEST_CMD; ret=\$?; [ \$ret = 5 ] && exit 0 || exit \$ret"
 
-    if [[ "$COVERAGE" && $? == 0 ]]; then
-        echo "uploading coverage for $TYPE tests"
-        echo "bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME"
-              bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME
-    fi
+    # 2019-08-21 disabling because this is hitting HTTP 400 errors GH#27602
+    # if [[ "$COVERAGE" && $? == 0 && "$TRAVIS_BRANCH" == "master" ]]; then
+    #    echo "uploading coverage for $TYPE tests"
+    #    echo "bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME"
+    #          bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME
+    # fi
 done
diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst
@@ -1,56 +1,44 @@
 .. _whatsnew_0251:
 
-What's new in 0.25.1 (July XX, 2019)
-------------------------------------
+What's new in 0.25.1 (August 21, 2019)
+--------------------------------------
 
-Enhancements
-~~~~~~~~~~~~
-
-
-.. _whatsnew_0251.enhancements.other:
+These are the changes in pandas 0.25.1. See :ref:`release` for a full changelog
+including other versions of pandas.
 
-Other enhancements
-^^^^^^^^^^^^^^^^^^
+I/O and LZMA
+~~~~~~~~~~~~
 
--
--
--
+Some users may unknowingly have an incomplete Python installation lacking the ``lzma`` module from the standard library. In this case, ``import pandas`` failed due to an ``ImportError`` (:issue:`27575`).
+Pandas will now warn, rather than raising an ``ImportError``, if the ``lzma`` module is not present. Any subsequent attempt to use ``lzma`` methods will raise a ``RuntimeError``.
+A possible fix for the lack of the ``lzma`` module is to ensure you have the necessary libraries and then re-install Python.
+For example, on macOS installing Python with ``pyenv`` may lead to an incomplete Python installation due to unmet system dependencies at compilation time (like ``xz``). Compilation will succeed, but Python might fail at run time. The issue can be solved by installing the necessary dependencies and then re-installing Python.
 
 .. _whatsnew_0251.bug_fixes:
 
 Bug fixes
 ~~~~~~~~~
 
-
 Categorical
 ^^^^^^^^^^^
 
-- Bug in :meth:`Categorical.fillna` would replace all values, not just those that are ``NaN`` (:issue:`26215`)
--
+- Bug in :meth:`Categorical.fillna` that would replace all values, not just those that are ``NaN`` (:issue:`26215`)
 
 Datetimelike
 ^^^^^^^^^^^^
+
 - Bug in :func:`to_datetime` where passing a timezone-naive :class:`DatetimeArray` or :class:`DatetimeIndex` and ``utc=True`` would incorrectly return a timezone-naive result (:issue:`27733`)
 - Bug in :meth:`Period.to_timestamp` where a :class:`Period` outside the :class:`Timestamp` implementation bounds (roughly 1677-09-21 to 2262-04-11) would return an incorrect :class:`Timestamp` instead of raising ``OutOfBoundsDatetime`` (:issue:`19643`)
--
--
-
-Timedelta
-^^^^^^^^^
-
--
--
--
+- Bug in iterating over :class:`DatetimeIndex` when the underlying data is read-only (:issue:`28055`)
 
 Timezones
 ^^^^^^^^^
 
 - Bug in :class:`Index` where a numpy object array with a timezone aware :class:`Timestamp` and ``np.nan`` would not return a :class:`DatetimeIndex` (:issue:`27011`)
--
--
 
 Numeric
 ^^^^^^^
+
 - Bug in :meth:`Series.interpolate` when using a timezone aware :class:`DatetimeIndex` (:issue:`27548`)
 - Bug when printing negative floating point complex numbers would raise an ``IndexError`` (:issue:`27484`)
 - Bug where :class:`DataFrame` arithmetic operators such as :meth:`DataFrame.mul` with a :class:`Series` with axis=1 would raise an ``AttributeError`` on :class:`DataFrame` larger than the minimum threshold to invoke numexpr (:issue:`27636`)
@@ -60,23 +48,11 @@ Conversion
 ^^^^^^^^^^
 
 - Improved the warnings for the deprecated methods :meth:`Series.real` and :meth:`Series.imag` (:issue:`27610`)
--
--
-
-Strings
-^^^^^^^
-
--
--
--
-
 
 Interval
 ^^^^^^^^
+
 - Bug in :class:`IntervalIndex` where `dir(obj)` would raise ``ValueError`` (:issue:`27571`)
--
--
--
 
 Indexing
 ^^^^^^^^
@@ -85,47 +61,35 @@ Indexing
 - Break reference cycle involving :class:`Index` and other index classes to allow garbage collection of index objects without running the GC. (:issue:`27585`, :issue:`27840`)
 - Fix regression in assigning values to a single column of a DataFrame with a ``MultiIndex`` columns (:issue:`27841`).
 - Fix regression in ``.ix`` fallback with an ``IntervalIndex`` (:issue:`27865`).
--
 
 Missing
 ^^^^^^^
 
-- Bug in :func:`pandas.isnull` or :func:`pandas.isna` when the input is a type e.g. `type(pandas.Series())` (:issue:`27482`)
--
--
-
-MultiIndex
-^^^^^^^^^^
-
--
--
--
+- Bug in :func:`pandas.isnull` or :func:`pandas.isna` when the input is a type e.g. ``type(pandas.Series())`` (:issue:`27482`)
 
 I/O
 ^^^
+
 - Avoid calling ``S3File.s3`` when reading parquet, as this was removed in s3fs version 0.3.0 (:issue:`27756`)
 - Better error message when a negative header is passed in :func:`pandas.read_csv` (:issue:`27779`)
-- Follow the ``min_rows`` display option (introduced in v0.25.0) correctly in the html repr in the notebook (:issue:`27991`).
--
+- Follow the ``min_rows`` display option (introduced in v0.25.0) correctly in the HTML repr in the notebook (:issue:`27991`).
 
 Plotting
 ^^^^^^^^
 
-- Added a pandas_plotting_backends entrypoint group for registering plot backends. See :ref:`extending.plotting-backends` for more (:issue:`26747`).
+- Added a ``pandas_plotting_backends`` entrypoint group for registering plot backends. See :ref:`extending.plotting-backends` for more (:issue:`26747`).
 - Fixed the re-instatement of Matplotlib datetime converters after calling
-  `pandas.plotting.deregister_matplotlib_converters()` (:issue:`27481`).
--
+  :meth:`pandas.plotting.deregister_matplotlib_converters` (:issue:`27481`).
 - Fix compatibility issue with matplotlib when passing a pandas ``Index`` to a plot call (:issue:`27775`).
--
 
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
+- Fixed regression in :meth:`pandas.core.groupby.DataFrameGroupBy.quantile` raising when multiple quantiles are given (:issue:`27526`)
 - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where applying a timezone conversion lambda function would drop timezone information (:issue:`27496`)
 - Bug in :meth:`pandas.core.groupby.GroupBy.nth` where ``observed=False`` was being ignored for Categorical groupers (:issue:`26385`)
 - Bug in windowing over read-only arrays (:issue:`27766`)
 - Fixed segfault in `pandas.core.groupby.DataFrameGroupBy.quantile` when an invalid quantile was passed (:issue:`27470`)
--
 
 Reshaping
 ^^^^^^^^^
@@ -137,40 +101,13 @@ Reshaping
 
 Sparse
 ^^^^^^
-- Bug in reductions for :class:`Series` with Sparse dtypes (:issue:`27080`)
--
--
--
-
-
-Build Changes
-^^^^^^^^^^^^^
-
--
--
--
-
-ExtensionArray
-^^^^^^^^^^^^^^
 
--
--
--
+- Bug in reductions for :class:`Series` with Sparse dtypes (:issue:`27080`)
 
 Other
 ^^^^^
-- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when replacing timezone-aware timestamps using a dict-like replacer (:issue:`27720`)
--
--
--
-
-I/O and LZMA
-~~~~~~~~~~~~
 
-Some users may unknowingly have an incomplete Python installation, which lacks the `lzma` module from the standard library. In this case, `import pandas` failed due to an `ImportError` (:issue: `27575`).
-Pandas will now warn, rather than raising an `ImportError` if the `lzma` module is not present. Any subsequent attempt to use `lzma` methods will raise a `RuntimeError`.
-A possible fix for the lack of the `lzma` module is to ensure you have the necessary libraries and then re-install Python.
-For example, on MacOS installing Python with `pyenv` may lead to an incomplete Python installation due to unmet system dependencies at compilation time (like `xz`). Compilation will succeed, but Python might fail at run time. The issue can be solved by installing the necessary dependencies and then re-installing Python.
+- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when replacing timezone-aware timestamps using a dict-like replacer (:issue:`27720`)
 
 .. _whatsnew_0.251.contributors:
 
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
@@ -71,7 +71,7 @@ cdef inline object create_time_from_ts(
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def ints_to_pydatetime(int64_t[:] arr, object tz=None, object freq=None,
+def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
                        str box="datetime"):
     """
     Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -3098,7 +3098,7 @@ def _ensure_valid_index(self, value):
         passed value.
         """
         # GH5632, make sure that we are a Series convertible
-        if not len(self.index) and is_list_like(value):
+        if not len(self.index) and is_list_like(value) and len(value):
             try:
                 value = Series(value)
             except (ValueError, NotImplementedError, TypeError):
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1874,6 +1874,7 @@ def quantile(self, q=0.5, interpolation="linear"):
         a    2.0
         b    3.0
         """
+        from pandas import concat
 
         def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]:
             if is_object_dtype(vals):
@@ -1901,18 +1902,57 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray:
 
             return vals
 
-        return self._get_cythonized_result(
-            "group_quantile",
-            self.grouper,
-            aggregate=True,
-            needs_values=True,
-            needs_mask=True,
-            cython_dtype=np.float64,
-            pre_processing=pre_processor,
-            post_processing=post_processor,
-            q=q,
-            interpolation=interpolation,
-        )
+        if is_scalar(q):
+            return self._get_cythonized_result(
+                "group_quantile",
+                self.grouper,
+                aggregate=True,
+                needs_values=True,
+                needs_mask=True,
+                cython_dtype=np.float64,
+                pre_processing=pre_processor,
+                post_processing=post_processor,
+                q=q,
+                interpolation=interpolation,
+            )
+        else:
+            results = [
+                self._get_cythonized_result(
+                    "group_quantile",
+                    self.grouper,
+                    aggregate=True,
+                    needs_values=True,
+                    needs_mask=True,
+                    cython_dtype=np.float64,
+                    pre_processing=pre_processor,
+                    post_processing=post_processor,
+                    q=qi,
+                    interpolation=interpolation,
+                )
+                for qi in q
+            ]
+            result = concat(results, axis=0, keys=q)
+            # fix levels to place quantiles on the inside
+            # TODO(GH-10710): Ideally, we could write this as
+            #  >>> result.stack(0).loc[pd.IndexSlice[:, ..., q], :]
+            #  but this hits https://github.com/pandas-dev/pandas/issues/10710
+            #  which doesn't reorder the list-like `q` on the inner level.
+            order = np.roll(list(range(result.index.nlevels)), -1)
+            result = result.reorder_levels(order)
+            result = result.reindex(q, level=-1)
+
+            # fix order.
+            hi = len(q) * self.ngroups
+            arr = np.arange(0, hi, self.ngroups)
+            arrays = []
+
+            for i in range(self.ngroups):
+                arr = arr + i
+                arrays.append(arr)
+
+            indices = np.concatenate(arrays)
+            assert len(indices) == len(result)
+            return result.take(indices)
 
     @Substitution(name="groupby")
     def ngroup(self, ascending=True):
diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
@@ -821,6 +821,14 @@ def test_setitem_empty_frame_with_boolean(self, dtype, kwargs):
         df[df > df2] = 47
         assert_frame_equal(df, df2)
 
+    def test_setitem_with_empty_listlike(self):
+        # GH #17101
+        index = pd.Index([], name="idx")
+        result = pd.DataFrame(columns=["A"], index=index)
+        result["A"] = []
+        expected = pd.DataFrame(columns=["A"], index=index)
+        tm.assert_index_equal(result.index, expected.index)
+
     def test_setitem_scalars_no_index(self):
         # GH16823 / 17894
         df = DataFrame()
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
@@ -1238,6 +1238,57 @@ def test_quantile(interpolation, a_vals, b_vals, q):
     tm.assert_frame_equal(result, expected)
 
 
+def test_quantile_array():
+    # https://github.com/pandas-dev/pandas/issues/27526
+    df = pd.DataFrame({"A": [0, 1, 2, 3, 4]})
+    result = df.groupby([0, 0, 1, 1, 1]).quantile([0.25])
+
+    index = pd.MultiIndex.from_product([[0, 1], [0.25]])
+    expected = pd.DataFrame({"A": [0.25, 2.50]}, index=index)
+    tm.assert_frame_equal(result, expected)
+
+    df = pd.DataFrame({"A": [0, 1, 2, 3], "B": [4, 5, 6, 7]})
+    index = pd.MultiIndex.from_product([[0, 1], [0.25, 0.75]])
+
+    result = df.groupby([0, 0, 1, 1]).quantile([0.25, 0.75])
+    expected = pd.DataFrame(
+        {"A": [0.25, 0.75, 2.25, 2.75], "B": [4.25, 4.75, 6.25, 6.75]}, index=index
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_quantile_array_no_sort():
+    df = pd.DataFrame({"A": [0, 1, 2], "B": [3, 4, 5]})
+    result = df.groupby([1, 0, 1], sort=False).quantile([0.25, 0.5, 0.75])
+    expected = pd.DataFrame(
+        {"A": [0.5, 1.0, 1.5, 1.0, 1.0, 1.0], "B": [3.5, 4.0, 4.5, 4.0, 4.0, 4.0]},
+        index=pd.MultiIndex.from_product([[1, 0], [0.25, 0.5, 0.75]]),
+    )
+    tm.assert_frame_equal(result, expected)
+
+    result = df.groupby([1, 0, 1], sort=False).quantile([0.75, 0.25])
+    expected = pd.DataFrame(
+        {"A": [1.5, 0.5, 1.0, 1.0], "B": [4.5, 3.5, 4.0, 4.0]},
+        index=pd.MultiIndex.from_product([[1, 0], [0.75, 0.25]]),
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_quantile_array_multiple_levels():
+    df = pd.DataFrame(
+        {"A": [0, 1, 2], "B": [3, 4, 5], "c": ["a", "a", "a"], "d": ["a", "a", "b"]}
+    )
+    result = df.groupby(["c", "d"]).quantile([0.25, 0.75])
+    index = pd.MultiIndex.from_tuples(
+        [("a", "a", 0.25), ("a", "a", 0.75), ("a", "b", 0.25), ("a", "b", 0.75)],
+        names=["c", "d", None],
+    )
+    expected = pd.DataFrame(
+        {"A": [0.25, 0.75, 2.0, 2.0], "B": [3.25, 3.75, 5.0, 5.0]}, index=index
+    )
+    tm.assert_frame_equal(result, expected)
+
+
 def test_quantile_raises():
     df = pd.DataFrame(
         [["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"]
diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py
@@ -377,3 +377,11 @@ def test_nanosecond_field(self):
         dti = DatetimeIndex(np.arange(10))
 
         tm.assert_index_equal(dti.nanosecond, pd.Index(np.arange(10, dtype=np.int64)))
+
+
+def test_iter_readonly():
+    # GH#28055 ints_to_pydatetime with readonly array
+    arr = np.array([np.datetime64("2012-02-15T12:00:00.000000000")])
+    arr.setflags(write=False)
+    dti = pd.to_datetime(arr)
+    list(dti)
diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py