pandas-dev
diff --git a/‎doc/source/getting_started/install.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/getting_started/install.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/user_guide/io.rst
Lines changed: 73 additions & 65 deletions b/‎doc/source/user_guide/io.rst
Lines changed: 73 additions & 65 deletions
diff --git a/‎doc/source/whatsnew/v0.21.0.rst
Lines changed: 2 additions & 2 deletions b/‎doc/source/whatsnew/v0.21.0.rst
Lines changed: 2 additions & 2 deletions
diff --git a/‎doc/source/whatsnew/v0.8.1.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/whatsnew/v0.8.1.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/whatsnew/v1.0.0.rst
Lines changed: 1 addition & 0 deletions b/‎doc/source/whatsnew/v1.0.0.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎pandas/_libs/groupby.pyx
Lines changed: 1 addition & 2 deletions b/‎pandas/_libs/groupby.pyx
Lines changed: 1 addition & 2 deletions
diff --git a/‎pandas/_libs/hashing.pyx
Lines changed: 5 additions & 5 deletions b/‎pandas/_libs/hashing.pyx
Lines changed: 5 additions & 5 deletions
diff --git a/‎pandas/_libs/index.pyx
Lines changed: 7 additions & 7 deletions b/‎pandas/_libs/index.pyx
Lines changed: 7 additions & 7 deletions
diff --git a/‎pandas/_libs/internals.pyx
Lines changed: 1 addition & 1 deletion b/‎pandas/_libs/internals.pyx
Lines changed: 1 addition & 1 deletion
@@ -218,7 +218,7 @@ Recommended dependencies
   ``numexpr`` uses multiple cores as well as smart chunking and caching to achieve large speedups.
   If installed, must be Version 2.6.2 or higher.
 
-* `bottleneck <https://github.com/kwgoodman/bottleneck>`__: for accelerating certain types of ``nan``
+* `bottleneck <https://github.com/pydata/bottleneck>`__: for accelerating certain types of ``nan``
   evaluations. ``bottleneck`` uses specialized cython routines to achieve large speedups. If installed,
   must be Version 1.2.1 or higher.
 
 
@@ -5576,7 +5576,7 @@ Performance considerations
 --------------------------
 
 This is an informal comparison of various IO methods, using pandas
-0.20.3. Timings are machine dependent and small differences should be
+0.24.2. Timings are machine dependent and small differences should be
 ignored.
 
 .. code-block:: ipython
@@ -5597,11 +5597,18 @@ Given the next test set:
 
 .. code-block:: python
 
+
+
+   import numpy as np
+
    import os
 
    sz = 1000000
    df = pd.DataFrame({'A': np.random.randn(sz), 'B': [1] * sz})
 
+   sz = 1000000
+   np.random.seed(42)
+   df = pd.DataFrame({'A': np.random.randn(sz), 'B': [1] * sz})
 
    def test_sql_write(df):
        if os.path.exists('test.sql'):
@@ -5610,151 +5617,152 @@ Given the next test set:
        df.to_sql(name='test_table', con=sql_db)
        sql_db.close()
 
-
    def test_sql_read():
        sql_db = sqlite3.connect('test.sql')
        pd.read_sql_query("select * from test_table", sql_db)
        sql_db.close()
 
-
    def test_hdf_fixed_write(df):
        df.to_hdf('test_fixed.hdf', 'test', mode='w')
 
-
    def test_hdf_fixed_read():
        pd.read_hdf('test_fixed.hdf', 'test')
 
-
    def test_hdf_fixed_write_compress(df):
        df.to_hdf('test_fixed_compress.hdf', 'test', mode='w', complib='blosc')
 
-
    def test_hdf_fixed_read_compress():
        pd.read_hdf('test_fixed_compress.hdf', 'test')
 
-
    def test_hdf_table_write(df):
        df.to_hdf('test_table.hdf', 'test', mode='w', format='table')
 
-
    def test_hdf_table_read():
        pd.read_hdf('test_table.hdf', 'test')
 
-
    def test_hdf_table_write_compress(df):
        df.to_hdf('test_table_compress.hdf', 'test', mode='w',
                  complib='blosc', format='table')
 
-
    def test_hdf_table_read_compress():
        pd.read_hdf('test_table_compress.hdf', 'test')
 
-
    def test_csv_write(df):
        df.to_csv('test.csv', mode='w')
 
-
    def test_csv_read():
        pd.read_csv('test.csv', index_col=0)
 
-
    def test_feather_write(df):
        df.to_feather('test.feather')
 
-
    def test_feather_read():
        pd.read_feather('test.feather')
 
-
    def test_pickle_write(df):
        df.to_pickle('test.pkl')
 
-
    def test_pickle_read():
        pd.read_pickle('test.pkl')
 
-
    def test_pickle_write_compress(df):
        df.to_pickle('test.pkl.compress', compression='xz')
 
-
    def test_pickle_read_compress():
        pd.read_pickle('test.pkl.compress', compression='xz')
 
-When writing, the top-three functions in terms of speed are are
-``test_pickle_write``, ``test_feather_write`` and ``test_hdf_fixed_write_compress``.
+   def test_parquet_write(df):
+       df.to_parquet('test.parquet')
+
+   def test_parquet_read():
+       pd.read_parquet('test.parquet')
+
+When writing, the top-three functions in terms of speed are ``test_feather_write``, ``test_hdf_fixed_write`` and ``test_hdf_fixed_write_compress``.
 
 .. code-block:: ipython
 
-   In [14]: %timeit test_sql_write(df)
-   2.37 s ± 36.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+   In [4]: %timeit test_sql_write(df)
+   3.29 s ± 43.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
 
-   In [15]: %timeit test_hdf_fixed_write(df)
-   194 ms ± 65.9 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
+   In [5]: %timeit test_hdf_fixed_write(df)
+   19.4 ms ± 560 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
 
-   In [26]: %timeit test_hdf_fixed_write_compress(df)
-   119 ms ± 2.15 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
+   In [6]: %timeit test_hdf_fixed_write_compress(df)
+   19.6 ms ± 308 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
 
-   In [16]: %timeit test_hdf_table_write(df)
-   623 ms ± 125 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+   In [7]: %timeit test_hdf_table_write(df)
+   449 ms ± 5.61 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
 
-   In [27]: %timeit test_hdf_table_write_compress(df)
-   563 ms ± 23.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+   In [8]: %timeit test_hdf_table_write_compress(df)
+   448 ms ± 11.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
 
-   In [17]: %timeit test_csv_write(df)
-   3.13 s ± 49.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+   In [9]: %timeit test_csv_write(df)
+   3.66 s ± 26.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
 
-   In [30]: %timeit test_feather_write(df)
-   103 ms ± 5.88 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
+   In [10]: %timeit test_feather_write(df)
+   9.75 ms ± 117 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
 
-   In [31]: %timeit test_pickle_write(df)
-   109 ms ± 3.72 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
+   In [11]: %timeit test_pickle_write(df)
+   30.1 ms ± 229 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
 
-   In [32]: %timeit test_pickle_write_compress(df)
-   3.33 s ± 55.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+   In [12]: %timeit test_pickle_write_compress(df)
+   4.29 s ± 15.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+
+   In [13]: %timeit test_parquet_write(df)
+   67.6 ms ± 706 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
 
 When reading, the top three are ``test_feather_read``, ``test_pickle_read`` and
 ``test_hdf_fixed_read``.
 
+
 .. code-block:: ipython
 
-   In [18]: %timeit test_sql_read()
-   1.35 s ± 14.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+   In [14]: %timeit test_sql_read()
+   1.77 s ± 17.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+
+   In [15]: %timeit test_hdf_fixed_read()
+   19.4 ms ± 436 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
+
+   In [16]: %timeit test_hdf_fixed_read_compress()
+   19.5 ms ± 222 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
 
-   In [19]: %timeit test_hdf_fixed_read()
-   14.3 ms ± 438 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
+   In [17]: %timeit test_hdf_table_read()
+   38.6 ms ± 857 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
 
-   In [28]: %timeit test_hdf_fixed_read_compress()
-   23.5 ms ± 672 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
+   In [18]: %timeit test_hdf_table_read_compress()
+   38.8 ms ± 1.49 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
 
-   In [20]: %timeit test_hdf_table_read()
-   35.4 ms ± 314 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
+   In [19]: %timeit test_csv_read()
+   452 ms ± 9.04 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
 
-   In [29]: %timeit test_hdf_table_read_compress()
-   42.6 ms ± 2.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
+   In [20]: %timeit test_feather_read()
+   12.4 ms ± 99.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
 
-   In [22]: %timeit test_csv_read()
-   516 ms ± 27.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
+   In [21]: %timeit test_pickle_read()
+   18.4 ms ± 191 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
 
-   In [33]: %timeit test_feather_read()
-   4.06 ms ± 115 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
+   In [22]: %timeit test_pickle_read_compress()
+   915 ms ± 7.48 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
 
-   In [34]: %timeit test_pickle_read()
-   6.5 ms ± 172 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
+   In [23]: %timeit test_parquet_read()
+   24.4 ms ± 146 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
 
-   In [35]: %timeit test_pickle_read_compress()
-   588 ms ± 3.57 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
 
+For this test case, ``test.pkl.compress``, ``test.parquet`` and ``test.feather`` took the least space on disk.
 Space on disk (in bytes)
 
 .. code-block:: none
 
-    34816000 Aug 21 18:00 test.sql
-    24009240 Aug 21 18:00 test_fixed.hdf
-     7919610 Aug 21 18:00 test_fixed_compress.hdf
-    24458892 Aug 21 18:00 test_table.hdf
-     8657116 Aug 21 18:00 test_table_compress.hdf
-    28520770 Aug 21 18:00 test.csv
-    16000248 Aug 21 18:00 test.feather
-    16000848 Aug 21 18:00 test.pkl
-     7554108 Aug 21 18:00 test.pkl.compress
+    29519500 Oct 10 06:45 test.csv
+    16000248 Oct 10 06:45 test.feather
+     8281983 Oct 10 06:49 test.parquet
+    16000857 Oct 10 06:47 test.pkl
+     7552144 Oct 10 06:48 test.pkl.compress
+    34816000 Oct 10 06:42 test.sql
+    24009288 Oct 10 06:43 test_fixed.hdf
+    24009288 Oct 10 06:43 test_fixed_compress.hdf
+    24458940 Oct 10 06:44 test_table.hdf
+    24458940 Oct 10 06:44 test_table_compress.hdf
+
+
+
@@ -20,7 +20,7 @@ Highlights include:
 - Integration with `Apache Parquet <https://parquet.apache.org/>`__, including a new top-level :func:`read_parquet` function and :meth:`DataFrame.to_parquet` method, see :ref:`here <whatsnew_0210.enhancements.parquet>`.
 - New user-facing :class:`pandas.api.types.CategoricalDtype` for specifying
   categoricals independent of the data, see :ref:`here <whatsnew_0210.enhancements.categorical_dtype>`.
-- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck <http://berkeleyanalytics.com/bottleneck>`__ is installed, and ``sum`` and ``prod`` on empty Series now return NaN instead of 0, see :ref:`here <whatsnew_0210.api_breaking.bottleneck>`.
+- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck <https://bottleneck.readthedocs.io>`__ is installed, and ``sum`` and ``prod`` on empty Series now return NaN instead of 0, see :ref:`here <whatsnew_0210.api_breaking.bottleneck>`.
 - Compatibility fixes for pypy, see :ref:`here <whatsnew_0210.pypy>`.
 - Additions to the ``drop``, ``reindex`` and ``rename`` API to make them more consistent, see :ref:`here <whatsnew_0210.enhancements.drop_api>`.
 - Addition of the new methods ``DataFrame.infer_objects`` (see :ref:`here <whatsnew_0210.enhancements.infer_objects>`) and ``GroupBy.pipe`` (see :ref:`here <whatsnew_0210.enhancements.GroupBy_pipe>`).
@@ -390,7 +390,7 @@ Sum/Prod of all-NaN or empty Series/DataFrames is now consistently NaN
 
 
 The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames no longer depends on
-whether `bottleneck <http://berkeleyanalytics.com/bottleneck>`__ is installed, and return value of ``sum`` and ``prod`` on an empty Series has changed (:issue:`9422`, :issue:`15507`).
+whether `bottleneck <https://bottleneck.readthedocs.io>`__ is installed, and return value of ``sum`` and ``prod`` on an empty Series has changed (:issue:`9422`, :issue:`15507`).
 
 Calling ``sum`` or ``prod`` on an empty or all-``NaN`` ``Series``, or columns of a ``DataFrame``, will result in ``NaN``. See the :ref:`docs <missing_data.numeric_sum>`.
 
 
@@ -29,7 +29,7 @@ Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
   - Improved implementation of rolling min and max (thanks to `Bottleneck
-    <http://berkeleyanalytics.com/bottleneck/>`__ !)
+    <https://bottleneck.readthedocs.io>`__ !)
   - Add accelerated ``'median'`` GroupBy option (:issue:`1358`)
   - Significantly improve the performance of parsing ISO8601-format date
     strings with ``DatetimeIndex`` or ``to_datetime`` (:issue:`1571`)
 
@@ -414,6 +414,7 @@ Plotting
 - Bug in the ``xticks`` argument being ignored for :meth:`DataFrame.plot.bar` (:issue:`14119`)
 - :func:`set_option` now validates that the plot backend provided to ``'plotting.backend'`` implements the backend when the option is set, rather than when a plot is created (:issue:`28163`)
 - :meth:`DataFrame.plot` now allow a ``backend`` keyword arugment to allow changing between backends in one session (:issue:`28619`).
+- Bug in color validation incorrectly raising for non-color styles (:issue:`29122`).
 
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -753,8 +753,7 @@ def group_quantile(ndarray[float64_t] out,
     assert values.shape[0] == N
 
     if not (0 <= q <= 1):
-        raise ValueError("'q' must be between 0 and 1. Got"
-                         " '{}' instead".format(q))
+        raise ValueError(f"'q' must be between 0 and 1. Got '{q}' instead")
 
     inter_methods = {
         'linear': INTERPOLATION_LINEAR,
 
@@ -47,8 +47,8 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
     k = <bytes>key.encode(encoding)
     kb = <uint8_t *>k
     if len(k) != 16:
-        raise ValueError("key should be a 16-byte string encoded, "
-                         "got {key} (len {klen})".format(key=k, klen=len(k)))
+        raise ValueError(f"key should be a 16-byte string encoded, "
+                         f"got {k} (len {len(k)})")
 
     n = len(arr)
 
@@ -67,9 +67,9 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
             data = <bytes>str(val).encode(encoding)
 
         else:
-            raise TypeError("{val} of type {typ} is not a valid type "
-                            "for hashing, must be string or null"
-                            .format(val=val, typ=type(val)))
+            raise TypeError(f"{val} of type {type(val)} is not a valid type "
+                            f"for hashing, must be string or null"
+                            )
 
         l = len(data)
         lens[i] = l
 
@@ -109,7 +109,7 @@ cdef class IndexEngine:
             Py_ssize_t loc
 
         if is_definitely_invalid_key(val):
-            raise TypeError("'{val}' is an invalid key".format(val=val))
+            raise TypeError(f"'{val}' is an invalid key")
 
         if self.over_size_threshold and self.is_monotonic_increasing:
             if not self.is_unique:
@@ -556,8 +556,8 @@ cpdef convert_scalar(ndarray arr, object value):
             pass
         elif value is None or value != value:
             return np.datetime64("NaT", "ns")
-        raise ValueError("cannot set a Timestamp with a non-timestamp {typ}"
-                         .format(typ=type(value).__name__))
+        raise ValueError(f"cannot set a Timestamp with a non-timestamp "
+                         f"{type(value).__name__}")
 
     elif arr.descr.type_num == NPY_TIMEDELTA:
         if util.is_array(value):
@@ -573,8 +573,8 @@ cpdef convert_scalar(ndarray arr, object value):
             pass
         elif value is None or value != value:
             return np.timedelta64("NaT", "ns")
-        raise ValueError("cannot set a Timedelta with a non-timedelta {typ}"
-                         .format(typ=type(value).__name__))
+        raise ValueError(f"cannot set a Timedelta with a non-timedelta "
+                         f"{type(value).__name__}")
 
     if (issubclass(arr.dtype.type, (np.integer, np.floating, np.complex)) and
             not issubclass(arr.dtype.type, np.bool_)):
@@ -677,7 +677,7 @@ cdef class BaseMultiIndexCodesEngine:
             # Index._get_fill_indexer), sort (integer representations of) keys:
             order = np.argsort(lab_ints)
             lab_ints = lab_ints[order]
-            indexer = (getattr(self._base, 'get_{}_indexer'.format(method))
+            indexer = (getattr(self._base, f'get_{method}_indexer')
                        (self, lab_ints, limit=limit))
             indexer = indexer[order]
         else:
@@ -687,7 +687,7 @@ cdef class BaseMultiIndexCodesEngine:
 
     def get_loc(self, object key):
         if is_definitely_invalid_key(key):
-            raise TypeError("'{key}' is an invalid key".format(key=key))
+            raise TypeError(f"'{key}' is an invalid key")
         if not isinstance(key, tuple):
             raise KeyError(key)
         try:
 
@@ -61,7 +61,7 @@ cdef class BlockPlacement:
         else:
             v = self._as_array
 
-        return '%s(%r)' % (self.__class__.__name__, v)
+        return f'{self.__class__.__name__}({v})'
 
     def __repr__(self) -> str:
         return str(self)