pandas-dev
diff --git a/‎.travis.yml
Lines changed: 4 additions & 4 deletions b/‎.travis.yml
Lines changed: 4 additions & 4 deletions
diff --git a/‎LICENSES/DATEUTIL_LICENSE
Lines changed: 54 additions & 0 deletions b/‎LICENSES/DATEUTIL_LICENSE
Lines changed: 54 additions & 0 deletions
diff --git a/‎doc/source/cookbook.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/cookbook.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/gotchas.rst
Lines changed: 22 additions & 2 deletions b/‎doc/source/gotchas.rst
Lines changed: 22 additions & 2 deletions
diff --git a/‎doc/source/index.rst.template
Lines changed: 1 addition & 0 deletions b/‎doc/source/index.rst.template
Lines changed: 1 addition & 0 deletions
diff --git a/‎doc/source/integer_na.rst
Lines changed: 101 additions & 0 deletions b/‎doc/source/integer_na.rst
Lines changed: 101 additions & 0 deletions
diff --git a/‎doc/source/io.rst
Lines changed: 10 additions & 6 deletions b/‎doc/source/io.rst
Lines changed: 10 additions & 6 deletions
diff --git a/‎doc/source/missing_data.rst
Lines changed: 38 additions & 26 deletions b/‎doc/source/missing_data.rst
Lines changed: 38 additions & 26 deletions
@@ -34,23 +34,23 @@ matrix:
     include:
     - dist: trusty
       env:
-        - JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="not slow and not network"
+        - JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="(not slow and not network)"
 
     - dist: trusty
       env:
-        - JOB="2.7" ENV_FILE="ci/deps/travis-27.yaml" PATTERN="not slow and db"
+        - JOB="2.7" ENV_FILE="ci/deps/travis-27.yaml" PATTERN="(not slow or (single and db))"
       addons:
         apt:
           packages:
           - python-gtk2
 
     - dist: trusty
       env:
-        - JOB="3.6, locale" ENV_FILE="ci/deps/travis-36-locale.yaml" PATTERN="not slow and not network and db" LOCALE_OVERRIDE="zh_CN.UTF-8"
+        - JOB="3.6, locale" ENV_FILE="ci/deps/travis-36-locale.yaml" PATTERN="((not slow and not network) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8"
 
     - dist: trusty
       env:
-        - JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36.yaml" PATTERN="not slow and not network and db" PANDAS_TESTING_MODE="deprecate" COVERAGE=true
+        - JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36.yaml" PATTERN="((not slow and not network) or (single and db))" PANDAS_TESTING_MODE="deprecate" COVERAGE=true
 
     # In allow_failures
     - dist: trusty
 
@@ -0,0 +1,54 @@
+Copyright 2017- Paul Ganssle <paul@ganssle.io>
+Copyright 2017- dateutil contributors (see AUTHORS file)
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+The above license applies to all contributions after 2017-12-01, as well as
+all contributions that have been re-licensed (see AUTHORS file for the list of
+contributors who have re-licensed their code).
+--------------------------------------------------------------------------------
+dateutil - Extensions to the standard Python datetime module.
+
+Copyright (c) 2003-2011 - Gustavo Niemeyer <gustavo@niemeyer.net>
+Copyright (c) 2012-2014 - Tomi Pieviläinen <tomi.pievilainen@iki.fi>
+Copyright (c) 2014-2016 - Yaron de Leeuw <me@jarondl.net>
+Copyright (c) 2015-     - Paul Ganssle <paul@ganssle.io>
+Copyright (c) 2015-     - dateutil contributors (see AUTHORS file)
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+    * Neither the name of the copyright holder nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+The above BSD License Applies to all code, even that also covered by Apache 2.0.
@@ -1236,7 +1236,7 @@ the following Python code will read the binary file ``'binary.dat'`` into a
 pandas ``DataFrame``, where each element of the struct corresponds to a column
 in the frame:
 
-.. ipython:: python
+.. code-block:: python
 
    names = 'count', 'avg', 'scale'
 
 
@@ -215,8 +215,28 @@ arrays. For example:
    s2.dtype
 
 This trade-off is made largely for memory and performance reasons, and also so
-that the resulting ``Series`` continues to be "numeric". One possibility is to
-use ``dtype=object`` arrays instead.
+that the resulting ``Series`` continues to be "numeric".
+
+If you need to represent integers with possibly missing values, use one of
+the nullable-integer extension dtypes provided by pandas:
+
+* :class:`Int8Dtype`
+* :class:`Int16Dtype`
+* :class:`Int32Dtype`
+* :class:`Int64Dtype`
+
+.. ipython:: python
+
+   s_int = pd.Series([1, 2, 3, 4, 5], index=list('abcde'),
+                     dtype=pd.Int64Dtype())
+   s_int
+   s_int.dtype
+
+   s2_int = s_int.reindex(['a', 'b', 'c', 'f', 'u'])
+   s2_int
+   s2_int.dtype
+
+See :ref:`integer_na` for more.
 
 ``NA`` type promotions
 ~~~~~~~~~~~~~~~~~~~~~~
 
@@ -143,6 +143,7 @@ See the package overview for more detail about what's in the library.
     timeseries
     timedeltas
     categorical
+    integer_na
     visualization
     style
     io
 
@@ -0,0 +1,101 @@
+.. currentmodule:: pandas
+
+{{ header }}
+
+.. _integer_na:
+
+**************************
+Nullable Integer Data Type
+**************************
+
+.. versionadded:: 0.24.0
+
+In :ref:`missing_data`, we saw that pandas primarily uses ``NaN`` to represent
+missing data. Because ``NaN`` is a float, this forces an array of integers with
+any missing values to become floating point. In some cases, this may not matter
+much. But if your integer column is, say, an identifier, casting to float can
+be problematic. Some integers cannot even be represented as floating point
+numbers.
+
+Pandas can represent integer data with possibly missing values using
+:class:`arrays.IntegerArray`. This is an :ref:`extension type <extending.extension-types>`
+implemented within pandas. It is not the default dtype for integers, and will not be inferred;
+you must explicitly pass the dtype into :meth:`array` or :class:`Series`:
+
+.. ipython:: python
+
+   arr = pd.array([1, 2, np.nan], dtype=pd.Int64Dtype())
+   arr
+
+Or the string alias ``"Int64"`` (note the capital ``"I"``, to differentiate from
+NumPy's ``'int64'`` dtype):
+
+.. ipython:: python
+
+   pd.array([1, 2, np.nan], dtype="Int64")
+
+This array can be stored in a :class:`DataFrame` or :class:`Series` like any
+NumPy array.
+
+.. ipython:: python
+
+   pd.Series(arr)
+
+You can also pass the list-like object to the :class:`Series` constructor
+with the dtype.
+
+.. ipython:: python
+
+   s = pd.Series([1, 2, np.nan], dtype="Int64")
+   s
+
+By default (if you don't specify ``dtype``), NumPy is used, and you'll end
+up with a ``float64`` dtype Series:
+
+.. ipython:: python
+
+   pd.Series([1, 2, np.nan])
+
+Operations involving an integer array will behave similarly to NumPy arrays.
+Missing values will be propagated, and the data will be coerced to another
+dtype if needed.
+
+.. ipython:: python
+
+   # arithmetic
+   s + 1
+
+   # comparison
+   s == 1
+
+   # indexing
+   s.iloc[1:3]
+
+   # operate with other dtypes
+   s + s.iloc[1:3].astype('Int8')
+
+   # coerce when needed
+   s + 0.01
+
+These dtypes can operate as part of a ``DataFrame``.
+
+.. ipython:: python
+
+   df = pd.DataFrame({'A': s, 'B': [1, 1, 3], 'C': list('aab')})
+   df
+   df.dtypes
+
+
+These dtypes can be merged, reshaped, and cast.
+
+.. ipython:: python
+
+   pd.concat([df[['A']], df[['B', 'C']]], axis=1).dtypes
+   df['A'].astype(float)
+
+Reduction and groupby operations such as 'sum' work as well.
+
+.. ipython:: python
+
+   df.sum()
+   df.groupby('B').A.sum()
@@ -362,16 +362,17 @@ columns:
 
 .. ipython:: python
 
-    data = ('a,b,c\n'
-            '1,2,3\n'
-            '4,5,6\n'
-            '7,8,9')
+    data = ('a,b,c,d\n'
+            '1,2,3,4\n'
+            '5,6,7,8\n'
+            '9,10,11')
     print(data)
 
     df = pd.read_csv(StringIO(data), dtype=object)
     df
     df['a'][0]
-    df = pd.read_csv(StringIO(data), dtype={'b': object, 'c': np.float64})
+    df = pd.read_csv(StringIO(data),
+                     dtype={'b': object, 'c': np.float64, 'd': 'Int64'})
     df.dtypes
 
 Fortunately, pandas offers more than one way to ensure that your column(s)
@@ -4646,6 +4647,7 @@ Write to a feather file.
 Read from a feather file.
 
 .. ipython:: python
+   :okwarning:
 
    result = pd.read_feather('example.feather')
    result
@@ -4720,6 +4722,7 @@ Write to a parquet file.
 Read from a parquet file.
 
 .. ipython:: python
+   :okwarning:
 
    result = pd.read_parquet('example_fp.parquet', engine='fastparquet')
    result = pd.read_parquet('example_pa.parquet', engine='pyarrow')
@@ -4790,6 +4793,7 @@ Partitioning Parquet files
 Parquet supports partitioning of data based on the values of one or more columns.
 
 .. ipython:: python
+    :okwarning:
 
     df = pd.DataFrame({'a': [0, 0, 1, 1], 'b': [0, 1, 0, 1]})
     df.to_parquet(fname='test', engine='pyarrow',
@@ -4879,7 +4883,7 @@ below and the SQLAlchemy `documentation <https://docs.sqlalchemy.org/en/latest/c
 
 If you want to manage your own connections you can pass one of those instead:
 
-.. ipython:: python
+.. code-block:: python
 
    with engine.connect() as conn, conn.begin():
        data = pd.read_sql_table('data', conn)
 
@@ -19,32 +19,6 @@ pandas.
 
 See the :ref:`cookbook<cookbook.missing_data>` for some advanced strategies.
 
-Missing data basics
--------------------
-
-When / why does data become missing?
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Some might quibble over our usage of *missing*. By "missing" we simply mean
-**NA** ("not available") or "not present for whatever reason". Many data sets simply arrive with
-missing data, either because it exists and was not collected or it never
-existed. For example, in a collection of financial time series, some of the time
-series might start on different dates. Thus, values prior to the start date
-would generally be marked as missing.
-
-In pandas, one of the most common ways that missing data is **introduced** into
-a data set is by reindexing. For example:
-
-.. ipython:: python
-
-   df = pd.DataFrame(np.random.randn(5, 3), index=['a', 'c', 'e', 'f', 'h'],
-                     columns=['one', 'two', 'three'])
-   df['four'] = 'bar'
-   df['five'] = df['one'] > 0
-   df
-   df2 = df.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])
-   df2
-
 Values considered "missing"
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -62,6 +36,16 @@ arise and we wish to also consider that "missing" or "not available" or "NA".
 
 .. _missing.isna:
 
+.. ipython:: python
+
+   df = pd.DataFrame(np.random.randn(5, 3), index=['a', 'c', 'e', 'f', 'h'],
+                     columns=['one', 'two', 'three'])
+   df['four'] = 'bar'
+   df['five'] = df['one'] > 0
+   df
+   df2 = df.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])
+   df2
+
 To make detecting missing values easier (and across different array dtypes),
 pandas provides the :func:`isna` and
 :func:`notna` functions, which are also methods on
@@ -90,6 +74,23 @@ Series and DataFrame objects:
 
       df2['one'] == np.nan
 
+Integer Dtypes and Missing Data
+-------------------------------
+
+Because ``NaN`` is a float, a column of integers with even one missing value
+is cast to floating-point dtype (see :ref:`gotchas.intna` for more). Pandas
+provides a nullable integer array, which can be used by explicitly requesting
+the dtype:
+
+.. ipython:: python
+
+   pd.Series([1, 2, np.nan, 4], dtype=pd.Int64Dtype())
+
+Alternatively, the string alias ``dtype='Int64'`` (note the capital ``"I"``) can be
+used.
+
+See :ref:`integer_na` for more.
+
 Datetimes
 ---------
 
@@ -751,3 +752,14 @@ However, these can be filled in using :meth:`~DataFrame.fillna` and it will work
 
    reindexed[crit.fillna(False)]
    reindexed[crit.fillna(True)]
+
+Pandas provides a nullable integer dtype, but you must explicitly request it
+when creating the series or column. Notice that we use a capital "I" in
+the ``dtype="Int64"``.
+
+.. ipython:: python
+
+   s = pd.Series([0, 1, np.nan, 3, 4], dtype="Int64")
+   s
+
+See :ref:`integer_na` for more.