Skip to content

Commit 5a1375a

Browse files
committed
Merge remote-tracking branch 'upstream/master' into quantile_regression
2 parents a2ce1c8 + e6e0889 commit 5a1375a

File tree

285 files changed

+5137
-4679
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

285 files changed

+5137
-4679
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ repos:
33
rev: 19.10b0
44
hooks:
55
- id: black
6-
language_version: python3.7
6+
language_version: python3
77
- repo: https://gitlab.com/pycqa/flake8
88
rev: 3.7.7
99
hooks:

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
## What is it?
2222

23-
**pandas** is a Python package providing fast, flexible, and expressive data
23+
**pandas** is a Python package that provides fast, flexible, and expressive data
2424
structures designed to make working with "relational" or "labeled" data both
2525
easy and intuitive. It aims to be the fundamental high-level building block for
2626
doing practical, **real world** data analysis in Python. Additionally, it has
@@ -154,11 +154,11 @@ For usage questions, the best place to go to is [StackOverflow](https://stackove
154154
Further, general questions and discussions can also take place on the [pydata mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata).
155155

156156
## Discussion and Development
157-
Most development discussion is taking place on github in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development related questions.
157+
Most development discussions take place on github in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development related questions.
158158

159159
## Contributing to pandas [![Open Source Helpers](https://www.codetriage.com/pandas-dev/pandas/badges/users.svg)](https://www.codetriage.com/pandas-dev/pandas)
160160

161-
All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome.
161+
All contributions, bug reports, bug fixes, documentation improvements, enhancements, and ideas are welcome.
162162

163163
A detailed overview on how to contribute can be found in the **[contributing guide](https://pandas.pydata.org/docs/dev/development/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
164164

asv_bench/benchmarks/arithmetic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ def setup(self, offset):
466466
self.rng = rng
467467

468468
def time_apply_index(self, offset):
469-
offset.apply_index(self.rng)
469+
self.rng + offset
470470

471471

472472
class BinaryOpsMultiIndex:

asv_bench/benchmarks/io/json.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,18 @@ def time_read_json_lines(self, index):
5353
def time_read_json_lines_concat(self, index):
5454
concat(read_json(self.fname, orient="records", lines=True, chunksize=25000))
5555

56+
def time_read_json_lines_nrows(self, index):
57+
read_json(self.fname, orient="records", lines=True, nrows=25000)
58+
5659
def peakmem_read_json_lines(self, index):
5760
read_json(self.fname, orient="records", lines=True)
5861

5962
def peakmem_read_json_lines_concat(self, index):
6063
concat(read_json(self.fname, orient="records", lines=True, chunksize=25000))
6164

65+
def peakmem_read_json_lines_nrows(self, index):
66+
read_json(self.fname, orient="records", lines=True, nrows=15000)
67+
6268

6369
class ToJSON(BaseIO):
6470

asv_bench/benchmarks/pandas_vb_common.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
np.uint8,
3434
]
3535
datetime_dtypes = [np.datetime64, np.timedelta64]
36-
string_dtypes = [np.object]
36+
string_dtypes = [object]
3737
try:
3838
extension_dtypes = [
3939
pd.Int8Dtype,

asv_bench/benchmarks/series_methods.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -58,17 +58,15 @@ def time_isin_nan_values(self):
5858

5959
class IsInForObjects:
6060
def setup(self):
61-
self.s_nans = Series(np.full(10 ** 4, np.nan)).astype(np.object)
62-
self.vals_nans = np.full(10 ** 4, np.nan).astype(np.object)
63-
self.s_short = Series(np.arange(2)).astype(np.object)
64-
self.s_long = Series(np.arange(10 ** 5)).astype(np.object)
65-
self.vals_short = np.arange(2).astype(np.object)
66-
self.vals_long = np.arange(10 ** 5).astype(np.object)
61+
self.s_nans = Series(np.full(10 ** 4, np.nan)).astype(object)
62+
self.vals_nans = np.full(10 ** 4, np.nan).astype(object)
63+
self.s_short = Series(np.arange(2)).astype(object)
64+
self.s_long = Series(np.arange(10 ** 5)).astype(object)
65+
self.vals_short = np.arange(2).astype(object)
66+
self.vals_long = np.arange(10 ** 5).astype(object)
6767
# because of nans floats are special:
68-
self.s_long_floats = Series(np.arange(10 ** 5, dtype=np.float)).astype(
69-
np.object
70-
)
71-
self.vals_long_floats = np.arange(10 ** 5, dtype=np.float).astype(np.object)
68+
self.s_long_floats = Series(np.arange(10 ** 5, dtype=np.float)).astype(object)
69+
self.vals_long_floats = np.arange(10 ** 5, dtype=np.float).astype(object)
7270

7371
def time_isin_nans(self):
7472
# if nan-objects are different objects,

asv_bench/benchmarks/sparse.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def time_series_to_frame(self):
3232

3333
class SparseArrayConstructor:
3434

35-
params = ([0.1, 0.01], [0, np.nan], [np.int64, np.float64, np.object])
35+
params = ([0.1, 0.01], [0, np.nan], [np.int64, np.float64, object])
3636
param_names = ["dense_proportion", "fill_value", "dtype"]
3737

3838
def setup(self, dense_proportion, fill_value, dtype):

ci/azure/windows.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
CONDA_PY: "36"
1414
PATTERN: "not slow and not network"
1515

16-
py37_np141:
16+
py37_np18:
1717
ENV_FILE: ci/deps/azure-windows-37.yaml
1818
CONDA_PY: "37"
1919
PATTERN: "not slow and not network"

ci/deps/azure-windows-37.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ dependencies:
2222
- matplotlib=2.2.*
2323
- moto
2424
- numexpr
25-
- numpy=1.14.*
25+
- numpy=1.18.*
2626
- openpyxl
2727
- pyarrow=0.14
2828
- pytables

ci/deps/travis-36-locale.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ dependencies:
2727
- numexpr
2828
- numpy
2929
- openpyxl
30-
- pandas-gbq=0.8.0
30+
- pandas-gbq=0.12.0
3131
- psycopg2=2.6.2
3232
- pymysql=0.7.11
3333
- pytables

doc/source/development/contributing.rst

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,10 @@ want to clone your fork to your machine::
136136
This creates the directory `pandas-yourname` and connects your repository to
137137
the upstream (main project) *pandas* repository.
138138

139+
Note that performing a shallow clone (with ``--depth=N``, for some ``N`` greater than
140+
or equal to 1) might break some tests and features such as ``pd.show_versions()``,
141+
because the version number can no longer be computed.
142+
139143
.. _contributing.dev_env:
140144

141145
Creating a development environment
@@ -270,7 +274,7 @@ Creating a Python environment (pip)
270274
If you aren't using conda for your development environment, follow these instructions.
271275
You'll need to have at least Python 3.6.1 installed on your system.
272276

273-
**Unix**/**Mac OS**
277+
**Unix**/**Mac OS with virtualenv**
274278

275279
.. code-block:: bash
276280
@@ -286,7 +290,31 @@ You'll need to have at least Python 3.6.1 installed on your system.
286290
python -m pip install -r requirements-dev.txt
287291
288292
# Build and install pandas
289-
python setup.py build_ext --inplace -j 0
293+
python setup.py build_ext --inplace -j 4
294+
python -m pip install -e . --no-build-isolation --no-use-pep517
295+
296+
**Unix**/**Mac OS with pyenv**
297+
298+
Consult the docs for setting up pyenv `here <https://github.com/pyenv/pyenv>`__.
299+
300+
.. code-block:: bash
301+
302+
# Create a virtual environment
303+
# Use an ENV_DIR of your choice. We'll use ~/Users/<yourname>/.pyenv/versions/pandas-dev
304+
305+
pyenv virtualenv <version> <name-to-give-it>
306+
307+
# For instance:
308+
pyenv virtualenv 3.7.6 pandas-dev
309+
310+
# Activate the virtualenv
311+
pyenv activate pandas-dev
312+
313+
# Now install the build dependencies in the cloned pandas repo
314+
python -m pip install -r requirements-dev.txt
315+
316+
# Build and install pandas
317+
python setup.py build_ext --inplace -j 4
290318
python -m pip install -e . --no-build-isolation --no-use-pep517
291319
292320
**Windows**
@@ -312,7 +340,7 @@ should already exist.
312340
python -m pip install -r requirements-dev.txt
313341
314342
# Build and install pandas
315-
python setup.py build_ext --inplace -j 0
343+
python setup.py build_ext --inplace -j 4
316344
python -m pip install -e . --no-build-isolation --no-use-pep517
317345
318346
Creating a branch

doc/source/ecosystem.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,20 @@ provide a pandas-like and pandas-compatible toolkit for analytics on multi-
320320
dimensional arrays, rather than the tabular data for which pandas excels.
321321

322322

323+
.. _ecosystem.io:
324+
325+
IO
326+
--
327+
328+
`BCPandas <https://github.com/yehoshuadimarsky/bcpandas>`__
329+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
330+
331+
BCPandas provides high performance writes from pandas to Microsoft SQL Server,
332+
far exceeding the performance of the native ``df.to_sql`` method. Internally, it uses
333+
Microsoft's BCP utility, but the complexity is fully abstracted away from the end user.
334+
Rigorously tested, it is a complete replacement for ``df.to_sql``.
335+
336+
323337
.. _ecosystem.out-of-core:
324338

325339
Out-of-core

doc/source/getting_started/comparison/comparison_with_sas.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ Reading external data
115115

116116
Like SAS, pandas provides utilities for reading in data from
117117
many formats. The ``tips`` dataset, found within the pandas
118-
tests (`csv <https://raw.github.com/pandas-dev/pandas/master/pandas/tests/data/tips.csv>`_)
118+
tests (`csv <https://raw.github.com/pandas-dev/pandas/master/pandas/tests/io/data/csv/tips.csv>`_)
119119
will be used in many of the following examples.
120120

121121
SAS provides ``PROC IMPORT`` to read csv data into a data set.
@@ -131,7 +131,7 @@ The pandas method is :func:`read_csv`, which works similarly.
131131
.. ipython:: python
132132
133133
url = ('https://raw.github.com/pandas-dev/'
134-
'pandas/master/pandas/tests/data/tips.csv')
134+
'pandas/master/pandas/tests/io/data/csv/tips.csv')
135135
tips = pd.read_csv(url)
136136
tips.head()
137137

doc/source/getting_started/comparison/comparison_with_sql.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ structure.
2525
.. ipython:: python
2626
2727
url = ('https://raw.github.com/pandas-dev'
28-
'/pandas/master/pandas/tests/data/tips.csv')
28+
'/pandas/master/pandas/tests/io/data/csv/tips.csv')
2929
tips = pd.read_csv(url)
3030
tips.head()
3131

doc/source/getting_started/comparison/comparison_with_stata.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ Reading external data
112112

113113
Like Stata, pandas provides utilities for reading in data from
114114
many formats. The ``tips`` data set, found within the pandas
115-
tests (`csv <https://raw.github.com/pandas-dev/pandas/master/pandas/tests/data/tips.csv>`_)
115+
tests (`csv <https://raw.github.com/pandas-dev/pandas/master/pandas/tests/io/data/csv/tips.csv>`_)
116116
will be used in many of the following examples.
117117

118118
Stata provides ``import delimited`` to read csv data into a data set in memory.
@@ -128,7 +128,7 @@ the data set if presented with a url.
128128
.. ipython:: python
129129
130130
url = ('https://raw.github.com/pandas-dev'
131-
'/pandas/master/pandas/tests/data/tips.csv')
131+
'/pandas/master/pandas/tests/io/data/csv/tips.csv')
132132
tips = pd.read_csv(url)
133133
tips.head()
134134

doc/source/getting_started/install.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ lxml 3.8.0 HTML parser for read_html (see :ref
274274
matplotlib 2.2.2 Visualization
275275
numba 0.46.0 Alternative execution engine for rolling operations
276276
openpyxl 2.5.7 Reading / writing for xlsx files
277-
pandas-gbq 0.8.0 Google Big Query access
277+
pandas-gbq 0.12.0 Google Big Query access
278278
psycopg2 PostgreSQL engine for sqlalchemy
279279
pyarrow 0.12.0 Parquet, ORC (requires 0.13.0), and feather reading / writing
280280
pymysql 0.7.11 MySQL engine for sqlalchemy

doc/source/getting_started/intro_tutorials/01_table_oriented.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ I want to store passenger data of the Titanic. For a number of passengers, I kno
5151
df
5252
5353
To manually store data in a table, create a ``DataFrame``. When using a Python dictionary of lists, the dictionary keys will be used as column headers and
54-
the values in each list as rows of the ``DataFrame``.
54+
the values in each list as columns of the ``DataFrame``.
5555

5656
.. raw:: html
5757

@@ -215,4 +215,4 @@ A more extended explanation to ``DataFrame`` and ``Series`` is provided in the :
215215

216216
.. raw:: html
217217

218-
</div>
218+
</div>

doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ I want the values for the three stations as separate columns next to each other
196196
197197
no2_subset.pivot(columns="location", values="value")
198198
199-
The :meth:`~pandas.pivot_table` function is purely reshaping of the data: a single value
199+
The :meth:`~pandas.pivot` function is purely reshaping of the data: a single value
200200
for each index/column combination is required.
201201

202202
.. raw:: html

doc/source/reference/frame.rst

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,6 @@ Conversion
4747
DataFrame.convert_dtypes
4848
DataFrame.infer_objects
4949
DataFrame.copy
50-
DataFrame.isna
51-
DataFrame.notna
5250
DataFrame.bool
5351

5452
Indexing, iteration
@@ -211,10 +209,18 @@ Missing data handling
211209
.. autosummary::
212210
:toctree: api/
213211

212+
DataFrame.backfill
213+
DataFrame.bfill
214214
DataFrame.dropna
215+
DataFrame.ffill
215216
DataFrame.fillna
216-
DataFrame.replace
217217
DataFrame.interpolate
218+
DataFrame.isna
219+
DataFrame.isnull
220+
DataFrame.notna
221+
DataFrame.notnull
222+
DataFrame.pad
223+
DataFrame.replace
218224

219225
Reshaping, sorting, transposing
220226
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

doc/source/reference/general_utility_functions.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,11 @@ Exceptions and warnings
3838
errors.AccessorRegistrationWarning
3939
errors.DtypeWarning
4040
errors.EmptyDataError
41-
errors.OutOfBoundsDatetime
41+
errors.InvalidIndexError
4242
errors.MergeError
4343
errors.NullFrequencyError
4444
errors.NumbaUtilError
45+
errors.OutOfBoundsDatetime
4546
errors.ParserError
4647
errors.ParserWarning
4748
errors.PerformanceWarning

doc/source/reference/groupby.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ Computations / descriptive stats
5050
GroupBy.all
5151
GroupBy.any
5252
GroupBy.bfill
53+
GroupBy.backfill
5354
GroupBy.count
5455
GroupBy.cumcount
5556
GroupBy.cummax
@@ -67,6 +68,7 @@ Computations / descriptive stats
6768
GroupBy.ngroup
6869
GroupBy.nth
6970
GroupBy.ohlc
71+
GroupBy.pad
7072
GroupBy.prod
7173
GroupBy.rank
7274
GroupBy.pct_change
@@ -88,10 +90,12 @@ application to columns of a specific data type.
8890

8991
DataFrameGroupBy.all
9092
DataFrameGroupBy.any
93+
DataFrameGroupBy.backfill
9194
DataFrameGroupBy.bfill
9295
DataFrameGroupBy.corr
9396
DataFrameGroupBy.count
9497
DataFrameGroupBy.cov
98+
DataFrameGroupBy.cumcount
9599
DataFrameGroupBy.cummax
96100
DataFrameGroupBy.cummin
97101
DataFrameGroupBy.cumprod
@@ -106,11 +110,13 @@ application to columns of a specific data type.
106110
DataFrameGroupBy.idxmin
107111
DataFrameGroupBy.mad
108112
DataFrameGroupBy.nunique
113+
DataFrameGroupBy.pad
109114
DataFrameGroupBy.pct_change
110115
DataFrameGroupBy.plot
111116
DataFrameGroupBy.quantile
112117
DataFrameGroupBy.rank
113118
DataFrameGroupBy.resample
119+
DataFrameGroupBy.sample
114120
DataFrameGroupBy.shift
115121
DataFrameGroupBy.size
116122
DataFrameGroupBy.skew

doc/source/reference/series.rst

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -214,11 +214,18 @@ Missing data handling
214214
.. autosummary::
215215
:toctree: api/
216216

217-
Series.isna
218-
Series.notna
217+
Series.backfill
218+
Series.bfill
219219
Series.dropna
220+
Series.ffill
220221
Series.fillna
221222
Series.interpolate
223+
Series.isna
224+
Series.isnull
225+
Series.notna
226+
Series.notnull
227+
Series.pad
228+
Series.replace
222229

223230
Reshaping, sorting
224231
------------------

0 commit comments

Comments
 (0)