Skip to content

Commit 4ccf711

Browse files
author
MomIsBestFriend
committed
Merge remote-tracking branch 'upstream/master' into CI-check-for-no-internal-functions-across-modules
2 parents d9f3928 + e4c17f7 commit 4ccf711

File tree

320 files changed

+9040
-7770
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

320 files changed

+9040
-7770
lines changed

asv_bench/benchmarks/frame_methods.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,4 +619,17 @@ def time_select_dtypes(self, n):
619619
self.df.select_dtypes(include="int")
620620

621621

622+
class MemoryUsage:
623+
def setup(self):
624+
self.df = DataFrame(np.random.randn(100000, 2), columns=list("AB"))
625+
self.df2 = self.df.copy()
626+
self.df2["A"] = self.df2["A"].astype("object")
627+
628+
def time_memory_usage(self):
629+
self.df.memory_usage(deep=True)
630+
631+
def time_memory_usage_object_dtype(self):
632+
self.df2.memory_usage(deep=True)
633+
634+
622635
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/indexing.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,31 @@ def time_frame_getitem_single_column_int(self):
308308
self.df_int_col[0]
309309

310310

311+
class IndexSingleRow:
312+
params = [True, False]
313+
param_names = ["unique_cols"]
314+
315+
def setup(self, unique_cols):
316+
arr = np.arange(10 ** 7).reshape(-1, 10)
317+
df = DataFrame(arr)
318+
dtypes = ["u1", "u2", "u4", "u8", "i1", "i2", "i4", "i8", "f8", "f4"]
319+
for i, d in enumerate(dtypes):
320+
df[i] = df[i].astype(d)
321+
322+
if not unique_cols:
323+
# GH#33032 single-row lookups with non-unique columns were
324+
# 15x slower than with unique columns
325+
df.columns = ["A", "A"] + list(df.columns[2:])
326+
327+
self.df = df
328+
329+
def time_iloc_row(self, unique_cols):
330+
self.df.iloc[10000]
331+
332+
def time_loc_row(self, unique_cols):
333+
self.df.loc[10000]
334+
335+
311336
class AssignTimeseriesIndex:
312337
def setup(self):
313338
N = 100000

asv_bench/benchmarks/series_methods.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -223,27 +223,27 @@ def time_series_datetimeindex_repr(self):
223223

224224
class All:
225225

226-
params = [[10 ** 3, 10 ** 6], ["fast", "slow"]]
227-
param_names = ["N", "case"]
226+
params = [[10 ** 3, 10 ** 6], ["fast", "slow"], ["bool", "boolean"]]
227+
param_names = ["N", "case", "dtype"]
228228

229-
def setup(self, N, case):
229+
def setup(self, N, case, dtype):
230230
val = case != "fast"
231-
self.s = Series([val] * N)
231+
self.s = Series([val] * N, dtype=dtype)
232232

233-
def time_all(self, N, case):
233+
def time_all(self, N, case, dtype):
234234
self.s.all()
235235

236236

237237
class Any:
238238

239-
params = [[10 ** 3, 10 ** 6], ["fast", "slow"]]
240-
param_names = ["N", "case"]
239+
params = [[10 ** 3, 10 ** 6], ["fast", "slow"], ["bool", "boolean"]]
240+
param_names = ["N", "case", "dtype"]
241241

242-
def setup(self, N, case):
242+
def setup(self, N, case, dtype):
243243
val = case == "fast"
244-
self.s = Series([val] * N)
244+
self.s = Series([val] * N, dtype=dtype)
245245

246-
def time_any(self, N, case):
246+
def time_any(self, N, case, dtype):
247247
self.s.any()
248248

249249

@@ -265,11 +265,14 @@ class NanOps:
265265
"prod",
266266
],
267267
[10 ** 3, 10 ** 6],
268-
["int8", "int32", "int64", "float64"],
268+
["int8", "int32", "int64", "float64", "Int64", "boolean"],
269269
]
270270
param_names = ["func", "N", "dtype"]
271271

272272
def setup(self, func, N, dtype):
273+
if func == "argmax" and dtype in {"Int64", "boolean"}:
274+
# Skip argmax for nullable int and boolean dtypes since this doesn't work yet (GH-24382)
275+
raise NotImplementedError
273276
self.s = Series([1] * N, dtype=dtype)
274277
self.func = getattr(self.s, func)
275278

asv_bench/benchmarks/sparse.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
import scipy.sparse
33

44
import pandas as pd
5-
from pandas import MultiIndex, Series, SparseArray, date_range
5+
from pandas import MultiIndex, Series, date_range
6+
from pandas.arrays import SparseArray
67

78

89
def make_array(size, dense_proportion, fill_value, dtype):

asv_bench/benchmarks/stat_ops.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,17 @@
77

88
class FrameOps:
99

10-
params = [ops, ["float", "int"], [0, 1]]
10+
params = [ops, ["float", "int", "Int64"], [0, 1]]
1111
param_names = ["op", "dtype", "axis"]
1212

1313
def setup(self, op, dtype, axis):
14-
df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype)
14+
if op == "mad" and dtype == "Int64" and axis == 1:
15+
# GH-33036
16+
raise NotImplementedError
17+
values = np.random.randn(100000, 4)
18+
if dtype == "Int64":
19+
values = values.astype(int)
20+
df = pd.DataFrame(values).astype(dtype)
1521
self.df_func = getattr(df, op)
1622

1723
def time_op(self, op, dtype, axis):

asv_bench/benchmarks/timeseries.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,15 +336,33 @@ def time_infer_quarter(self):
336336

337337
class ToDatetimeFormat:
338338
def setup(self):
339-
self.s = Series(["19MAY11", "19MAY11:00:00:00"] * 100000)
339+
N = 100000
340+
self.s = Series(["19MAY11", "19MAY11:00:00:00"] * N)
340341
self.s2 = self.s.str.replace(":\\S+$", "")
341342

343+
self.same_offset = ["10/11/2018 00:00:00.045-07:00"] * N
344+
self.diff_offset = [
345+
f"10/11/2018 00:00:00.045-0{offset}:00" for offset in range(10)
346+
] * int(N / 10)
347+
342348
def time_exact(self):
343349
to_datetime(self.s2, format="%d%b%y")
344350

345351
def time_no_exact(self):
346352
to_datetime(self.s, format="%d%b%y", exact=False)
347353

354+
def time_same_offset(self):
355+
to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z")
356+
357+
def time_different_offset(self):
358+
to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z")
359+
360+
def time_same_offset_to_utc(self):
361+
to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True)
362+
363+
def time_different_offset_to_utc(self):
364+
to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True)
365+
348366

349367
class ToDatetimeCache:
350368

ci/code_checks.sh

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -287,18 +287,12 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
287287
pytest -q --doctest-modules pandas/core/groupby/groupby.py -k"-cumcount -describe -pipe"
288288
RET=$(($RET + $?)) ; echo $MSG "DONE"
289289

290-
MSG='Doctests datetimes.py' ; echo $MSG
291-
pytest -q --doctest-modules pandas/core/tools/datetimes.py
290+
MSG='Doctests tools' ; echo $MSG
291+
pytest -q --doctest-modules pandas/core/tools/
292292
RET=$(($RET + $?)) ; echo $MSG "DONE"
293293

294-
MSG='Doctests top-level reshaping functions' ; echo $MSG
295-
pytest -q --doctest-modules \
296-
pandas/core/reshape/concat.py \
297-
pandas/core/reshape/pivot.py \
298-
pandas/core/reshape/reshape.py \
299-
pandas/core/reshape/tile.py \
300-
pandas/core/reshape/melt.py \
301-
-k"-crosstab -pivot_table -cut"
294+
MSG='Doctests reshaping functions' ; echo $MSG
295+
pytest -q --doctest-modules pandas/core/reshape/
302296
RET=$(($RET + $?)) ; echo $MSG "DONE"
303297

304298
MSG='Doctests interval classes' ; echo $MSG
@@ -333,6 +327,14 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
333327
MSG='Doctests generic.py' ; echo $MSG
334328
pytest -q --doctest-modules pandas/core/generic.py
335329
RET=$(($RET + $?)) ; echo $MSG "DONE"
330+
331+
MSG='Doctests tseries' ; echo $MSG
332+
pytest -q --doctest-modules pandas/tseries/
333+
RET=$(($RET + $?)) ; echo $MSG "DONE"
334+
335+
MSG='Doctests computation' ; echo $MSG
336+
pytest -q --doctest-modules pandas/core/computation/
337+
RET=$(($RET + $?)) ; echo $MSG "DONE"
336338
fi
337339

338340
### DOCSTRINGS ###

doc/source/development/contributing_docstring.rst

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ backticks. The following are considered inline code:
160160
161161
.. _docstring.short_summary:
162162

163-
Section 1: Short summary
163+
Section 1: short summary
164164
~~~~~~~~~~~~~~~~~~~~~~~~
165165

166166
The short summary is a single sentence that expresses what the function does in
@@ -228,7 +228,7 @@ infinitive verb.
228228
229229
.. _docstring.extended_summary:
230230

231-
Section 2: Extended summary
231+
Section 2: extended summary
232232
~~~~~~~~~~~~~~~~~~~~~~~~~~~
233233

234234
The extended summary provides details on what the function does. It should not
@@ -259,7 +259,7 @@ their use cases, if it is not too generic.
259259
260260
.. _docstring.parameters:
261261

262-
Section 3: Parameters
262+
Section 3: parameters
263263
~~~~~~~~~~~~~~~~~~~~~
264264

265265
The details of the parameters will be added in this section. This section has
@@ -424,7 +424,7 @@ For axis, the convention is to use something like:
424424

425425
.. _docstring.returns:
426426

427-
Section 4: Returns or Yields
427+
Section 4: returns or yields
428428
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
429429

430430
If the method returns a value, it will be documented in this section. Also
@@ -505,7 +505,7 @@ If the method yields its value:
505505
506506
.. _docstring.see_also:
507507

508-
Section 5: See Also
508+
Section 5: see also
509509
~~~~~~~~~~~~~~~~~~~
510510

511511
This section is used to let users know about pandas functionality
@@ -583,7 +583,7 @@ For example:
583583
584584
.. _docstring.notes:
585585

586-
Section 6: Notes
586+
Section 6: notes
587587
~~~~~~~~~~~~~~~~
588588

589589
This is an optional section used for notes about the implementation of the
@@ -597,7 +597,7 @@ This section follows the same format as the extended summary section.
597597

598598
.. _docstring.examples:
599599

600-
Section 7: Examples
600+
Section 7: examples
601601
~~~~~~~~~~~~~~~~~~~
602602

603603
This is one of the most important sections of a docstring, despite being
@@ -998,4 +998,4 @@ mapping function names to docstrings. Wherever possible, we prefer using
998998

999999
See ``pandas.core.generic.NDFrame.fillna`` for an example template, and
10001000
``pandas.core.series.Series.fillna`` and ``pandas.core.frame.DataFrame.fillna``
1001-
for the filled versions.
1001+
for the filled versions.

doc/source/development/developer.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ for each column, *including the index columns*. This has JSON form:
6262
6363
See below for the detailed specification for these.
6464

65-
Index Metadata Descriptors
65+
Index metadata descriptors
6666
~~~~~~~~~~~~~~~~~~~~~~~~~~
6767

6868
``RangeIndex`` can be stored as metadata only, not requiring serialization. The
@@ -89,7 +89,7 @@ with other column names) a disambiguating name with pattern matching
8989
columns, ``name`` attribute is always stored in the column descriptors as
9090
above.
9191

92-
Column Metadata
92+
Column metadata
9393
~~~~~~~~~~~~~~~
9494

9595
``pandas_type`` is the logical type of the column, and is one of:
@@ -182,4 +182,4 @@ As an example of fully-formed metadata:
182182
'creator': {
183183
'library': 'pyarrow',
184184
'version': '0.13.0'
185-
}}
185+
}}

doc/source/development/extending.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ will
210210

211211
.. _extending.extension.ufunc:
212212

213-
NumPy Universal Functions
213+
NumPy universal functions
214214
^^^^^^^^^^^^^^^^^^^^^^^^^
215215

216216
:class:`Series` implements ``__array_ufunc__``. As part of the implementation,
@@ -501,4 +501,4 @@ registers the default "matplotlib" backend as follows.
501501
502502
503503
More information on how to implement a third-party plotting backend can be found at
504-
https://github.com/pandas-dev/pandas/blob/master/pandas/plotting/__init__.py#L1.
504+
https://github.com/pandas-dev/pandas/blob/master/pandas/plotting/__init__.py#L1.

doc/source/development/maintaining.rst

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
.. _maintaining:
22

33
******************
4-
Pandas Maintenance
4+
pandas maintenance
55
******************
66

77
This guide is for pandas' maintainers. It may also be interesting to contributors
@@ -41,7 +41,7 @@ reading.
4141

4242
.. _maintaining.triage:
4343

44-
Issue Triage
44+
Issue triage
4545
------------
4646

4747

@@ -123,7 +123,7 @@ Here's a typical workflow for triaging a newly opened issue.
123123

124124
.. _maintaining.closing:
125125

126-
Closing Issues
126+
Closing issues
127127
--------------
128128

129129
Be delicate here: many people interpret closing an issue as us saying that the
@@ -132,7 +132,7 @@ respond or self-close their issue if it's determined that the behavior is not a
132132
or the feature is out of scope. Sometimes reporters just go away though, and
133133
we'll close the issue after the conversation has died.
134134

135-
Reviewing Pull Requests
135+
Reviewing pull requests
136136
-----------------------
137137

138138
Anybody can review a pull request: regular contributors, triagers, or core-team
@@ -144,7 +144,7 @@ members. Here are some guidelines to check.
144144
* User-facing changes should have a whatsnew in the appropriate file.
145145
* Regression tests should reference the original GitHub issue number like ``# GH-1234``.
146146

147-
Cleaning up old Issues
147+
Cleaning up old issues
148148
----------------------
149149

150150
Every open issue in pandas has a cost. Open issues make finding duplicates harder,
@@ -164,7 +164,7 @@ If an older issue lacks a reproducible example, label it as "Needs Info" and
164164
ask them to provide one (or write one yourself if possible). If one isn't
165165
provided reasonably soon, close it according to the policies in :ref:`maintaining.closing`.
166166

167-
Cleaning up old Pull Requests
167+
Cleaning up old pull requests
168168
-----------------------------
169169

170170
Occasionally, contributors are unable to finish off a pull request.

doc/source/development/meeting.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
.. _meeting:
22

33
==================
4-
Developer Meetings
4+
Developer meetings
55
==================
66

77
We hold regular developer meetings on the second Wednesday
@@ -29,4 +29,3 @@ You can subscribe to this calendar with the following links:
2929

3030
Additionally, we'll sometimes have one-off meetings on specific topics.
3131
These will be published on the same calendar.
32-

doc/source/getting_started/intro_tutorials/01_table_oriented.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ documentation.
2626
</li>
2727
</ul>
2828

29-
Pandas data table representation
29+
pandas data table representation
3030
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3131

3232
.. image:: ../../_static/schemas/01_table_dataframe.svg

0 commit comments

Comments
 (0)