Skip to content

Commit 3682687

Browse files
Merge remote-tracking branch 'upstream/master' into GH36666
2 parents 0bc1a89 + 838cbd4 commit 3682687

File tree

243 files changed

+7700
-7293
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

243 files changed

+7700
-7293
lines changed

.github/workflows/ci.yml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,6 @@ jobs:
3737
ci/code_checks.sh lint
3838
if: always()
3939

40-
- name: Dependencies consistency
41-
run: |
42-
source activate pandas-dev
43-
ci/code_checks.sh dependencies
44-
if: always()
45-
4640
- name: Checks on imported code
4741
run: |
4842
source activate pandas-dev

ci/code_checks.sh

Lines changed: 15 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,10 @@
1515
# $ ./ci/code_checks.sh code # checks on imported code
1616
# $ ./ci/code_checks.sh doctests # run doctests
1717
# $ ./ci/code_checks.sh docstrings # validate docstring errors
18-
# $ ./ci/code_checks.sh dependencies # check that dependencies are consistent
1918
# $ ./ci/code_checks.sh typing # run static type analysis
2019

21-
[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "dependencies" || "$1" == "typing" ]] || \
22-
{ echo "Unknown command $1. Usage: $0 [lint|patterns|code|doctests|docstrings|dependencies|typing]"; exit 9999; }
20+
[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "typing" ]] || \
21+
{ echo "Unknown command $1. Usage: $0 [lint|patterns|code|doctests|docstrings|typing]"; exit 9999; }
2322

2423
BASE_DIR="$(dirname $0)/.."
2524
RET=0
@@ -38,6 +37,12 @@ function invgrep {
3837
return $((! $EXIT_STATUS))
3938
}
4039

40+
function check_namespace {
41+
local -r CLASS="${1}"
42+
grep -R -l --include "*.py" " ${CLASS}(" pandas/tests | xargs grep -n "pd\.${CLASS}("
43+
test $? -gt 0
44+
}
45+
4146
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
4247
FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code)s:%(text)s"
4348
INVGREP_PREPEND="##[error]"
@@ -48,31 +53,6 @@ fi
4853
### LINTING ###
4954
if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
5055

51-
echo "black --version"
52-
black --version
53-
54-
MSG='Checking black formatting' ; echo $MSG
55-
black . --check
56-
RET=$(($RET + $?)) ; echo $MSG "DONE"
57-
58-
# `setup.cfg` contains the list of error codes that are being ignored in flake8
59-
60-
echo "flake8 --version"
61-
flake8 --version
62-
63-
# pandas/_libs/src is C code, so no need to search there.
64-
MSG='Linting .py code' ; echo $MSG
65-
flake8 --format="$FLAKE8_FORMAT" .
66-
RET=$(($RET + $?)) ; echo $MSG "DONE"
67-
68-
MSG='Linting .pyx and .pxd code' ; echo $MSG
69-
flake8 --format="$FLAKE8_FORMAT" pandas --append-config=flake8/cython.cfg
70-
RET=$(($RET + $?)) ; echo $MSG "DONE"
71-
72-
MSG='Linting .pxi.in' ; echo $MSG
73-
flake8 --format="$FLAKE8_FORMAT" pandas/_libs --append-config=flake8/cython-template.cfg
74-
RET=$(($RET + $?)) ; echo $MSG "DONE"
75-
7656
# Check that cython casting is of the form `<type>obj` as opposed to `<type> obj`;
7757
# it doesn't make a difference, but we want to be internally consistent.
7858
# Note: this grep pattern is (intended to be) equivalent to the python
@@ -125,19 +105,6 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
125105
fi
126106
RET=$(($RET + $?)) ; echo $MSG "DONE"
127107

128-
echo "isort --version-number"
129-
isort --version-number
130-
131-
# Imports - Check formatting using isort see setup.cfg for settings
132-
MSG='Check import format using isort' ; echo $MSG
133-
ISORT_CMD="isort --quiet --check-only pandas asv_bench scripts web"
134-
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
135-
eval $ISORT_CMD | awk '{print "##[error]" $0}'; RET=$(($RET + ${PIPESTATUS[0]}))
136-
else
137-
eval $ISORT_CMD
138-
fi
139-
RET=$(($RET + $?)) ; echo $MSG "DONE"
140-
141108
fi
142109

143110
### PATTERNS ###
@@ -234,6 +201,13 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
234201
MSG='Check code for instances of os.remove' ; echo $MSG
235202
invgrep -R --include="*.py*" --exclude "common.py" --exclude "test_writers.py" --exclude "test_store.py" -E "os\.remove" pandas/tests/
236203
RET=$(($RET + $?)) ; echo $MSG "DONE"
204+
205+
MSG='Check for inconsistent use of pandas namespace in tests' ; echo $MSG
206+
check_namespace "Series"
207+
RET=$(($RET + $?))
208+
check_namespace "DataFrame"
209+
RET=$(($RET + $?))
210+
echo $MSG "DONE"
237211
fi
238212

239213
### CODE ###
@@ -354,15 +328,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
354328

355329
fi
356330

357-
### DEPENDENCIES ###
358-
if [[ -z "$CHECK" || "$CHECK" == "dependencies" ]]; then
359-
360-
MSG='Check that requirements-dev.txt has been generated from environment.yml' ; echo $MSG
361-
$BASE_DIR/scripts/generate_pip_deps_from_conda.py --compare --azure
362-
RET=$(($RET + $?)) ; echo $MSG "DONE"
363-
364-
fi
365-
366331
### TYPING ###
367332
if [[ -z "$CHECK" || "$CHECK" == "typing" ]]; then
368333

@@ -374,5 +339,4 @@ if [[ -z "$CHECK" || "$CHECK" == "typing" ]]; then
374339
RET=$(($RET + $?)) ; echo $MSG "DONE"
375340
fi
376341

377-
378342
exit $RET

doc/source/getting_started/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,7 @@ pandas has great support for time series and has an extensive set of tools for w
533533
<div id="collapseTen" class="collapse" data-parent="#accordion">
534534
<div class="card-body">
535535

536-
Data sets do not only contain numerical data. pandas provides a wide range of functions to cleaning textual data and extract useful information from it.
536+
Data sets do not only contain numerical data. pandas provides a wide range of functions to clean textual data and extract useful information from it.
537537

538538
.. raw:: html
539539

doc/source/getting_started/intro_tutorials/10_text_data.rst

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -66,15 +66,15 @@ How to manipulate textual data?
6666
<ul class="task-bullet">
6767
<li>
6868

69-
Make all name characters lowercase
69+
Make all name characters lowercase.
7070

7171
.. ipython:: python
7272
7373
titanic["Name"].str.lower()
7474
7575
To make each of the strings in the ``Name`` column lowercase, select the ``Name`` column
76-
(see :ref:`tutorial on selection of data <10min_tut_03_subset>`), add the ``str`` accessor and
77-
apply the ``lower`` method. As such, each of the strings is converted element wise.
76+
(see the :ref:`tutorial on selection of data <10min_tut_03_subset>`), add the ``str`` accessor and
77+
apply the ``lower`` method. As such, each of the strings is converted element-wise.
7878

7979
.. raw:: html
8080

@@ -86,15 +86,15 @@ having a ``dt`` accessor, a number of
8686
specialized string methods are available when using the ``str``
8787
accessor. These methods have in general matching names with the
8888
equivalent built-in string methods for single elements, but are applied
89-
element-wise (remember :ref:`element wise calculations <10min_tut_05_columns>`?)
89+
element-wise (remember :ref:`element-wise calculations <10min_tut_05_columns>`?)
9090
on each of the values of the columns.
9191

9292
.. raw:: html
9393

9494
<ul class="task-bullet">
9595
<li>
9696

97-
Create a new column ``Surname`` that contains the surname of the Passengers by extracting the part before the comma.
97+
Create a new column ``Surname`` that contains the surname of the passengers by extracting the part before the comma.
9898

9999
.. ipython:: python
100100
@@ -135,7 +135,7 @@ More information on extracting parts of strings is available in the user guide s
135135
<ul class="task-bullet">
136136
<li>
137137

138-
Extract the passenger data about the Countesses on board of the Titanic.
138+
Extract the passenger data about the countesses on board of the Titanic.
139139

140140
.. ipython:: python
141141
@@ -145,15 +145,15 @@ Extract the passenger data about the Countesses on board of the Titanic.
145145
146146
titanic[titanic["Name"].str.contains("Countess")]
147147
148-
(*Interested in her story? See *\ `Wikipedia <https://en.wikipedia.org/wiki/No%C3%ABl_Leslie,_Countess_of_Rothes>`__\ *!*)
148+
(*Interested in her story? See* `Wikipedia <https://en.wikipedia.org/wiki/No%C3%ABl_Leslie,_Countess_of_Rothes>`__\ *!*)
149149

150150
The string method :meth:`Series.str.contains` checks for each of the values in the
151151
column ``Name`` if the string contains the word ``Countess`` and returns
152-
for each of the values ``True`` (``Countess`` is part of the name) of
152+
for each of the values ``True`` (``Countess`` is part of the name) or
153153
``False`` (``Countess`` is not part of the name). This output can be used
154154
to subselect the data using conditional (boolean) indexing introduced in
155155
the :ref:`subsetting of data tutorial <10min_tut_03_subset>`. As there was
156-
only one Countess on the Titanic, we get one row as a result.
156+
only one countess on the Titanic, we get one row as a result.
157157

158158
.. raw:: html
159159

@@ -220,7 +220,7 @@ we can do a selection using the ``loc`` operator, introduced in the
220220
<ul class="task-bullet">
221221
<li>
222222

223-
In the "Sex" column, replace values of "male" by "M" and values of "female" by "F"
223+
In the "Sex" column, replace values of "male" by "M" and values of "female" by "F".
224224

225225
.. ipython:: python
226226
@@ -256,7 +256,7 @@ a ``dictionary`` to define the mapping ``{from : to}``.
256256
<h4>REMEMBER</h4>
257257

258258
- String methods are available using the ``str`` accessor.
259-
- String methods work element wise and can be used for conditional
259+
- String methods work element-wise and can be used for conditional
260260
indexing.
261261
- The ``replace`` method is a convenient method to convert values
262262
according to a given dictionary.

doc/source/index.rst.template

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ pandas documentation
1717
`Source Repository <https://github.com/pandas-dev/pandas>`__ |
1818
`Issues & Ideas <https://github.com/pandas-dev/pandas/issues>`__ |
1919
`Q&A Support <https://stackoverflow.com/questions/tagged/pandas>`__ |
20-
`Mailing List <https://groups.google.com/forum/#!forum/pydata>`__
20+
`Mailing List <https://groups.google.com/g/pydata>`__
2121

2222
:mod:`pandas` is an open source, BSD-licensed library providing high-performance,
2323
easy-to-use data structures and data analysis tools for the `Python <https://www.python.org/>`__

doc/source/user_guide/computation.rst

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -652,9 +652,9 @@ parameter:
652652
:header: "``closed``", "Description", "Default for"
653653
:widths: 20, 30, 30
654654

655-
``right``, close right endpoint, time-based windows
655+
``right``, close right endpoint,
656656
``left``, close left endpoint,
657-
``both``, close both endpoints, fixed windows
657+
``both``, close both endpoints,
658658
``neither``, open endpoints,
659659

660660
For example, having the right endpoint open is useful in many problems that require that there is no contamination
@@ -681,9 +681,6 @@ from present information back to past information. This allows the rolling windo
681681
682682
df
683683
684-
Currently, this feature is only implemented for time-based windows.
685-
For fixed windows, the closed parameter cannot be set and the rolling window will always have both endpoints closed.
686-
687684
.. _stats.iter_rolling_window:
688685

689686
Iteration over window:

doc/source/whatsnew/v1.1.4.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ Fixed regressions
2222
- Fixed regression in :class:`RollingGroupby` causing a segmentation fault with Index of dtype object (:issue:`36727`)
2323
- Fixed regression in :meth:`DataFrame.resample(...).apply(...)` raised ``AttributeError`` when input was a :class:`DataFrame` and only a :class:`Series` was evaluated (:issue:`36951`)
2424
- Fixed regression in :class:`PeriodDtype` comparing both equal and unequal to its string representation (:issue:`37265`)
25+
- Fixed regression in certain offsets (:meth:`pd.offsets.Day() <pandas.tseries.offsets.Day>` and below) no longer being hashable (:issue:`37267`)
2526

2627
.. ---------------------------------------------------------------------------
2728

doc/source/whatsnew/v1.2.0.rst

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,8 @@ Other enhancements
221221
- :meth:`Rolling.var()` and :meth:`Rolling.std()` use Kahan summation and Welford's Method to avoid numerical issues (:issue:`37051`)
222222
- :meth:`DataFrame.plot` now recognizes ``xlabel`` and ``ylabel`` arguments for plots of type ``scatter`` and ``hexbin`` (:issue:`37001`)
223223
- :class:`DataFrame` now supports ``divmod`` operation (:issue:`37165`)
224+
- :meth:`DataFrame.to_parquet` now returns a ``bytes`` object when no ``path`` argument is passed (:issue:`37105`)
225+
- :class:`Rolling` now supports the ``closed`` argument for fixed windows (:issue:`34315`)
224226

225227
.. _whatsnew_120.api_breaking.python:
226228

@@ -410,7 +412,7 @@ Conversion
410412
Strings
411413
^^^^^^^
412414
- Bug in :meth:`Series.to_string`, :meth:`DataFrame.to_string`, and :meth:`DataFrame.to_latex` adding a leading space when ``index=False`` (:issue:`24980`)
413-
-
415+
- Bug in :func:`to_numeric` raising a ``TypeError`` when attempting to convert a string dtype :class:`Series` containing only numeric strings and ``NA`` (:issue:`37262`)
414416
-
415417

416418

@@ -517,7 +519,7 @@ ExtensionArray
517519
- Fixed bug where :class:`DataFrame` column set to scalar extension type via a dict instantiation was considered an object type rather than the extension type (:issue:`35965`)
518520
- Fixed bug where ``astype()`` with equal dtype and ``copy=False`` would return a new object (:issue:`284881`)
519521
- Fixed bug when applying a NumPy ufunc with multiple outputs to a :class:`pandas.arrays.IntegerArray` returning None (:issue:`36913`)
520-
522+
- Fixed an inconsistency between :class:`PeriodArray`'s ``__init__`` signature and those of :class:`DatetimeArray` and :class:`TimedeltaArray` (:issue:`37289`)
521523

522524
Other
523525
^^^^^

pandas/_libs/lib.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2019,7 +2019,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values,
20192019
elif util.is_bool_object(val):
20202020
floats[i] = uints[i] = ints[i] = bools[i] = val
20212021
seen.bool_ = True
2022-
elif val is None:
2022+
elif val is None or val is C_NA:
20232023
seen.saw_null()
20242024
floats[i] = complexes[i] = NaN
20252025
elif hasattr(val, '__len__') and len(val) == 0:

pandas/_libs/tslibs/offsets.pyx

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -791,6 +791,11 @@ cdef class Tick(SingleConstructorOffset):
791791
def is_anchored(self) -> bool:
792792
return False
793793

794+
# This is identical to BaseOffset.__hash__, but has to be redefined here
795+
# for Python 3, because we've redefined __eq__.
796+
def __hash__(self) -> int:
797+
return hash(self._params)
798+
794799
# --------------------------------------------------------------------
795800
# Comparison and Arithmetic Methods
796801

pandas/_libs/window/indexers.pyx

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,16 +43,14 @@ def calculate_variable_window_bounds(
4343
(ndarray[int64], ndarray[int64])
4444
"""
4545
cdef:
46-
bint left_closed = False
47-
bint right_closed = False
48-
int index_growth_sign = 1
46+
bint left_closed = False, right_closed = False
4947
ndarray[int64_t, ndim=1] start, end
50-
int64_t start_bound, end_bound
48+
int64_t start_bound, end_bound, index_growth_sign = 1
5149
Py_ssize_t i, j
5250

53-
# if windows is variable, default is 'right', otherwise default is 'both'
51+
# default is 'right'
5452
if closed is None:
55-
closed = 'right' if index is not None else 'both'
53+
closed = 'right'
5654

5755
if closed in ['right', 'both']:
5856
right_closed = True

pandas/_typing.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@
3838
from pandas.core.indexes.base import Index
3939
from pandas.core.series import Series
4040

41+
from pandas.io.formats.format import EngFormatter
42+
4143
# array-like
4244

4345
AnyArrayLike = TypeVar("AnyArrayLike", "ExtensionArray", "Index", "Series", np.ndarray)
@@ -127,6 +129,10 @@
127129
EncodingVar = TypeVar("EncodingVar", str, None, Optional[str])
128130

129131

132+
# type of float formatter in DataFrameFormatter
133+
FloatFormatType = Union[str, Callable, "EngFormatter"]
134+
135+
130136
@dataclass
131137
class IOargs(Generic[ModeVar, EncodingVar]):
132138
"""

pandas/conftest.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
import pandas.util._test_decorators as td
3535

3636
import pandas as pd
37-
from pandas import DataFrame
37+
from pandas import DataFrame, Series
3838
import pandas._testing as tm
3939
from pandas.core import ops
4040
from pandas.core.indexes.api import Index, MultiIndex
@@ -529,6 +529,23 @@ def series_with_simple_index(index):
529529
return _create_series(index)
530530

531531

532+
@pytest.fixture
533+
def series_with_multilevel_index():
534+
"""
535+
Fixture with a Series with a 2-level MultiIndex.
536+
"""
537+
arrays = [
538+
["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
539+
["one", "two", "one", "two", "one", "two", "one", "two"],
540+
]
541+
tuples = zip(*arrays)
542+
index = MultiIndex.from_tuples(tuples)
543+
data = np.random.randn(8)
544+
ser = Series(data, index=index)
545+
ser[3] = np.NaN
546+
return ser
547+
548+
532549
_narrow_dtypes = [
533550
np.float16,
534551
np.float32,

0 commit comments

Comments
 (0)