Skip to content

Commit c21d71b

Browse files
Merge branch 'master' into fix-read-sql-empty-result-with-chunksize-bug-34411
2 parents bda0257 + b9a9769 commit c21d71b

File tree

181 files changed

+1840
-1505
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

181 files changed

+1840
-1505
lines changed

Makefile

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,10 @@ doc:
2525
cd doc; \
2626
python make.py clean; \
2727
python make.py html
28+
29+
check:
30+
python3 scripts/validate_unwanted_patterns.py \
31+
--validation-type="private_function_across_module" \
32+
--included-file-extensions="py" \
33+
--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \
34+
pandas/

ci/code_checks.sh

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,14 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
116116
fi
117117
RET=$(($RET + $?)) ; echo $MSG "DONE"
118118

119+
MSG='Check for use of private module attribute access' ; echo $MSG
120+
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
121+
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
122+
else
123+
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
124+
fi
125+
RET=$(($RET + $?)) ; echo $MSG "DONE"
126+
119127
echo "isort --version-number"
120128
isort --version-number
121129

@@ -179,6 +187,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
179187
invgrep -R --include="*.py" -E "super\(\w*, (self|cls)\)" pandas
180188
RET=$(($RET + $?)) ; echo $MSG "DONE"
181189

190+
MSG='Check for use of builtin filter function' ; echo $MSG
191+
invgrep -R --include="*.py" -P '(?<!def)[\(\s]filter\(' pandas
192+
RET=$(($RET + $?)) ; echo $MSG "DONE"
193+
182194
# Check for the following code in testing: `np.testing` and `np.array_equal`
183195
MSG='Check for invalid testing' ; echo $MSG
184196
invgrep -r -E --include '*.py' --exclude testing.py '(numpy|np)(\.testing|\.array_equal)' pandas/tests/
@@ -226,15 +238,22 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
226238
invgrep -R --include=*.{py,pyx} '!r}' pandas
227239
RET=$(($RET + $?)) ; echo $MSG "DONE"
228240

241+
# -------------------------------------------------------------------------
242+
# Type annotations
243+
229244
MSG='Check for use of comment-based annotation syntax' ; echo $MSG
230245
invgrep -R --include="*.py" -P '# type: (?!ignore)' pandas
231246
RET=$(($RET + $?)) ; echo $MSG "DONE"
232247

233-
# https://github.com/python/mypy/issues/7384
234-
# MSG='Check for missing error codes with # type: ignore' ; echo $MSG
235-
# invgrep -R --include="*.py" -P '# type: ignore(?!\[)' pandas
236-
# RET=$(($RET + $?)) ; echo $MSG "DONE"
248+
MSG='Check for missing error codes with # type: ignore' ; echo $MSG
249+
invgrep -R --include="*.py" -P '# type:\s?ignore(?!\[)' pandas
250+
RET=$(($RET + $?)) ; echo $MSG "DONE"
251+
252+
MSG='Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias' ; echo $MSG
253+
invgrep -R --include="*.py" --exclude=_typing.py -E 'Union\[.*(Series.*DataFrame|DataFrame.*Series).*\]' pandas
254+
RET=$(($RET + $?)) ; echo $MSG "DONE"
237255

256+
# -------------------------------------------------------------------------
238257
MSG='Check for use of foo.__class__ instead of type(foo)' ; echo $MSG
239258
invgrep -R --include=*.{py,pyx} '\.__class__' pandas
240259
RET=$(($RET + $?)) ; echo $MSG "DONE"

doc/source/development/contributing_docstring.rst

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,18 @@ The next example gives an idea of what a docstring looks like:
3232
Parameters
3333
----------
3434
num1 : int
35-
First number to add
35+
First number to add.
3636
num2 : int
37-
Second number to add
37+
Second number to add.
3838
3939
Returns
4040
-------
4141
int
42-
The sum of `num1` and `num2`
42+
The sum of `num1` and `num2`.
4343
4444
See Also
4545
--------
46-
subtract : Subtract one integer from another
46+
subtract : Subtract one integer from another.
4747
4848
Examples
4949
--------
@@ -998,4 +998,4 @@ mapping function names to docstrings. Wherever possible, we prefer using
998998

999999
See ``pandas.core.generic.NDFrame.fillna`` for an example template, and
10001000
``pandas.core.series.Series.fillna`` and ``pandas.core.generic.frame.fillna``
1001-
for the filled versions.
1001+
for the filled versions.

doc/source/user_guide/missing_data.rst

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -689,32 +689,6 @@ You can also operate on the DataFrame in place:
689689
690690
df.replace(1.5, np.nan, inplace=True)
691691
692-
.. warning::
693-
694-
When replacing multiple ``bool`` or ``datetime64`` objects, the first
695-
argument to ``replace`` (``to_replace``) must match the type of the value
696-
being replaced. For example,
697-
698-
.. code-block:: python
699-
700-
>>> s = pd.Series([True, False, True])
701-
>>> s.replace({'a string': 'new value', True: False}) # raises
702-
TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str'
703-
704-
will raise a ``TypeError`` because one of the ``dict`` keys is not of the
705-
correct type for replacement.
706-
707-
However, when replacing a *single* object such as,
708-
709-
.. ipython:: python
710-
711-
s = pd.Series([True, False, True])
712-
s.replace('a string', 'another string')
713-
714-
the original ``NDFrame`` object will be returned untouched. We're working on
715-
unifying this API, but for backwards compatibility reasons we cannot break
716-
the latter behavior. See :issue:`6354` for more details.
717-
718692
Missing data casting rules and indexing
719693
---------------------------------------
720694

doc/source/whatsnew/v1.1.2.rst

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,15 @@ Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Regression in :meth:`DatetimeIndex.intersection` incorrectly raising ``AssertionError`` when intersecting against a list (:issue:`35876`)
1818
- Fix regression in updating a column inplace (e.g. using ``df['col'].fillna(.., inplace=True)``) (:issue:`35731`)
19+
- Fix regression in :meth:`DataFrame.append` mixing tz-aware and tz-naive datetime columns (:issue:`35460`)
1920
- Performance regression for :meth:`RangeIndex.format` (:issue:`35712`)
21+
- Regression where :meth:`MultiIndex.get_loc` would return a slice spanning the full index when passed an empty list (:issue:`35878`)
22+
- Fix regression in invalid cache after an indexing operation; this can manifest when setting which does not update the data (:issue:`35521`)
2023
- Regression in :meth:`DataFrame.replace` where a ``TypeError`` would be raised when attempting to replace elements of type :class:`Interval` (:issue:`35931`)
24+
- Fix regression in pickle roundtrip of the ``closed`` attribute of :class:`IntervalIndex` (:issue:`35658`)
25+
- Fixed regression in :meth:`DataFrameGroupBy.agg` where a ``ValueError: buffer source array is read-only`` would be raised when the underlying array is read-only (:issue:`36014`)
2126
-
2227

23-
2428
.. ---------------------------------------------------------------------------
2529
2630
.. _whatsnew_112.bug_fixes:
@@ -30,17 +34,22 @@ Bug fixes
3034
- Bug in :meth:`DataFrame.eval` with ``object`` dtype column binary operations (:issue:`35794`)
3135
- Bug in :class:`Series` constructor raising a ``TypeError`` when constructing sparse datetime64 dtypes (:issue:`35762`)
3236
- Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`)
33-
- Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should bw ``""`` (:issue:`35712`)
37+
- Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` not respecting the ``errors`` argument when set to ``"ignore"`` for extension dtypes (:issue:`35471`)
38+
- Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should be ``""`` (:issue:`35712`)
3439
- Bug in :meth:`Float64Index.__contains__` incorrectly raising ``TypeError`` instead of returning ``False`` (:issue:`35788`)
35-
- Bug in :class:`DataFrame` indexing returning an incorrect :class:`Series` in some cases when the series has been altered and a cache not invalidated (:issue:`36051`)
40+
- Bug in :class:`Series` constructor incorrectly raising a ``TypeError`` when passed an ordered set (:issue:`36044`)
41+
- Bug in :meth:`Series.dt.isocalendar` and :meth:`DatetimeIndex.isocalendar` that returned incorrect year for certain dates (:issue:`36032`)
42+
- Bug in :class:`DataFrame` indexing returning an incorrect :class:`Series` in some cases when the series has been altered and a cache not invalidated (:issue:`33675`)
43+
- Bug in :meth:`DataFrame.corr` causing subsequent indexing lookups to be incorrect (:issue:`35882`)
44+
- Bug in :meth:`import_optional_dependency` returning incorrect package names in cases where package name is different from import name (:issue:`35948`)
3645

3746
.. ---------------------------------------------------------------------------
3847
3948
.. _whatsnew_112.other:
4049

4150
Other
4251
~~~~~
43-
- :meth:`factorize` now supports ``na_sentinel=None`` to include NaN in the uniques of the values and remove ``dropna`` keyword which was unintentionally exposed to public facing API in 1.1 version from :meth:`factorize`(:issue:`35667`)
52+
- :meth:`factorize` now supports ``na_sentinel=None`` to include NaN in the uniques of the values and remove ``dropna`` keyword which was unintentionally exposed to public facing API in 1.1 version from :meth:`factorize` (:issue:`35667`)
4453

4554
.. ---------------------------------------------------------------------------
4655

doc/source/whatsnew/v1.2.0.rst

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ Other enhancements
103103

104104
- Added :meth:`~DataFrame.set_flags` for setting table-wide flags on a ``Series`` or ``DataFrame`` (:issue:`28394`)
105105
- :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
106-
-
106+
- :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`)
107107
-
108108

109109
.. _whatsnew_120.api_breaking.python:
@@ -136,6 +136,8 @@ If installed, we now require:
136136
+-----------------+-----------------+----------+---------+
137137
| pytest (dev) | 5.0.1 | | X |
138138
+-----------------+-----------------+----------+---------+
139+
| mypy (dev) | 0.782 | | X |
140+
+-----------------+-----------------+----------+---------+
139141

140142
For `optional libraries <https://dev.pandas.io/docs/install.html#dependencies>`_ the general recommendation is to use the latest version.
141143
The following table lists the lowest version per library that is currently being tested throughout the development of pandas.
@@ -213,7 +215,6 @@ Performance improvements
213215
Bug fixes
214216
~~~~~~~~~
215217

216-
217218
Categorical
218219
^^^^^^^^^^^
219220

@@ -254,7 +255,7 @@ Conversion
254255

255256
Strings
256257
^^^^^^^
257-
258+
- Bug in :meth:`Series.to_string`, :meth:`DataFrame.to_string`, and :meth:`DataFrame.to_latex` adding a leading space when ``index=False`` (:issue:`24980`)
258259
-
259260
-
260261

@@ -267,8 +268,9 @@ Interval
267268

268269
Indexing
269270
^^^^^^^^
271+
270272
- Bug in :meth:`PeriodIndex.get_loc` incorrectly raising ``ValueError`` on non-datelike strings instead of ``KeyError``, causing similar errors in :meth:`Series.__getitem__`, :meth:`Series.__contains__`, and :meth:`Series.loc.__getitem__` (:issue:`34240`)
271-
-
273+
- Bug in :meth:`Index.sort_values` where, when empty values were passed, the method would break by trying to compare missing values instead of pushing them to the end of the sort order. (:issue:`35584`)
272274
-
273275

274276
Missing
@@ -293,12 +295,13 @@ I/O
293295
- :meth:`to_csv` did not support zip compression for binary file object not having a filename (:issue:`35058`)
294296
- :meth:`to_csv` and :meth:`read_csv` did not honor `compression` and `encoding` for path-like objects that are internally converted to file-like objects (:issue:`35677`, :issue:`26124`, and :issue:`32392`)
295297
- :meth:`read_sql` returned an empty generator if `chunksize` was non-zero and the query returned no results. Now returns a generator with a single empty dataframe (:issue:`34411`)
298+
- :meth:`to_pickle` and :meth:`read_pickle` did not support compression for file-objects (:issue:`26237`, :issue:`29054`, and :issue:`29570`)
296299

297300
Plotting
298301
^^^^^^^^
299302

300-
-
301-
-
303+
- Bug in :meth:`DataFrame.plot` where a marker letter in the ``style`` keyword sometimes causes a ``ValueError`` (:issue:`21003`)
304+
- :meth:`DataFrame.plot` and :meth:`Series.plot` raise ``UserWarning`` about usage of FixedFormatter and FixedLocator (:issue:`35684` and :issue:`35945`)
302305

303306
Groupby/resample/rolling
304307
^^^^^^^^^^^^^^^^^^^^^^^^
@@ -310,6 +313,8 @@ Groupby/resample/rolling
310313
- Bug in :meth:`DataFrameGroupby.apply` would drop a :class:`CategoricalIndex` when grouped on. (:issue:`35792`)
311314
- Bug when subsetting columns on a :class:`~pandas.core.groupby.DataFrameGroupBy` (e.g. ``df.groupby('a')[['b']]``) would reset the attributes ``axis``, ``dropna``, ``group_keys``, ``level``, ``mutated``, ``sort``, and ``squeeze`` to their default values. (:issue:`9959`)
312315
- Bug in :meth:`DataFrameGroupby.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`)
316+
- Bug in :meth:`DataFrame.groupby` does not always maintain column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`)
317+
- Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
313318
-
314319

315320
Reshaping
@@ -336,6 +341,7 @@ ExtensionArray
336341
Other
337342
^^^^^
338343
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`)
344+
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`)
339345
-
340346

341347
.. ---------------------------------------------------------------------------

pandas/_libs/algos.pyx

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,7 @@ ctypedef fused algos_t:
412412
uint8_t
413413

414414

415-
def _validate_limit(nobs: int, limit=None) -> int:
415+
def validate_limit(nobs: int, limit=None) -> int:
416416
"""
417417
Check that the `limit` argument is a positive integer.
418418

@@ -452,7 +452,7 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
452452
indexer = np.empty(nright, dtype=np.int64)
453453
indexer[:] = -1
454454

455-
lim = _validate_limit(nright, limit)
455+
lim = validate_limit(nright, limit)
456456

457457
if nleft == 0 or nright == 0 or new[nright - 1] < old[0]:
458458
return indexer
@@ -509,7 +509,7 @@ def pad_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
509509
if N == 0:
510510
return
511511

512-
lim = _validate_limit(N, limit)
512+
lim = validate_limit(N, limit)
513513

514514
val = values[0]
515515
for i in range(N):
@@ -537,7 +537,7 @@ def pad_2d_inplace(algos_t[:, :] values, const uint8_t[:, :] mask, limit=None):
537537
if N == 0:
538538
return
539539

540-
lim = _validate_limit(N, limit)
540+
lim = validate_limit(N, limit)
541541

542542
for j in range(K):
543543
fill_count = 0
@@ -593,7 +593,7 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
593593
indexer = np.empty(nright, dtype=np.int64)
594594
indexer[:] = -1
595595

596-
lim = _validate_limit(nright, limit)
596+
lim = validate_limit(nright, limit)
597597

598598
if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]:
599599
return indexer
@@ -651,7 +651,7 @@ def backfill_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
651651
if N == 0:
652652
return
653653

654-
lim = _validate_limit(N, limit)
654+
lim = validate_limit(N, limit)
655655

656656
val = values[N - 1]
657657
for i in range(N - 1, -1, -1):
@@ -681,7 +681,7 @@ def backfill_2d_inplace(algos_t[:, :] values,
681681
if N == 0:
682682
return
683683

684-
lim = _validate_limit(N, limit)
684+
lim = validate_limit(N, limit)
685685

686686
for j in range(K):
687687
fill_count = 0

0 commit comments

Comments
 (0)