Skip to content

Commit b1e014e

Browse files
committed
Merge branch 'issue-index' of https://github.com/xaris96/pandas into issue-index
2 parents 3cfdbcd + c2a27b4 commit b1e014e

File tree

10 files changed

+58
-14
lines changed

10 files changed

+58
-14
lines changed

doc/source/whatsnew/v2.3.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ Timezones
120120
Numeric
121121
^^^^^^^
122122
- Enabled :meth:`Series.mode` and :meth:`DataFrame.mode` with ``dropna=False`` to sort the result for all dtypes in the presence of NA values; previously only certain dtypes would sort (:issue:`60702`)
123+
- :meth:`Series.round` now raises a ``TypeError`` with a clear message when called on a :class:`Series` with object dtype (:issue:`61206`)
123124
-
124125

125126
Conversion

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -779,6 +779,7 @@ I/O
779779
- Bug in :meth:`DataFrame.to_stata` when writing more than 32,000 value labels. (:issue:`60107`)
780780
- Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
781781
- Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`)
782+
- Bug in :meth:`HDFStore.select` causing queries on categorical string columns to return unexpected results (:issue:`57608`)
782783
- Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`)
783784
- Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
784785
- Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)

pandas/core/computation/pytables.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,8 @@ def stringify(value):
239239
if conv_val not in metadata:
240240
result = -1
241241
else:
242-
result = metadata.searchsorted(conv_val, side="left")
242+
# Find the index of the first match of conv_val in metadata
243+
result = np.flatnonzero(metadata == conv_val)[0]
243244
return TermValue(result, result, "integer")
244245
elif kind == "integer":
245246
try:

pandas/core/frame.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9258,11 +9258,11 @@ def groupby(
92589258
92599259
Parameters
92609260
----------%s
9261-
columns : str or object or a list of str
9261+
columns : Hashable or a sequence of the previous
92629262
Column to use to make new frame's columns.
9263-
index : str or object or a list of str, optional
9263+
index : Hashable or a sequence of the previous, optional
92649264
Column to use to make new frame's index. If not given, uses existing index.
9265-
values : str, object or a list of the previous, optional
9265+
values : Hashable or a sequence of the previous, optional
92669266
Column(s) to use for populating new frame's values. If not
92679267
specified, all remaining columns will be used and the result will
92689268
have hierarchically indexed columns.
@@ -9401,12 +9401,12 @@ def pivot(
94019401
----------%s
94029402
values : list-like or scalar, optional
94039403
Column or columns to aggregate.
9404-
index : column, Grouper, array, or list of the previous
9404+
index : column, Grouper, array, or sequence of the previous
94059405
Keys to group by on the pivot table index. If a list is passed,
94069406
it can contain any of the other types (except list). If an array is
94079407
passed, it must be the same length as the data and will be used in
94089408
the same manner as column values.
9409-
columns : column, Grouper, array, or list of the previous
9409+
columns : column, Grouper, array, or sequence of the previous
94109410
Keys to group by on the pivot table column. If a list is passed,
94119411
it can contain any of the other types (except list). If an array is
94129412
passed, it must be the same length as the data and will be used in

pandas/core/indexes/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1731,10 +1731,16 @@ def name(self) -> Hashable:
17311731
"""
17321732
Return Index or MultiIndex name.
17331733
1734+
Returns
1735+
-------
1736+
label (hashable object)
1737+
The name of the Index.
1738+
17341739
See Also
17351740
--------
17361741
Index.set_names: Able to set new names partially and by level.
17371742
Index.rename: Able to set new names partially and by level.
1743+
Series.name: Corresponding Series property.
17381744
17391745
Examples
17401746
--------

pandas/core/reshape/pivot.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,12 @@ def pivot_table(
7676
Input pandas DataFrame object.
7777
values : list-like or scalar, optional
7878
Column or columns to aggregate.
79-
index : column, Grouper, array, or list of the previous
79+
index : column, Grouper, array, or sequence of the previous
8080
Keys to group by on the pivot table index. If a list is passed,
8181
it can contain any of the other types (except list). If an array is
8282
passed, it must be the same length as the data and will be used in
8383
the same manner as column values.
84-
columns : column, Grouper, array, or list of the previous
84+
columns : column, Grouper, array, or sequence of the previous
8585
Keys to group by on the pivot table column. If a list is passed,
8686
it can contain any of the other types (except list). If an array is
8787
passed, it must be the same length as the data and will be used in
@@ -708,11 +708,11 @@ def pivot(
708708
----------
709709
data : DataFrame
710710
Input pandas DataFrame object.
711-
columns : str or object or a list of str
711+
columns : Hashable or a sequence of the previous
712712
Column to use to make new frame's columns.
713-
index : str or object or a list of str, optional
713+
index : Hashable or a sequence of the previous, optional
714714
Column to use to make new frame's index. If not given, uses existing index.
715-
values : str, object or a list of the previous, optional
715+
values : Hashable or a sequence of the previous, optional
716716
Column(s) to use for populating new frame's values. If not
717717
specified, all remaining columns will be used and the result will
718718
have hierarchically indexed columns.

pandas/core/series.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2514,6 +2514,8 @@ def round(self, decimals: int = 0, *args, **kwargs) -> Series:
25142514
dtype: float64
25152515
"""
25162516
nv.validate_round(args, kwargs)
2517+
if self.dtype == "object":
2518+
raise TypeError("Expected numeric dtype, got object instead.")
25172519
new_mgr = self._mgr.round(decimals=decimals)
25182520
return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes).__finalize__(
25192521
self, method="round"

pandas/tests/indexes/test_common.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,9 @@ def test_sort_values_invalid_na_position(
464464
reason="Sorting fails due to heterogeneous types in index (int vs str)",
465465
strict=False,
466466
)
467-
def test_sort_values_with_missing(index_with_missing, na_position, request, box_in_series):
467+
def test_sort_values_with_missing(
468+
index_with_missing, na_position, request, box_in_series
469+
):
468470
# GH 35584. Test that sort_values works with missing values,
469471
# sort non-missing and place missing according to na_position
470472

@@ -489,7 +491,6 @@ def test_sort_values_with_missing(index_with_missing, na_position, request, box_
489491
else:
490492
not_na_vals = index_with_missing[index_with_missing.notna()].values
491493

492-
493494
sorted_values = np.sort(not_na_vals)
494495
if na_position == "first":
495496
sorted_values = np.concatenate([[None] * missing_count, sorted_values])
@@ -500,7 +501,9 @@ def test_sort_values_with_missing(index_with_missing, na_position, request, box_
500501
if isinstance(index_with_missing, pd.Series):
501502
expected = pd.Series(sorted_values, dtype=index_with_missing.dtype)
502503
else:
503-
expected = type(index_with_missing)(sorted_values, dtype=index_with_missing.dtype)
504+
expected = type(index_with_missing)(
505+
sorted_values, dtype=index_with_missing.dtype
506+
)
504507

505508
result = index_with_missing.sort_values(na_position=na_position)
506509
if isinstance(index_with_missing, pd.Series):

pandas/tests/io/pytables/test_store.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
timedelta_range,
2424
)
2525
import pandas._testing as tm
26+
from pandas.api.types import (
27+
CategoricalDtype,
28+
)
2629
from pandas.tests.io.pytables.common import (
2730
_maybe_remove,
2831
ensure_clean_store,
@@ -1107,3 +1110,23 @@ def test_store_bool_index(tmp_path, setup_path):
11071110
df.to_hdf(path, key="a")
11081111
result = read_hdf(path, "a")
11091112
tm.assert_frame_equal(expected, result)
1113+
1114+
1115+
@pytest.mark.parametrize("model", ["name", "longname", "verylongname"])
1116+
def test_select_categorical_string_columns(tmp_path, model):
1117+
# GH#57608: queries on categorical string data columns returned unexpected results
1118+
1119+
path = tmp_path / "test.h5"
1120+
1121+
models = CategoricalDtype(categories=["name", "longname", "verylongname"])
1122+
df = DataFrame(
1123+
{"modelId": ["name", "longname", "longname"], "value": [1, 2, 3]}
1124+
).astype({"modelId": models, "value": int})
1125+
1126+
with HDFStore(path, "w") as store:
1127+
store.append("df", df, data_columns=["modelId"])
1128+
1129+
with HDFStore(path, "r") as store:
1130+
result = store.select("df", "modelId == model")
1131+
expected = df[df["modelId"] == model]
1132+
tm.assert_frame_equal(result, expected)

pandas/tests/series/methods/test_round.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,10 @@ def test_round_ea_boolean(self):
7272
tm.assert_series_equal(result, expected)
7373
result.iloc[0] = False
7474
tm.assert_series_equal(ser, expected)
75+
76+
def test_round_dtype_object(self):
77+
# GH#61206
78+
ser = Series([0.2], dtype="object")
79+
msg = "Expected numeric dtype, got object instead."
80+
with pytest.raises(TypeError, match=msg):
81+
ser.round()

0 commit comments

Comments
 (0)