Skip to content

Commit a9de220

Browse files
authored
Merge branch 'main' into fix-merge-attrs
2 parents 2db541a + d81882b commit a9de220

File tree

11 files changed

+174
-73
lines changed

11 files changed

+174
-73
lines changed

doc/source/development/maintaining.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,7 @@ Post-Release
488488
for reference):
489489

490490
- The pandas-dev and pydata mailing lists
491-
- Twitter, Mastodon, Telegram and LinkedIn
491+
- X, Mastodon, Telegram and LinkedIn
492492

493493
7. Update these release instructions to fix anything incorrect and to reflect any
494494
change since the last release.

pandas/io/formats/style.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1644,7 +1644,7 @@ def _update_ctx_header(self, attrs: DataFrame, axis: AxisInt) -> None:
16441644
for j in attrs.columns:
16451645
ser = attrs[j]
16461646
for i, c in ser.items():
1647-
if not c:
1647+
if not c or pd.isna(c):
16481648
continue
16491649
css_list = maybe_convert_css_to_tuples(c)
16501650
if axis == 0:

pandas/io/pytables.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5297,6 +5297,8 @@ def _dtype_to_kind(dtype_str: str) -> str:
52975297
kind = "integer"
52985298
elif dtype_str == "object":
52995299
kind = "object"
5300+
elif dtype_str == "str":
5301+
kind = "str"
53005302
else:
53015303
raise ValueError(f"cannot interpret dtype of [{dtype_str}]")
53025304

pandas/tests/groupby/test_apply.py

Lines changed: 32 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -255,19 +255,19 @@ def test_apply_with_mixed_dtype():
255255
"foo2": ["one", "two", "two", "three", "one", "two"],
256256
}
257257
)
258-
result = df.apply(lambda x: x, axis=1).dtypes
259-
expected = df.dtypes
260-
tm.assert_series_equal(result, expected)
258+
result = df.apply(lambda x: x, axis=1)
259+
expected = df
260+
tm.assert_frame_equal(result, expected)
261261

262262
# GH 3610 incorrect dtype conversion with as_index=False
263263
df = DataFrame({"c1": [1, 2, 6, 6, 8]})
264264
df["c2"] = df.c1 / 2.0
265-
result1 = df.groupby("c2").mean().reset_index().c2
266-
result2 = df.groupby("c2", as_index=False).mean().c2
267-
tm.assert_series_equal(result1, result2)
265+
result1 = df.groupby("c2").mean().reset_index()
266+
result2 = df.groupby("c2", as_index=False).mean()
267+
tm.assert_frame_equal(result1, result2)
268268

269269

270-
def test_groupby_as_index_apply():
270+
def test_groupby_as_index_apply(as_index):
271271
# GH #4648 and #3417
272272
df = DataFrame(
273273
{
@@ -276,27 +276,35 @@ def test_groupby_as_index_apply():
276276
"time": range(6),
277277
}
278278
)
279+
gb = df.groupby("user_id", as_index=as_index)
279280

280-
g_as = df.groupby("user_id", as_index=True)
281-
g_not_as = df.groupby("user_id", as_index=False)
282-
283-
res_as = g_as.head(2).index
284-
res_not_as = g_not_as.head(2).index
285-
exp = Index([0, 1, 2, 4])
286-
tm.assert_index_equal(res_as, exp)
287-
tm.assert_index_equal(res_not_as, exp)
288-
289-
res_as_apply = g_as.apply(lambda x: x.head(2)).index
290-
res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index
281+
expected = DataFrame(
282+
{
283+
"item_id": ["b", "b", "a", "a"],
284+
"user_id": [1, 2, 1, 3],
285+
"time": [0, 1, 2, 4],
286+
},
287+
index=[0, 1, 2, 4],
288+
)
289+
result = gb.head(2)
290+
tm.assert_frame_equal(result, expected)
291291

292292
# apply doesn't maintain the original ordering
293293
# changed in GH5610 as the as_index=False returns a MI here
294-
exp_not_as_apply = Index([0, 2, 1, 4])
295-
tp = [(1, 0), (1, 2), (2, 1), (3, 4)]
296-
exp_as_apply = MultiIndex.from_tuples(tp, names=["user_id", None])
297-
298-
tm.assert_index_equal(res_as_apply, exp_as_apply)
299-
tm.assert_index_equal(res_not_as_apply, exp_not_as_apply)
294+
if as_index:
295+
tp = [(1, 0), (1, 2), (2, 1), (3, 4)]
296+
index = MultiIndex.from_tuples(tp, names=["user_id", None])
297+
else:
298+
index = Index([0, 2, 1, 4])
299+
expected = DataFrame(
300+
{
301+
"item_id": list("baba"),
302+
"time": [0, 2, 1, 4],
303+
},
304+
index=index,
305+
)
306+
result = gb.apply(lambda x: x.head(2))
307+
tm.assert_frame_equal(result, expected)
300308

301309

302310
def test_groupby_as_index_apply_str():

pandas/tests/groupby/test_apply_mutate.py

Lines changed: 10 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -38,18 +38,20 @@ def test_mutate_groups():
3838
}
3939
)
4040

41-
def f_copy(x):
41+
def f(x):
4242
x = x.copy()
4343
x["rank"] = x.val.rank(method="min")
4444
return x.groupby("cat2")["rank"].min()
4545

46-
def f_no_copy(x):
47-
x["rank"] = x.val.rank(method="min")
48-
return x.groupby("cat2")["rank"].min()
49-
50-
grpby_copy = df.groupby("cat1").apply(f_copy)
51-
grpby_no_copy = df.groupby("cat1").apply(f_no_copy)
52-
tm.assert_series_equal(grpby_copy, grpby_no_copy)
46+
expected = pd.DataFrame(
47+
{
48+
"cat1": list("aaaabbb"),
49+
"cat2": list("cdefcde"),
50+
"rank": [3.0, 2.0, 5.0, 1.0, 2.0, 4.0, 1.0],
51+
}
52+
).set_index(["cat1", "cat2"])["rank"]
53+
result = df.groupby("cat1").apply(f)
54+
tm.assert_series_equal(result, expected)
5355

5456

5557
def test_no_mutate_but_looks_like():
@@ -61,22 +63,3 @@ def test_no_mutate_but_looks_like():
6163
result1 = df.groupby("key", group_keys=True).apply(lambda x: x[:].value)
6264
result2 = df.groupby("key", group_keys=True).apply(lambda x: x.value)
6365
tm.assert_series_equal(result1, result2)
64-
65-
66-
def test_apply_function_with_indexing():
67-
# GH: 33058
68-
df = pd.DataFrame(
69-
{"col1": ["A", "A", "A", "B", "B", "B"], "col2": [1, 2, 3, 4, 5, 6]}
70-
)
71-
72-
def fn(x):
73-
x.loc[x.index[-1], "col2"] = 0
74-
return x.col2
75-
76-
result = df.groupby(["col1"], as_index=False).apply(fn)
77-
expected = pd.Series(
78-
[1, 2, 0, 4, 5, 0],
79-
index=range(6),
80-
name="col2",
81-
)
82-
tm.assert_series_equal(result, expected)

pandas/tests/indexing/test_loc.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3297,3 +3297,23 @@ def test_loc_reindexing_of_empty_index(self):
32973297
df.loc[Series([False] * 4, index=df.index, name=0), 0] = df[0]
32983298
expected = DataFrame(index=[1, 1, 2, 2], data=["1", "1", "2", "2"])
32993299
tm.assert_frame_equal(df, expected)
3300+
3301+
def test_loc_setitem_matching_index(self):
3302+
# GH 25548
3303+
s = Series(0.0, index=list("abcd"))
3304+
s1 = Series(1.0, index=list("ab"))
3305+
s2 = Series(2.0, index=list("xy"))
3306+
3307+
# Test matching indices
3308+
s.loc[["a", "b"]] = s1
3309+
3310+
result = s[["a", "b"]]
3311+
expected = s1
3312+
tm.assert_series_equal(result, expected)
3313+
3314+
# Test unmatched indices
3315+
s.loc[["a", "b"]] = s2
3316+
3317+
result = s[["a", "b"]]
3318+
expected = Series([np.nan, np.nan], index=["a", "b"])
3319+
tm.assert_series_equal(result, expected)

pandas/tests/io/formats/style/test_to_latex.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
from pandas import (
97
DataFrame,
108
MultiIndex,
@@ -731,7 +729,6 @@ def test_longtable_caption_label(styler, caption, cap_exp, label, lab_exp):
731729
)
732730

733731

734-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
735732
@pytest.mark.parametrize("index", [True, False])
736733
@pytest.mark.parametrize(
737734
"columns, siunitx",

pandas/tests/io/pytables/test_file_handling.py

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,11 @@
3737

3838
pytestmark = [
3939
pytest.mark.single_cpu,
40-
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
4140
]
4241

4342

4443
@pytest.mark.parametrize("mode", ["r", "r+", "a", "w"])
45-
def test_mode(setup_path, tmp_path, mode):
44+
def test_mode(setup_path, tmp_path, mode, using_infer_string):
4645
df = DataFrame(
4746
np.random.default_rng(2).standard_normal((10, 4)),
4847
columns=Index(list("ABCD"), dtype=object),
@@ -91,10 +90,12 @@ def test_mode(setup_path, tmp_path, mode):
9190
read_hdf(path, "df", mode=mode)
9291
else:
9392
result = read_hdf(path, "df", mode=mode)
93+
if using_infer_string:
94+
df.columns = df.columns.astype("str")
9495
tm.assert_frame_equal(result, df)
9596

9697

97-
def test_default_mode(tmp_path, setup_path):
98+
def test_default_mode(tmp_path, setup_path, using_infer_string):
9899
# read_hdf uses default mode
99100
df = DataFrame(
100101
np.random.default_rng(2).standard_normal((10, 4)),
@@ -104,7 +105,10 @@ def test_default_mode(tmp_path, setup_path):
104105
path = tmp_path / setup_path
105106
df.to_hdf(path, key="df", mode="w")
106107
result = read_hdf(path, "df")
107-
tm.assert_frame_equal(result, df)
108+
expected = df.copy()
109+
if using_infer_string:
110+
expected.columns = expected.columns.astype("str")
111+
tm.assert_frame_equal(result, expected)
108112

109113

110114
def test_reopen_handle(tmp_path, setup_path):
@@ -163,7 +167,7 @@ def test_reopen_handle(tmp_path, setup_path):
163167
assert not store.is_open
164168

165169

166-
def test_open_args(setup_path):
170+
def test_open_args(setup_path, using_infer_string):
167171
with tm.ensure_clean(setup_path) as path:
168172
df = DataFrame(
169173
1.1 * np.arange(120).reshape((30, 4)),
@@ -178,8 +182,13 @@ def test_open_args(setup_path):
178182
store["df"] = df
179183
store.append("df2", df)
180184

181-
tm.assert_frame_equal(store["df"], df)
182-
tm.assert_frame_equal(store["df2"], df)
185+
expected = df.copy()
186+
if using_infer_string:
187+
expected.index = expected.index.astype("str")
188+
expected.columns = expected.columns.astype("str")
189+
190+
tm.assert_frame_equal(store["df"], expected)
191+
tm.assert_frame_equal(store["df2"], expected)
183192

184193
store.close()
185194

@@ -194,7 +203,7 @@ def test_flush(setup_path):
194203
store.flush(fsync=True)
195204

196205

197-
def test_complibs_default_settings(tmp_path, setup_path):
206+
def test_complibs_default_settings(tmp_path, setup_path, using_infer_string):
198207
# GH15943
199208
df = DataFrame(
200209
1.1 * np.arange(120).reshape((30, 4)),
@@ -207,7 +216,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
207216
tmpfile = tmp_path / setup_path
208217
df.to_hdf(tmpfile, key="df", complevel=9)
209218
result = read_hdf(tmpfile, "df")
210-
tm.assert_frame_equal(result, df)
219+
expected = df.copy()
220+
if using_infer_string:
221+
expected.index = expected.index.astype("str")
222+
expected.columns = expected.columns.astype("str")
223+
tm.assert_frame_equal(result, expected)
211224

212225
with tables.open_file(tmpfile, mode="r") as h5file:
213226
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
@@ -218,7 +231,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
218231
tmpfile = tmp_path / setup_path
219232
df.to_hdf(tmpfile, key="df", complib="zlib")
220233
result = read_hdf(tmpfile, "df")
221-
tm.assert_frame_equal(result, df)
234+
expected = df.copy()
235+
if using_infer_string:
236+
expected.index = expected.index.astype("str")
237+
expected.columns = expected.columns.astype("str")
238+
tm.assert_frame_equal(result, expected)
222239

223240
with tables.open_file(tmpfile, mode="r") as h5file:
224241
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
@@ -229,7 +246,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
229246
tmpfile = tmp_path / setup_path
230247
df.to_hdf(tmpfile, key="df")
231248
result = read_hdf(tmpfile, "df")
232-
tm.assert_frame_equal(result, df)
249+
expected = df.copy()
250+
if using_infer_string:
251+
expected.index = expected.index.astype("str")
252+
expected.columns = expected.columns.astype("str")
253+
tm.assert_frame_equal(result, expected)
233254

234255
with tables.open_file(tmpfile, mode="r") as h5file:
235256
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
@@ -308,6 +329,7 @@ def test_complibs(tmp_path, lvl, lib, request):
308329
assert node.filters.complib == lib
309330

310331

332+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
311333
@pytest.mark.skipif(
312334
not is_platform_little_endian(), reason="reason platform is not little endian"
313335
)
@@ -325,6 +347,7 @@ def test_encoding(setup_path):
325347
tm.assert_frame_equal(result, expected)
326348

327349

350+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
328351
@pytest.mark.parametrize(
329352
"val",
330353
[

pandas/tests/io/pytables/test_subclass.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
from pandas import (
75
DataFrame,
86
Series,
@@ -19,7 +17,6 @@
1917

2018
class TestHDFStoreSubclass:
2119
# GH 33748
22-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
2320
def test_supported_for_subclass_dataframe(self, tmp_path):
2421
data = {"a": [1, 2], "b": [3, 4]}
2522
sdf = tm.SubclassedDataFrame(data, dtype=np.intp)

pandas/tests/io/test_common.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919
import numpy as np
2020
import pytest
2121

22-
from pandas._config import using_string_dtype
23-
2422
from pandas.compat import (
2523
WASM,
2624
is_platform_windows,
@@ -365,7 +363,6 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module):
365363
expected = f_path.read()
366364
assert result == expected
367365

368-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) hdf support")
369366
def test_write_fspath_hdf5(self):
370367
# Same test as write_fspath_all, except HDF5 files aren't
371368
# necessarily byte-for-byte identical for a given dataframe, so we'll

0 commit comments

Comments
 (0)