|
1 | 1 | from datetime import datetime
|
| 2 | +from itertools import product |
2 | 3 |
|
3 | 4 | import numpy as np
|
4 | 5 | import pytest
|
5 | 6 |
|
| 7 | +from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype |
| 8 | + |
6 | 9 | import pandas as pd
|
7 | 10 | from pandas import (
|
8 | 11 | DataFrame,
|
@@ -301,6 +304,194 @@ def test_reset_index_range(self):
|
301 | 304 | )
|
302 | 305 | tm.assert_frame_equal(result, expected)
|
303 | 306 |
|
def test_reset_index_multiindex_columns(self):
    """Exercise ``reset_index`` on frames whose columns are a MultiIndex.

    Scenarios, in order: an index name that maps onto an existing
    top-level column key, a clash with an already existing column
    (GH#16120), a full tuple key (GH#16164), an extra unnamed index
    level, and tuple index names that are longer or shorter than the
    number of column levels (with ``col_fill`` / ``col_level``).
    """
    two_level_keys = [["A", ""], ["B", "b"]]
    df = DataFrame(
        [[0, 2], [1, 3]], columns=MultiIndex.from_tuples(two_level_keys)
    )

    # Index named "A" is expected to come back as the ("A", "") column.
    only_b = df[["B"]]
    result = only_b.rename_axis("A").reset_index()
    tm.assert_frame_equal(result, df)

    # GH#16120: already existing column
    with pytest.raises(
        ValueError, match=r"cannot insert \('A', ''\), already exists"
    ):
        df.rename_axis("A").reset_index()

    # GH#16164: multiindex (tuple) full key
    round_tripped = df.set_index([("A", "")]).reset_index()
    tm.assert_frame_equal(round_tripped, df)

    # with additional (unnamed) index level
    level_0_col = DataFrame(
        [[0], [1]], columns=MultiIndex.from_tuples([("level_0", "")])
    )
    reordered = df[[("B", "b"), ("A", "")]]
    expected = pd.concat([level_0_col, reordered], axis=1)
    result = df.set_index([("B", "b")], append=True).reset_index()
    tm.assert_frame_equal(result, expected)

    # with index name which is a too long tuple...
    with pytest.raises(
        ValueError, match="Item must have length equal to number of levels."
    ):
        df.rename_axis([("C", "c", "i")]).reset_index()

    # or too short...
    three_level_keys = [["A", "a", ""], ["B", "b", "i"]]
    df2 = DataFrame(
        [[0, 2], [1, 3]], columns=MultiIndex.from_tuples(three_level_keys)
    )
    filled_col = DataFrame(
        [[0], [1]], columns=MultiIndex.from_tuples([("C", "c", "ii")])
    )
    expected = pd.concat([filled_col, df2], axis=1)
    short_named = df2.rename_axis([("C", "c")])
    result = short_named.reset_index(col_fill="ii")
    tm.assert_frame_equal(result, expected)

    # ... which is incompatible with col_fill=None
    incompatible_msg = (
        "col_fill=None is incompatible with "
        r"incomplete column name \('C', 'c'\)"
    )
    with pytest.raises(ValueError, match=incompatible_msg):
        df2.rename_axis([("C", "c")]).reset_index(col_fill=None)

    # with col_level != 0
    # The expected frame is the same ("C", "c", "ii") layout as above.
    result = df2.rename_axis([("c", "ii")]).reset_index(
        col_level=1, col_fill="C"
    )
    tm.assert_frame_equal(result, expected)
| 358 | + |
| 359 | + def test_reset_index_datetime(self, tz_naive_fixture): |
| 360 | + # GH#3950 |
| 361 | + tz = tz_naive_fixture |
| 362 | + idx1 = pd.date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1") |
| 363 | + idx2 = Index(range(5), name="idx2", dtype="int64") |
| 364 | + idx = MultiIndex.from_arrays([idx1, idx2]) |
| 365 | + df = DataFrame( |
| 366 | + {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, |
| 367 | + index=idx, |
| 368 | + ) |
| 369 | + |
| 370 | + expected = DataFrame( |
| 371 | + { |
| 372 | + "idx1": [ |
| 373 | + datetime(2011, 1, 1), |
| 374 | + datetime(2011, 1, 2), |
| 375 | + datetime(2011, 1, 3), |
| 376 | + datetime(2011, 1, 4), |
| 377 | + datetime(2011, 1, 5), |
| 378 | + ], |
| 379 | + "idx2": np.arange(5, dtype="int64"), |
| 380 | + "a": np.arange(5, dtype="int64"), |
| 381 | + "b": ["A", "B", "C", "D", "E"], |
| 382 | + }, |
| 383 | + columns=["idx1", "idx2", "a", "b"], |
| 384 | + ) |
| 385 | + expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) |
| 386 | + |
| 387 | + tm.assert_frame_equal(df.reset_index(), expected) |
| 388 | + |
| 389 | + idx3 = pd.date_range( |
| 390 | + "1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3" |
| 391 | + ) |
| 392 | + idx = MultiIndex.from_arrays([idx1, idx2, idx3]) |
| 393 | + df = DataFrame( |
| 394 | + {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, |
| 395 | + index=idx, |
| 396 | + ) |
| 397 | + |
| 398 | + expected = DataFrame( |
| 399 | + { |
| 400 | + "idx1": [ |
| 401 | + datetime(2011, 1, 1), |
| 402 | + datetime(2011, 1, 2), |
| 403 | + datetime(2011, 1, 3), |
| 404 | + datetime(2011, 1, 4), |
| 405 | + datetime(2011, 1, 5), |
| 406 | + ], |
| 407 | + "idx2": np.arange(5, dtype="int64"), |
| 408 | + "idx3": [ |
| 409 | + datetime(2012, 1, 1), |
| 410 | + datetime(2012, 2, 1), |
| 411 | + datetime(2012, 3, 1), |
| 412 | + datetime(2012, 4, 1), |
| 413 | + datetime(2012, 5, 1), |
| 414 | + ], |
| 415 | + "a": np.arange(5, dtype="int64"), |
| 416 | + "b": ["A", "B", "C", "D", "E"], |
| 417 | + }, |
| 418 | + columns=["idx1", "idx2", "idx3", "a", "b"], |
| 419 | + ) |
| 420 | + expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) |
| 421 | + expected["idx3"] = expected["idx3"].apply( |
| 422 | + lambda d: Timestamp(d, tz="Europe/Paris") |
| 423 | + ) |
| 424 | + tm.assert_frame_equal(df.reset_index(), expected) |
| 425 | + |
| 426 | + # GH#7793 |
| 427 | + idx = MultiIndex.from_product( |
| 428 | + [["a", "b"], pd.date_range("20130101", periods=3, tz=tz)] |
| 429 | + ) |
| 430 | + df = DataFrame( |
| 431 | + np.arange(6, dtype="int64").reshape(6, 1), columns=["a"], index=idx |
| 432 | + ) |
| 433 | + |
| 434 | + expected = DataFrame( |
| 435 | + { |
| 436 | + "level_0": "a a a b b b".split(), |
| 437 | + "level_1": [ |
| 438 | + datetime(2013, 1, 1), |
| 439 | + datetime(2013, 1, 2), |
| 440 | + datetime(2013, 1, 3), |
| 441 | + ] |
| 442 | + * 2, |
| 443 | + "a": np.arange(6, dtype="int64"), |
| 444 | + }, |
| 445 | + columns=["level_0", "level_1", "a"], |
| 446 | + ) |
| 447 | + expected["level_1"] = expected["level_1"].apply( |
| 448 | + lambda d: Timestamp(d, freq="D", tz=tz) |
| 449 | + ) |
| 450 | + result = df.reset_index() |
| 451 | + tm.assert_frame_equal(result, expected) |
| 452 | + |
| 453 | + def test_reset_index_period(self): |
| 454 | + # GH#7746 |
| 455 | + idx = MultiIndex.from_product( |
| 456 | + [pd.period_range("20130101", periods=3, freq="M"), list("abc")], |
| 457 | + names=["month", "feature"], |
| 458 | + ) |
| 459 | + |
| 460 | + df = DataFrame( |
| 461 | + np.arange(9, dtype="int64").reshape(-1, 1), index=idx, columns=["a"] |
| 462 | + ) |
| 463 | + expected = DataFrame( |
| 464 | + { |
| 465 | + "month": ( |
| 466 | + [pd.Period("2013-01", freq="M")] * 3 |
| 467 | + + [pd.Period("2013-02", freq="M")] * 3 |
| 468 | + + [pd.Period("2013-03", freq="M")] * 3 |
| 469 | + ), |
| 470 | + "feature": ["a", "b", "c"] * 3, |
| 471 | + "a": np.arange(9, dtype="int64"), |
| 472 | + }, |
| 473 | + columns=["month", "feature", "a"], |
| 474 | + ) |
| 475 | + result = df.reset_index() |
| 476 | + tm.assert_frame_equal(result, expected) |
| 477 | + |
| 478 | + def test_reset_index_delevel_infer_dtype(self): |
| 479 | + tuples = list(product(["foo", "bar"], [10, 20], [1.0, 1.1])) |
| 480 | + index = MultiIndex.from_tuples(tuples, names=["prm0", "prm1", "prm2"]) |
| 481 | + df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"], index=index) |
| 482 | + deleveled = df.reset_index() |
| 483 | + assert is_integer_dtype(deleveled["prm1"]) |
| 484 | + assert is_float_dtype(deleveled["prm2"]) |
| 485 | + |
| 486 | + def test_reset_index_with_drop( |
| 487 | + self, multiindex_year_month_day_dataframe_random_data |
| 488 | + ): |
| 489 | + ymd = multiindex_year_month_day_dataframe_random_data |
| 490 | + |
| 491 | + deleveled = ymd.reset_index(drop=True) |
| 492 | + assert len(deleveled.columns) == len(ymd.columns) |
| 493 | + assert deleveled.index.name == ymd.index.name |
| 494 | + |
304 | 495 |
|
305 | 496 | @pytest.mark.parametrize(
|
306 | 497 | "array, dtype",
|
|
0 commit comments