From 909a3c10dd7b9ee33d9247d6c8f33af9087599d1 Mon Sep 17 00:00:00 2001 From: UV Date: Wed, 11 Dec 2024 19:01:21 +0530 Subject: [PATCH 1/5] Fix KeyError by adding check for _convert_dates --- pandas/io/stata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 34d95fb59a21c..474663a85216d 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -2611,7 +2611,7 @@ def _check_column_names(self, data: DataFrame) -> DataFrame: # Check date conversion, and fix key if needed if self._convert_dates: for c, o in zip(columns, original_columns): - if c != o: + if c != o and o in self._convert_dates: self._convert_dates[c] = self._convert_dates[o] del self._convert_dates[o] From 0ce2747776d36579c399538b422b907f39ea5022 Mon Sep 17 00:00:00 2001 From: UV Date: Fri, 3 Jan 2025 12:51:32 +0530 Subject: [PATCH 2/5] Added test for handling _convert_dates key --- pandas/tests/io/test_stata.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 9288b98d79fbe..f4e7e48cecb5c 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -2587,3 +2587,30 @@ def test_many_strl(temp_file, version): lbls = ["".join(v) for v in itertools.product(*([string.ascii_letters] * 3))] value_labels = {"col": {i: lbls[i] for i in range(n)}} df.to_stata(temp_file, value_labels=value_labels, version=version) + + +@pytest.mark.parametrize("version", [114, 117, 118, 119, None]) +def test_convert_dates_key_handling(tmp_path, version): + temp_file = tmp_path / "test.dta" + df = DataFrame({"old_name": [1, 2, 3], "some_other_name": [4, 5, 6]}) + writer = StataWriter(temp_file, df) + + # Case 1: Key exists in _convert_dates + writer._convert_dates = {"old_name": "converted_date"} + columns = ["new_name"] + original_columns = ["old_name"] + for c, o in zip(columns, original_columns): + if c != o and o in writer._convert_dates: + writer._convert_dates[c] = writer._convert_dates[o] + del writer._convert_dates[o] + assert writer._convert_dates == {"new_name": "converted_date"} + + # Case 2: Key does not exist in _convert_dates + writer._convert_dates = {"some_other_name": "converted_date"} + columns = ["new_name"] + original_columns = ["old_name"] + for c, o in zip(columns, original_columns): + if c != o and o in writer._convert_dates: + writer._convert_dates[c] = writer._convert_dates[o] + del writer._convert_dates[o] + assert writer._convert_dates == {"some_other_name": "converted_date"} From 2d66e5984e2fe93b9b1122b07eff0a371f5e7eae Mon Sep 17 00:00:00 2001 From: UV Date: Fri, 10 Jan 2025 23:19:24 +0530 Subject: [PATCH 3/5] Updated test_stata.py --- pandas/tests/io/test_stata.py | 42 +++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index f4e7e48cecb5c..677dc30532dc5 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -2593,24 +2593,32 @@ def test_many_strl(temp_file, version): def test_convert_dates_key_handling(tmp_path, version): temp_file = tmp_path / "test.dta" df = DataFrame({"old_name": [1, 2, 3], "some_other_name": [4, 5, 6]}) - writer = StataWriter(temp_file, df) # Case 1: Key exists in _convert_dates - writer._convert_dates = {"old_name": "converted_date"} - columns = ["new_name"] - original_columns = ["old_name"] - for c, o in zip(columns, original_columns): - if c != o and o in writer._convert_dates: - writer._convert_dates[c] = writer._convert_dates[o] - del writer._convert_dates[o] - assert writer._convert_dates == {"new_name": "converted_date"} + convert_dates = {"old_name": "converted_date"} + df.rename(columns={"old_name": "new_name"}, inplace=True) + with StataWriter( + temp_file, + df, + convert_dates=convert_dates, + version=version, + ) as writer: + writer.write_file() + result = read_stata(temp_file) + assert list(result.columns) == ["new_name", "some_other_name"] + assert "converted_date" in result.columns # Case 2: Key does not exist in _convert_dates - writer._convert_dates = {"some_other_name": "converted_date"} - columns = ["new_name"] - original_columns = ["old_name"] - for c, o in zip(columns, original_columns): - if c != o and o in writer._convert_dates: - writer._convert_dates[c] = writer._convert_dates[o] - del writer._convert_dates[o] - assert writer._convert_dates == {"some_other_name": "converted_date"} + df = DataFrame({"old_name": [1, 2, 3], "some_other_name": [4, 5, 6]}) + convert_dates = {"some_other_name": "converted_date"} + df.rename(columns={"old_name": "new_name"}, inplace=True) + with StataWriter( + temp_file, + df, + convert_dates=convert_dates, + version=version, + ) as writer: + writer.write_file() + result = read_stata(temp_file) + assert list(result.columns) == ["new_name", "some_other_name"] + assert "converted_date" in result.columns From 6bbd33dfcc45f1ea0aabffb20deb779a033c7696 Mon Sep 17 00:00:00 2001 From: UV Date: Fri, 10 Jan 2025 23:22:00 +0530 Subject: [PATCH 4/5] Added issue reference --- pandas/tests/io/test_stata.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 677dc30532dc5..d3ef15adb13c5 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -2591,6 +2591,7 @@ def test_many_strl(temp_file, version): @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) def test_convert_dates_key_handling(tmp_path, version): + # GH 60536 temp_file = tmp_path / "test.dta" df = DataFrame({"old_name": [1, 2, 3], "some_other_name": [4, 5, 6]}) From 55d62a1b8051e3c3e3bd9648e72484fbc2e1d241 Mon Sep 17 00:00:00 2001 From: UV Date: Sat, 11 Jan 2025 08:23:20 +0530 Subject: [PATCH 5/5] Updated test_stata.py --- pandas/tests/io/test_stata.py | 39 ++++++++++++++--------------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index d3ef15adb13c5..5ed14806d6346 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -2591,35 +2591,26 @@ def test_many_strl(temp_file, version): @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) def test_convert_dates_key_handling(tmp_path, version): - # GH 60536 + temp_file = tmp_path / "test.dta" df = DataFrame({"old_name": [1, 2, 3], "some_other_name": [4, 5, 6]}) - # Case 1: Key exists in _convert_dates + # Case 1: Key exists in convert_dates convert_dates = {"old_name": "converted_date"} - df.rename(columns={"old_name": "new_name"}, inplace=True) - with StataWriter( - temp_file, - df, - convert_dates=convert_dates, - version=version, - ) as writer: - writer.write_file() + df_renamed = df.rename(columns={"old_name": "new_name"}) # Mimic column renaming + df_renamed.to_stata(temp_file, convert_dates=convert_dates) + result = read_stata(temp_file) - assert list(result.columns) == ["new_name", "some_other_name"] - assert "converted_date" in result.columns + assert "new_name" in result.columns + assert "old_name" not in result.columns + assert result["new_name"].tolist() == ["converted_date", "converted_date", "converted_date"] - # Case 2: Key does not exist in _convert_dates - df = DataFrame({"old_name": [1, 2, 3], "some_other_name": [4, 5, 6]}) + # Case 2: Key does not exist in convert_dates convert_dates = {"some_other_name": "converted_date"} - df.rename(columns={"old_name": "new_name"}, inplace=True) - with StataWriter( - temp_file, - df, - convert_dates=convert_dates, - version=version, - ) as writer: - writer.write_file() + df_renamed = df.rename(columns={"old_name": "new_name"}) # Mimic column renaming + df_renamed.to_stata(temp_file, convert_dates=convert_dates) + result = read_stata(temp_file) - assert list(result.columns) == ["new_name", "some_other_name"] - assert "converted_date" in result.columns + assert "new_name" not in result.columns + assert "old_name" in result.columns + assert result["some_other_name"].tolist() == ["converted_date", "converted_date", "converted_date"]