From a832dcc699e778db64048b0d858069208d66e24f Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Wed, 18 Oct 2023 23:58:40 +0200 Subject: [PATCH 1/6] correct convert_json_field_to_pandas_type --- pandas/io/json/_table_schema.py | 3 +++ .../tests/io/json/test_json_table_schema.py | 27 +++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 3f2291ba7a0c3..4d080d6664a9b 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -15,6 +15,7 @@ from pandas._libs import lib from pandas._libs.json import ujson_loads from pandas._libs.tslibs import timezones +from pandas._libs.tslibs.dtypes import freq_to_period_freqstr from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.base import _registry as registry @@ -207,6 +208,8 @@ def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype: if field.get("tz"): return f"datetime64[ns, {field['tz']}]" elif field.get("freq"): + # GH#9586 rename frequency M to ME for offsets + field['freq'] = freq_to_period_freqstr(1, field['freq']) # GH#47747 using datetime over period to minimize the change surface return f"period[{field['freq']}]" else: diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 943515acd33b5..ef6470246a4d0 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -845,3 +845,30 @@ def test_read_json_orient_table_old_schema_version(self): expected = DataFrame({"a": [1, 2.0, "s"]}) result = pd.read_json(StringIO(df_json), orient="table") tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize( + "index_nm", + [None, "idx", pytest.param("index", marks=pytest.mark.xfail), "level_0"], + ) + @pytest.mark.parametrize( + "vals", + [ + {"ints": [1, 2]}, + {"objects": ["a", "b"]}, + {"objects": ["1", "2"]}, + {"date_ranges": pd.date_range("2016-01-01", freq="d", periods=2)}, + {"floats": [1.0, 2.0]}, + {"bools": [True, False]}, + ], + ) + def test_read_json_table_orient_period_depr_freq(self, index_nm, vals, recwarn): + # GH#9586 + df = DataFrame( + vals, + index=pd.Index( + (pd.Period("2022-01"), pd.Period("2022-04")), name=index_nm + ), + ) + out = df.to_json(orient="table") + result = pd.read_json(out, orient="table") + tm.assert_frame_equal(df, result) From e55001e741ca925776e66737fa8fb2962c4e76d3 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 19 Oct 2023 00:20:22 +0200 Subject: [PATCH 2/6] fix pre-commit errors --- pandas/io/json/_table_schema.py | 2 +- pandas/tests/io/json/test_json_table_schema.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 4d080d6664a9b..fa662c285e3e4 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -209,7 +209,7 @@ def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype: return f"datetime64[ns, {field['tz']}]" elif field.get("freq"): # GH#9586 rename frequency M to ME for offsets - field['freq'] = freq_to_period_freqstr(1, field['freq']) + field["freq"] = freq_to_period_freqstr(1, field["freq"]) # GH#47747 using datetime over period to minimize the change surface return f"period[{field['freq']}]" else: diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index ef6470246a4d0..698956641cd72 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -865,9 +865,7 @@ def test_read_json_table_orient_period_depr_freq(self, index_nm, vals, recwarn): # GH#9586 df = DataFrame( vals, - index=pd.Index( - (pd.Period("2022-01"), pd.Period("2022-04")), name=index_nm - ), + index=pd.Index((pd.Period("2022-01"), pd.Period("2022-04")), name=index_nm), ) out = df.to_json(orient="table") result = pd.read_json(out, orient="table") From e4548924ac5c94379c2cd2b4f7b58037ffca964e Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 20 Oct 2023 22:30:50 +0200 Subject: [PATCH 3/6] correct def convert_json_field_to_pandas_type in case freq_n>1 --- pandas/io/json/_table_schema.py | 7 ++++-- .../tests/io/json/test_json_table_schema.py | 22 ++++--------------- 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index fa662c285e3e4..0ef58c6997343 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -5,6 +5,7 @@ """ from __future__ import annotations +import re from typing import ( TYPE_CHECKING, Any, @@ -209,9 +210,11 @@ def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype: return f"datetime64[ns, {field['tz']}]" elif field.get("freq"): # GH#9586 rename frequency M to ME for offsets - field["freq"] = freq_to_period_freqstr(1, field["freq"]) + freq_name = re.split("[0-9]*", field["freq"], maxsplit=1)[1] + freq_n = field["freq"][: field["freq"].index(freq_name)] + freq = freq_to_period_freqstr(freq_n, freq_name) # GH#47747 using datetime over period to minimize the change surface - return f"period[{field['freq']}]" + return f"period[{freq}]" else: return "datetime64[ns]" elif typ == "any": diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 698956641cd72..722da9fb77ce6 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -846,27 +846,13 @@ def test_read_json_orient_table_old_schema_version(self): result = pd.read_json(StringIO(df_json), orient="table") tm.assert_frame_equal(expected, result) - @pytest.mark.parametrize( - "index_nm", - [None, "idx", pytest.param("index", marks=pytest.mark.xfail), "level_0"], - ) - @pytest.mark.parametrize( - "vals", - [ - {"ints": [1, 2]}, - {"objects": ["a", "b"]}, - {"objects": ["1", "2"]}, - {"date_ranges": pd.date_range("2016-01-01", freq="d", periods=2)}, - {"floats": [1.0, 2.0]}, - {"bools": [True, False]}, - ], - ) - def test_read_json_table_orient_period_depr_freq(self, index_nm, vals, recwarn): + def test_read_json_table_orient_period_depr_freq(self, recwarn): # GH#9586 df = DataFrame( - vals, - index=pd.Index((pd.Period("2022-01"), pd.Period("2022-04")), name=index_nm), + {"ints": [1, 2]}, + index=pd.PeriodIndex(["2011-01", "2011-08"], freq="2M"), ) + print(df.index) out = df.to_json(orient="table") result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) From c0a7ee6522c43c25bdd9db005d36fbbd620b6873 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Sat, 21 Oct 2023 13:40:12 +0200 Subject: [PATCH 4/6] remove print --- pandas/tests/io/json/test_json_table_schema.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 722da9fb77ce6..68c25b1744e9b 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -852,7 +852,6 @@ def test_read_json_table_orient_period_depr_freq(self, recwarn): {"ints": [1, 2]}, index=pd.PeriodIndex(["2011-01", "2011-08"], freq="2M"), ) - print(df.index) out = df.to_json(orient="table") result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) From 7a98c81c34ecb1d113e3a7d87850107eb131d2ae Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Sat, 21 Oct 2023 16:18:10 +0200 Subject: [PATCH 5/6] add parameterization to the test --- pandas/tests/io/json/test_json_table_schema.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 68c25b1744e9b..ddab3887db810 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -846,11 +846,12 @@ def test_read_json_orient_table_old_schema_version(self): result = pd.read_json(StringIO(df_json), orient="table") tm.assert_frame_equal(expected, result) - def test_read_json_table_orient_period_depr_freq(self, recwarn): + @pytest.mark.parametrize("freq", ["M", "2M"]) + def test_read_json_table_orient_period_depr_freq(self, freq, recwarn): # GH#9586 df = DataFrame( {"ints": [1, 2]}, - index=pd.PeriodIndex(["2011-01", "2011-08"], freq="2M"), + index=pd.PeriodIndex(["2020-01", "2020-06"], freq=freq), ) out = df.to_json(orient="table") result = pd.read_json(out, orient="table") From e073336412d0f53c80159d0064694ef1b19bedd2 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Mon, 23 Oct 2023 20:00:17 +0200 Subject: [PATCH 6/6] replace the regex with to_offset --- pandas/io/json/_table_schema.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 0ef58c6997343..4d9fba72cf173 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -5,7 +5,6 @@ """ from __future__ import annotations -import re from typing import ( TYPE_CHECKING, Any, @@ -36,6 +35,8 @@ from pandas import DataFrame import pandas.core.common as com +from pandas.tseries.frequencies import to_offset + if TYPE_CHECKING: from pandas._typing import ( DtypeObj, @@ -210,8 +211,8 @@ def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype: return f"datetime64[ns, {field['tz']}]" elif field.get("freq"): # GH#9586 rename frequency M to ME for offsets - freq_name = re.split("[0-9]*", field["freq"], maxsplit=1)[1] - freq_n = field["freq"][: field["freq"].index(freq_name)] + offset = to_offset(field["freq"]) + freq_n, freq_name = offset.n, offset.name freq = freq_to_period_freqstr(freq_n, freq_name) # GH#47747 using datetime over period to minimize the change surface return f"period[{freq}]"