From 3697d41d8e4c87ec108e81bbfa768129f3b3b1b2 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Tue, 14 Jul 2020 16:40:15 +0200 Subject: [PATCH 01/11] wip: remove part of the bug --- packages/python/plotly/plotly/express/_core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index fcbac2202d1..eb4875f2c87 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1546,8 +1546,9 @@ def aggfunc_continuous(x): for col in cols: # for hover_data, custom_data etc. if col not in agg_f: agg_f[col] = aggfunc_discrete + # Avoid collisions with reserved names - columns in the path have been copied already + cols = list(set(cols) - set(["labels", "parent", "id"])) # ---------------------------------------------------------------------------- - df_all_trees = pd.DataFrame(columns=["labels", "parent", "id"] + cols) # Set column type here (useful for continuous vs discrete colorscale) for col in cols: From 79ac151cc2070489e873fb2868a1a866081c8409 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Wed, 15 Jul 2020 00:15:45 +0200 Subject: [PATCH 02/11] added test --- .../test_core/test_px/test_px_functions.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py index 8dde6a7be4e..579c6524712 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py @@ -250,6 +250,25 @@ def test_sunburst_treemap_with_path_color(): assert np.all(np.array(colors[:8]) == np.array(calls)) +def test_sunburst_treemap_column_parent(): + vendors = ["A", "B", "C", "D", "E", "F", "G", "H"] + sectors = [ + "Tech", + "Tech", + "Finance", + "Finance", + "Tech", + "Tech", + "Finance", + "Finance", + ] + regions = ["North", "North", "North", "North", "South", "South", "South", "South"] + values = [1, 3, 2, 4, 2, 2, 1, 4] + df = pd.DataFrame(dict(id=vendors, sectors=sectors, parent=regions, values=values,)) + path = ["parent", "sectors", "id"] + fig = px.sunburst(df, path=path, values="values") + + def test_sunburst_treemap_with_path_non_rectangular(): vendors = ["A", "B", "C", "D", None, "E", "F", "G", "H", None] sectors = [ From 4e93aa73899d1fafa3745320c2048666a9862ed3 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Wed, 15 Jul 2020 11:17:56 +0200 Subject: [PATCH 03/11] handle more cases --- .../python/plotly/plotly/express/_core.py | 36 +++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index eb4875f2c87..9f7ecbde20f 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1052,6 +1052,15 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): else: df_output[df_input.columns] = df_input[df_input.columns] + # Case of sunburst and treemap, used with the `path` argument + # in which case the dataframe is massaged into a new format + # and column with reserved names will be created + uses_path = False + reserved_names = [] + if "path" in args and args["path"] is not None: + uses_path = True + reserved_names = ["id", "labels", "parent"] + # hover_data is a dict hover_data_is_dict = ( "hover_data" in args @@ -1095,6 +1104,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): if argument is None: continue col_name = None + # Case of multiindex if isinstance(argument, pd.MultiIndex): raise TypeError( @@ -1114,7 +1124,19 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): ranges.append(col_name) # ----------------- argument is likely a col name ---------------------- elif isinstance(argument, str) or not hasattr(argument, "__len__"): - if ( + if uses_path and argument in reserved_names and field_name != 'path': + if (args["labels"] is None or argument not in args["labels"]): + raise ValueError( + "%s is a reserved name for px.sunburst and px.treemap. " + "Please use the labels argument to provide another name " + "for the column, for example " + "labels={'%s': '%s_col'}" + % (argument, argument, argument) + ) + else: + col_name = args["labels"][argument] + df_output[col_name] = df_input[argument] + elif ( field_name == "hover_data" and hover_data_is_dict and args["hover_data"][str(argument)][1] is not None @@ -1484,12 +1506,15 @@ def process_dataframe_hierarchy(args): _check_dataframe_all_leaves(df[path[::-1]]) discrete_color = False + reserved_names = ["labels", "parent", "id"] + new_path = [] for col_name in path: new_col_name = col_name + "_path_copy" new_path.append(new_col_name) df[new_col_name] = df[col_name] path = new_path + # ------------ Define aggregation functions -------------------------------- def aggfunc_discrete(x): @@ -1547,9 +1572,14 @@ def aggfunc_continuous(x): if col not in agg_f: agg_f[col] = aggfunc_discrete # Avoid collisions with reserved names - columns in the path have been copied already - cols = list(set(cols) - set(["labels", "parent", "id"])) + #for col in cols: + # if col in reserved_names and args["labels"] is not None and col in args["labels"]: + # df[args["labels"][col]] = df[col] + cols = list(set(cols) - set(reserved_names)) + + # ---------------------------------------------------------------------------- - df_all_trees = pd.DataFrame(columns=["labels", "parent", "id"] + cols) + df_all_trees = pd.DataFrame(columns=reserved_names + cols) # Set column type here (useful for continuous vs discrete colorscale) for col in cols: df_all_trees[col] = df_all_trees[col].astype(df[col].dtype) From fe9bf1df9d7ee05e161dde2ee36ab7e532029f80 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Wed, 15 Jul 2020 11:18:38 +0200 Subject: [PATCH 04/11] black --- packages/python/plotly/plotly/express/_core.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 9f7ecbde20f..c37e4b65072 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1124,15 +1124,14 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): ranges.append(col_name) # ----------------- argument is likely a col name ---------------------- elif isinstance(argument, str) or not hasattr(argument, "__len__"): - if uses_path and argument in reserved_names and field_name != 'path': - if (args["labels"] is None or argument not in args["labels"]): + if uses_path and argument in reserved_names and field_name != "path": + if args["labels"] is None or argument not in args["labels"]: raise ValueError( "%s is a reserved name for px.sunburst and px.treemap. " "Please use the labels argument to provide another name " "for the column, for example " - "labels={'%s': '%s_col'}" - % (argument, argument, argument) - ) + "labels={'%s': '%s_col'}" % (argument, argument, argument) + ) else: col_name = args["labels"][argument] df_output[col_name] = df_input[argument] @@ -1572,12 +1571,11 @@ def aggfunc_continuous(x): if col not in agg_f: agg_f[col] = aggfunc_discrete # Avoid collisions with reserved names - columns in the path have been copied already - #for col in cols: + # for col in cols: # if col in reserved_names and args["labels"] is not None and col in args["labels"]: # df[args["labels"][col]] = df[col] cols = list(set(cols) - set(reserved_names)) - # ---------------------------------------------------------------------------- df_all_trees = pd.DataFrame(columns=reserved_names + cols) # Set column type here (useful for continuous vs discrete colorscale) From a482486ded0c188692bc560a7a8d7cd5ba318b65 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Wed, 15 Jul 2020 12:04:49 +0200 Subject: [PATCH 05/11] add tests --- .../test_core/test_px/test_px_functions.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py index 579c6524712..1697c0a72a4 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py @@ -264,9 +264,37 @@ def test_sunburst_treemap_column_parent(): ] regions = ["North", "North", "North", "North", "South", "South", "South", "South"] values = [1, 3, 2, 4, 2, 2, 1, 4] + # One column of the path is a reserved name - this is ok and should not raise df = pd.DataFrame(dict(id=vendors, sectors=sectors, parent=regions, values=values,)) path = ["parent", "sectors", "id"] fig = px.sunburst(df, path=path, values="values") + # Use a reserved name in a non-path argument - should raise a ValueError + df = pd.DataFrame( + dict( + vendors=vendors, + sectors=sectors, + regions=regions, + parent=vendors, + values=values, + ) + ) + path = ["regions", "sectors", "vendors"] + with pytest.raises(ValueError) as err_msg: + fig = px.sunburst(df, path=path, color="parent") + assert "parent is a reserved name for px.sunburst and px.treemap." in str( + err_msg.value + ) + with pytest.raises(ValueError) as err_msg: + fig = px.sunburst(df, path=path, hover_data=["parent"]) + assert "parent is a reserved name for px.sunburst and px.treemap." in str( + err_msg.value + ) + # Use a reserved_name in a non-path argument but modify with labels - this is ok + fig = px.sunburst(df, path=path, color="parent", labels={"parent": "parent_color"}) + assert "parent_color=%{customdata[0]}" in fig.data[0].hovertemplate + assert np.all(np.array(fig.data[0].customdata).ravel()[:8] == np.array(vendors)) + # Dataframe has a parent column but it's not used, this should not raise. + fig = px.sunburst(df, path=path) def test_sunburst_treemap_with_path_non_rectangular(): From 3696997fbf71191b3abe02ecfecc04a10999ba43 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Wed, 15 Jul 2020 12:09:35 +0200 Subject: [PATCH 06/11] debug --- .../python/plotly/plotly/express/_core.py | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index c37e4b65072..5af8bbb74ef 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1060,6 +1060,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): if "path" in args and args["path"] is not None: uses_path = True reserved_names = ["id", "labels", "parent"] + print(uses_path) # hover_data is a dict hover_data_is_dict = ( @@ -1124,18 +1125,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): ranges.append(col_name) # ----------------- argument is likely a col name ---------------------- elif isinstance(argument, str) or not hasattr(argument, "__len__"): - if uses_path and argument in reserved_names and field_name != "path": - if args["labels"] is None or argument not in args["labels"]: - raise ValueError( - "%s is a reserved name for px.sunburst and px.treemap. " - "Please use the labels argument to provide another name " - "for the column, for example " - "labels={'%s': '%s_col'}" % (argument, argument, argument) - ) - else: - col_name = args["labels"][argument] - df_output[col_name] = df_input[argument] - elif ( + if ( field_name == "hover_data" and hover_data_is_dict and args["hover_data"][str(argument)][1] is not None @@ -1196,6 +1186,18 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): else: col_name = str(argument) df_output[col_name] = df_input[argument].values + if uses_path and argument in reserved_names and field_name != "path": + if args["labels"] is None or argument not in args["labels"]: + raise ValueError( + "%s is a reserved name for px.sunburst and px.treemap. " + "Please use the labels argument to provide another name " + "for the column, for example " + "labels={'%s': '%s_col'}" % (argument, argument, argument) + ) + else: + col_name = args["labels"][argument] + df_output[col_name] = df_input[argument] + # ----------------- argument is likely a column / array / list.... ------- else: if df_provided and hasattr(argument, "name"): From bd2091fdcf48d0c0326a2650fa00bbb7e34669e1 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Wed, 15 Jul 2020 12:23:39 +0200 Subject: [PATCH 07/11] added tests and solved name conflict --- packages/python/plotly/plotly/express/_core.py | 11 +++++++---- .../tests/test_core/test_px/test_px_functions.py | 8 ++++++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 5af8bbb74ef..86ad3e9d3f7 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1056,11 +1056,10 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): # in which case the dataframe is massaged into a new format # and column with reserved names will be created uses_path = False - reserved_names = [] + reserved_names_sunburst = [] if "path" in args and args["path"] is not None: uses_path = True - reserved_names = ["id", "labels", "parent"] - print(uses_path) + reserved_names_sunburst = ["id", "labels", "parent"] # hover_data is a dict hover_data_is_dict = ( @@ -1186,7 +1185,11 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): else: col_name = str(argument) df_output[col_name] = df_input[argument].values - if uses_path and argument in reserved_names and field_name != "path": + if ( + uses_path + and argument in reserved_names_sunburst + and field_name != "path" + ): if args["labels"] is None or argument not in args["labels"]: raise ValueError( "%s is a reserved name for px.sunburst and px.treemap. " diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py index 1697c0a72a4..aaaa25c5100 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py @@ -289,10 +289,18 @@ def test_sunburst_treemap_column_parent(): assert "parent is a reserved name for px.sunburst and px.treemap." in str( err_msg.value ) + with pytest.raises(ValueError) as err_msg: + fig = px.sunburst(df, path=path, hover_data={"parent": True}) + assert "parent is a reserved name for px.sunburst and px.treemap." in str( + err_msg.value + ) # Use a reserved_name in a non-path argument but modify with labels - this is ok fig = px.sunburst(df, path=path, color="parent", labels={"parent": "parent_color"}) assert "parent_color=%{customdata[0]}" in fig.data[0].hovertemplate assert np.all(np.array(fig.data[0].customdata).ravel()[:8] == np.array(vendors)) + fig = px.sunburst( + df, path=path, hover_data={"parent": True}, labels={"parent": "parent_color"} + ) # Dataframe has a parent column but it's not used, this should not raise. fig = px.sunburst(df, path=path) From 25e375fab03201c8ec7307c393960abb8d0aab22 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Wed, 15 Jul 2020 12:27:51 +0200 Subject: [PATCH 08/11] fine-tuning --- packages/python/plotly/plotly/express/_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 86ad3e9d3f7..329db4be6cd 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1198,8 +1198,8 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): "labels={'%s': '%s_col'}" % (argument, argument, argument) ) else: - col_name = args["labels"][argument] - df_output[col_name] = df_input[argument] + col_name = str(args["labels"][argument]) + df_output[col_name] = df_input[str(argument)].values # ----------------- argument is likely a column / array / list.... ------- else: From e8afa1cced4f2ea890eabfc628697da04d6bd435 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Wed, 15 Jul 2020 18:07:33 +0200 Subject: [PATCH 09/11] Revert "added test" This reverts commit 79ac151cc2070489e873fb2868a1a866081c8409. --- .../test_core/test_px/test_px_functions.py | 37 +------------------ 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py index aaaa25c5100..0e18f72928a 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py @@ -264,45 +264,10 @@ def test_sunburst_treemap_column_parent(): ] regions = ["North", "North", "North", "North", "South", "South", "South", "South"] values = [1, 3, 2, 4, 2, 2, 1, 4] - # One column of the path is a reserved name - this is ok and should not raise df = pd.DataFrame(dict(id=vendors, sectors=sectors, parent=regions, values=values,)) path = ["parent", "sectors", "id"] + # One column of the path is a reserved name - this is ok and should not raise fig = px.sunburst(df, path=path, values="values") - # Use a reserved name in a non-path argument - should raise a ValueError - df = pd.DataFrame( - dict( - vendors=vendors, - sectors=sectors, - regions=regions, - parent=vendors, - values=values, - ) - ) - path = ["regions", "sectors", "vendors"] - with pytest.raises(ValueError) as err_msg: - fig = px.sunburst(df, path=path, color="parent") - assert "parent is a reserved name for px.sunburst and px.treemap." in str( - err_msg.value - ) - with pytest.raises(ValueError) as err_msg: - fig = px.sunburst(df, path=path, hover_data=["parent"]) - assert "parent is a reserved name for px.sunburst and px.treemap." in str( - err_msg.value - ) - with pytest.raises(ValueError) as err_msg: - fig = px.sunburst(df, path=path, hover_data={"parent": True}) - assert "parent is a reserved name for px.sunburst and px.treemap." in str( - err_msg.value - ) - # Use a reserved_name in a non-path argument but modify with labels - this is ok - fig = px.sunburst(df, path=path, color="parent", labels={"parent": "parent_color"}) - assert "parent_color=%{customdata[0]}" in fig.data[0].hovertemplate - assert np.all(np.array(fig.data[0].customdata).ravel()[:8] == np.array(vendors)) - fig = px.sunburst( - df, path=path, hover_data={"parent": True}, labels={"parent": "parent_color"} - ) - # Dataframe has a parent column but it's not used, this should not raise. - fig = px.sunburst(df, path=path) def test_sunburst_treemap_with_path_non_rectangular(): From b450a1cdb2d04e71706bc46f2b9383fbe013f238 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Wed, 15 Jul 2020 18:12:02 +0200 Subject: [PATCH 10/11] empty commit From 2fe89f634135ba299d13810a6816eaa27d5e2ebd Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Wed, 15 Jul 2020 18:52:19 +0200 Subject: [PATCH 11/11] debug --- .../python/plotly/plotly/express/_core.py | 37 +------------------ 1 file changed, 2 insertions(+), 35 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 329db4be6cd..eb4875f2c87 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1052,15 +1052,6 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): else: df_output[df_input.columns] = df_input[df_input.columns] - # Case of sunburst and treemap, used with the `path` argument - # in which case the dataframe is massaged into a new format - # and column with reserved names will be created - uses_path = False - reserved_names_sunburst = [] - if "path" in args and args["path"] is not None: - uses_path = True - reserved_names_sunburst = ["id", "labels", "parent"] - # hover_data is a dict hover_data_is_dict = ( "hover_data" in args @@ -1104,7 +1095,6 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): if argument is None: continue col_name = None - # Case of multiindex if isinstance(argument, pd.MultiIndex): raise TypeError( @@ -1185,22 +1175,6 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name): else: col_name = str(argument) df_output[col_name] = df_input[argument].values - if ( - uses_path - and argument in reserved_names_sunburst - and field_name != "path" - ): - if args["labels"] is None or argument not in args["labels"]: - raise ValueError( - "%s is a reserved name for px.sunburst and px.treemap. " - "Please use the labels argument to provide another name " - "for the column, for example " - "labels={'%s': '%s_col'}" % (argument, argument, argument) - ) - else: - col_name = str(args["labels"][argument]) - df_output[col_name] = df_input[str(argument)].values - # ----------------- argument is likely a column / array / list.... ------- else: if df_provided and hasattr(argument, "name"): @@ -1510,15 +1484,12 @@ def process_dataframe_hierarchy(args): _check_dataframe_all_leaves(df[path[::-1]]) discrete_color = False - reserved_names = ["labels", "parent", "id"] - new_path = [] for col_name in path: new_col_name = col_name + "_path_copy" new_path.append(new_col_name) df[new_col_name] = df[col_name] path = new_path - # ------------ Define aggregation functions -------------------------------- def aggfunc_discrete(x): @@ -1576,13 +1547,9 @@ def aggfunc_continuous(x): if col not in agg_f: agg_f[col] = aggfunc_discrete # Avoid collisions with reserved names - columns in the path have been copied already - # for col in cols: - # if col in reserved_names and args["labels"] is not None and col in args["labels"]: - # df[args["labels"][col]] = df[col] - cols = list(set(cols) - set(reserved_names)) - + cols = list(set(cols) - set(["labels", "parent", "id"])) # ---------------------------------------------------------------------------- - df_all_trees = pd.DataFrame(columns=reserved_names + cols) + df_all_trees = pd.DataFrame(columns=["labels", "parent", "id"] + cols) # Set column type here (useful for continuous vs discrete colorscale) for col in cols: df_all_trees[col] = df_all_trees[col].astype(df[col].dtype)