From 7d84cc3de784ff5345d0fd9a94272133da980a62 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Mon, 22 May 2023 11:32:48 +0200 Subject: [PATCH 1/2] Add a `fill_null` method to dataframe and column Follow-up to gh-167, which added `fill_nan`, and closes gh-142. --- .../dataframe_api/column_object.py | 13 ++++++++ .../dataframe_api/dataframe_object.py | 33 +++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 11b96ef2..10ba9a35 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -641,3 +641,16 @@ def fill_nan(self, value: float | 'null', /) -> Column: """ ... + + def fill_null(self, value: Scalar, /) -> Column: + """ + Fill null values with the given fill value. + + Parameters + ---------- + value : Scalar + Value used to replace any ``null`` values in the column with. + Must be of the Python scalar type matching the dtype of the column. + + """ + ... diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index c6fbb6f5..a5b151a8 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -774,3 +774,36 @@ def fill_nan(self, value: float | 'null', /) -> DataFrame: """ ... + + def fill_null( + self, value: Scalar, /, *, column_names : list[str] | None = None + ) -> DataFrame: + """ + Fill null values with the given fill value. + + This method can only be used if all columns that are to be filled are + of the same dtype kind (e.g., all floating-point, all integer, all + string or all datetime dtypes). If that is not the case, it is not + possible to use a single Python scalar type that matches the dtype of + all columns to which ``fill_null`` is being applied, and hence an + exception will be raised. 
+ + Parameters + ---------- + value : Scalar + Value used to replace any ``null`` values in the dataframe with. + Must be of the Python scalar type matching the dtype(s) of the dataframe. + column_names : list[str] | None + A list of column names for which to replace nulls with the given + scalar value. + + Raises + ------ + TypeError + If the columns that are to be filled are not all of the same dtype. + KeyError + If ``column_names`` contains a column name that is not present in + the dataframe. + + """ + ... From 502cd9ce0bf836c7a6c0c02d25dc523bf5052377 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Wed, 5 Jul 2023 23:25:23 +0200 Subject: [PATCH 2/2] Address review comments about `DataFrame.fill_null` --- .../dataframe_api/dataframe_object.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index a5b151a8..f56dae67 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -782,11 +782,10 @@ def fill_null( Fill null values with the given fill value. This method can only be used if all columns that are to be filled are - of the same dtype kind (e.g., all floating-point, all integer, all - string or all datetime dtypes). If that is not the case, it is not - possible to use a single Python scalar type that matches the dtype of - all columns to which ``fill_null`` is being applied, and hence an - exception will be raised. + of the same dtype (e.g., all of ``Float64`` or all of string dtype). + If that is not the case, it is not possible to use a single Python + scalar type that matches the dtype of all columns to which + ``fill_null`` is being applied, and hence an exception will be raised. Parameters ---------- @@ -795,7 +794,7 @@ def fill_null( Must be of the Python scalar type matching the dtype(s) of the dataframe.
column_names : list[str] | None A list of column names for which to replace nulls with the given - scalar value. + scalar value. If ``None``, nulls will be replaced in all columns. Raises ------