From e747c168a27ed853c6049ce0d92baafa8e6a4496 Mon Sep 17 00:00:00 2001 From: Lavishgangwani Date: Mon, 7 Oct 2024 11:43:31 +0530 Subject: [PATCH 1/6] Fix: Change None values to NaN in combine_first method for better handling of missing data --- pandas/core/series.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index bbcb6615aeefd..7c6599c93abbb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3223,6 +3223,14 @@ def combine_first(self, other) -> Series: dtype: float64 """ from pandas.core.reshape.concat import concat + + # Change None values to NaN for both Series + def replace_none_with_nan(series): + series.fillna(value=np.nan, inplace=True) + + # Apply the function to both Series + replace_none_with_nan(self) + replace_none_with_nan(other) # Removed the extra parenthesis here if self.dtype == other.dtype: if self.index.equals(other.index): From 881f5231449177c656deac528e7f3be734482146 Mon Sep 17 00:00:00 2001 From: Lavishgangwani Date: Mon, 7 Oct 2024 13:02:38 +0530 Subject: [PATCH 2/6] Refactor: Clean up comments and improve function documentation in Series class --- pandas/core/series.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 7c6599c93abbb..1be74df8c9ac3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3223,14 +3223,13 @@ def combine_first(self, other) -> Series: dtype: float64 """ from pandas.core.reshape.concat import concat - - # Change None values to NaN for both Series + def replace_none_with_nan(series): series.fillna(value=np.nan, inplace=True) - # Apply the function to both Series + """Apply the function to both Series""" replace_none_with_nan(self) - replace_none_with_nan(other) # Removed the extra parenthesis here + replace_none_with_nan(other) if self.dtype == other.dtype: if self.index.equals(other.index): From 75d601881c960e45dd27491783f1c64d0b28f428 Mon Sep 17 00:00:00 
2001 From: Lavishgangwani Date: Wed, 16 Oct 2024 10:51:32 +0530 Subject: [PATCH 3/6] Fix date parsing issue in arrow_parser_wrapper --- pandas/io/parsers/arrow_parser_wrapper.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 86bb5f190e403..d408c4ce8a0d5 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -274,6 +274,18 @@ def read(self) -> DataFrame: dtype_backend = self.kwds["dtype_backend"] + # Handle missing date values by checking for timestamp columns + for i, field in enumerate(table.schema): + if pa.types.is_timestamp(field.type): # Check if the column is a timestamp + # Convert to a Pandas Series + column = table.column(i).to_pandas() + + # Replace missing values with NaT + column.fillna(pd.NaT, inplace=True) + + # Update the column back to the table + table = table.set_column(i, field.name, pa.array(column)) + # Convert all pa.null() cols -> float64 (non nullable) # else Int64 (nullable case, see below) if dtype_backend is lib.no_default: From fa43a5b5ff64eb3a6a6a669d3c4829bc47f270d3 Mon Sep 17 00:00:00 2001 From: Lavishgangwani Date: Wed, 16 Oct 2024 11:02:59 +0530 Subject: [PATCH 4/6] Clean up comments and improve documentation in arrow_parser_wrapper.py --- pandas/io/parsers/arrow_parser_wrapper.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index d408c4ce8a0d5..2e5e40cf8f760 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -276,14 +276,11 @@ def read(self) -> DataFrame: # Handle missing date values by checking for timestamp columns for i, field in enumerate(table.schema): - if pa.types.is_timestamp(field.type): # Check if the column is a timestamp - # Convert to a Pandas Series + if pa.types.is_timestamp(field.type): column = 
table.column(i).to_pandas() - # Replace missing values with NaT column.fillna(pd.NaT, inplace=True) - # Update the column back to the table table = table.set_column(i, field.name, pa.array(column)) # Convert all pa.null() cols -> float64 (non nullable) From 7d7c5196578456909a464773076674c69b91526b Mon Sep 17 00:00:00 2001 From: Lavishgangwani Date: Wed, 16 Oct 2024 15:59:23 +0530 Subject: [PATCH 5/6] refactor cleaned up comments --- pandas/io/parsers/arrow_parser_wrapper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 2e5e40cf8f760..b0dca668b8dcf 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -278,7 +278,6 @@ def read(self) -> DataFrame: for i, field in enumerate(table.schema): if pa.types.is_timestamp(field.type): column = table.column(i).to_pandas() - column.fillna(pd.NaT, inplace=True) table = table.set_column(i, field.name, pa.array(column)) From f0d07ee55c5205665f9a741fbb0dd82017d9e781 Mon Sep 17 00:00:00 2001 From: Lavishgangwani Date: Mon, 11 Nov 2024 15:19:53 +0530 Subject: [PATCH 6/6] Fix broadcasting in logical_op function for DataFrame and Series operations --- pandas/core/ops/array_ops.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 983a3df57e369..bed413d1f4d68 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -57,7 +57,8 @@ from pandas.core.ops import missing from pandas.core.ops.dispatch import should_extension_dispatch from pandas.core.ops.invalid import invalid_comparison - +from pandas.core.frame import DataFrame +from pandas.core.series import Series if TYPE_CHECKING: from pandas._typing import ( ArrayLike, @@ -405,6 +406,25 @@ def logical_op(left: ArrayLike, right: Any, op) -> ArrayLike: ------- ndarray or ExtensionArray """ + def logical_operator(left: 
ArrayLike, right: Any, op) -> ArrayLike: + # Check if `right` is a Series and `left` is a DataFrame, and apply broadcasting + if isinstance(left, DataFrame) and isinstance(right, Series): + right = right.values # Convert Series to array for broadcasting + + # Convert right to a scalar or valid object if necessary + right = lib.item_from_zerodim(right) + + # Check for dtype-less sequences (e.g., list, tuple) and raise error + if is_list_like(right) and not hasattr(right, "dtype"): + # Raise an error if `right` is a list or tuple without a dtype + raise TypeError( + "Logical ops (and, or, xor) between Pandas objects and dtype-less " + "sequences (e.g., list, tuple) are no longer supported. " + "Wrap the object in a Series, Index, or np.array before operating." + ) + + # Proceed with the logical operation + return logical_operator(op(left, right)) def fill_bool(x, left=None): # if `left` is specifically not-boolean, we do not cast to bool