From a5a77e31ba7618e9f300e7ab45f9097ebb5d0244 Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Mon, 26 Jun 2023 00:38:41 +0800 Subject: [PATCH 1/2] BUG: complex Series/DataFrame display all complex nans as nan+0j --- pandas/io/formats/format.py | 45 ++++++++++++++++++++---- pandas/tests/io/formats/test_printing.py | 14 +++++++- 2 files changed, 51 insertions(+), 8 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 6cc00ffc8889c..a81924a7844de 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1492,6 +1492,34 @@ def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str): ).reshape(values.shape) return formatted + def format_complex_with_na_rep( + values: ArrayLike, formatter: Callable, na_rep: str + ): + real_values, imag_values = np.real(values).ravel(), np.imag(values).ravel() + real_mask, imag_mask = isna(real_values), isna(imag_values) + formatted_lst = [] + for val, real_val, imag_val, re_isna, im_isna in zip( + values.ravel(), + real_values, + imag_values, + real_mask, + imag_mask, + ): + if not re_isna and not im_isna: + formatted_lst.append(formatter(val)) + elif not re_isna: # xxx+nanj + formatted_lst.append(f"{formatter(real_val)}+{na_rep}j") + elif not im_isna: # nan[+/-]xxxj + # The imaginary part may either start with a "-" or a space + imag_formatted = formatter(imag_val).strip() + if imag_formatted.startswith("-"): + formatted_lst.append(f"{na_rep}{imag_formatted}j") + else: + formatted_lst.append(f"{na_rep}+{imag_formatted}j") + else: # nan+nanj + formatted_lst.append(f"{na_rep}+{na_rep}j") + return np.array(formatted_lst).reshape(values.shape) + if self.formatter is not None: return format_with_na_rep(self.values, self.formatter, self.na_rep) @@ -1512,11 +1540,12 @@ def format_values_with(float_format): # need to distinguish complex and float NaNs (GH #53762) values = self.values is_complex = is_complex_dtype(values) - if is_complex: - na_rep = f"{na_rep}+{0:.{self.digits}f}j" # separate the wheat from the chaff - values = format_with_na_rep(values, formatter, na_rep) + if is_complex: + values = format_complex_with_na_rep(values, formatter, na_rep) + else: + values = format_with_na_rep(values, formatter, na_rep) if self.fixed_width: if is_complex: @@ -1917,7 +1946,7 @@ def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> list[s real_part, imag_part = [], [] for x in str_complexes: # Complex numbers are represented as "(-)xxx(+/-)xxxj" - # The split will give [maybe "-", "xxx", "+/-", "xxx", "j", ""] + # The split will give [{"", "-"}, "xxx", "+/-", "xxx", "j", ""] # Therefore, the imaginary part is the 4th and 3rd last elements, # and the real part is everything before the imaginary part trimmed = re.split(r"([j+-])", x) @@ -1929,11 +1958,13 @@ def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> list[s # in the array n = len(str_complexes) padded_parts = _trim_zeros_float(real_part + imag_part, decimal) + padded_length = max(len(part) for part in padded_parts) - 1 padded = [ - padded_parts[i] # real part (including - or space, possibly "NaN") - + padded_parts[i + n] # imaginary part (including + or -) + real_pt # real part, possibly NaN + + imag_pt[0] # +/- + + f"{imag_pt[1:]:>{padded_length}}" # complex part (no sign), possibly nan + "j" - for i in range(n) + for real_pt, imag_pt in zip(padded_parts[:n], padded_parts[n:]) ] return padded diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index dc106c9bebd45..555efd8d90ade 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -218,11 +218,23 @@ def test_multiindex_long_element(): ([-2, complex("nan"), -1], ["-2.0+0.0j", " NaN+0.0j", "-1.0+0.0j"]), ([-1.23j, complex("nan"), -1], ["-0.00-1.23j", " NaN+0.00j", "-1.00+0.00j"]), ([1.23j, complex("nan"), 1.23], [" 0.00+1.23j", " NaN+0.00j", " 1.23+0.00j"]), + ( + [-1.23j, complex(np.nan, np.nan), 1], + ["-0.00-1.23j", " NaN+ NaNj", " 1.00+0.00j"], + ), + ( + [-1.23j, complex(1.2, np.nan), 1], + ["-0.00-1.23j", " 1.20+ NaNj", " 1.00+0.00j"], + ), + ( + [-1.23j, complex(np.nan, -1.2), 1], + ["-0.00-1.23j", " NaN-1.20j", " 1.00+0.00j"], + ), ], ) @pytest.mark.parametrize("as_frame", [True, False]) def test_ser_df_with_complex_nans(data, output, as_frame): - # GH#53762 + # GH#53762, GH#53841 obj = pd.Series(data) if as_frame: obj = obj.to_frame(name="val") From 910e7944261a31709d7ce87791da68517dee4162 Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Mon, 26 Jun 2023 20:19:32 +0800 Subject: [PATCH 2/2] resolve mypy via type: ignore[arg-type] --- pandas/io/formats/format.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index a81924a7844de..2fd284323fc5e 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1495,7 +1495,8 @@ def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str): def format_complex_with_na_rep( values: ArrayLike, formatter: Callable, na_rep: str ): - real_values, imag_values = np.real(values).ravel(), np.imag(values).ravel() + real_values = np.real(values).ravel() # type: ignore[arg-type] + imag_values = np.imag(values).ravel() # type: ignore[arg-type] real_mask, imag_mask = isna(real_values), isna(imag_values) formatted_lst = [] for val, real_val, imag_val, re_isna, im_isna in zip(