diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 819318e119668..89a1c388b3ba1 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -627,6 +627,7 @@ I/O - Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`) - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`) - Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`) +- Bug in :meth:`read_csv` where the order of ``na_values`` caused inconsistent results when ``na_values`` is a list of non-string values. (:issue:`59303`) - Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`) - Bug in :meth:`read_json` not validating the ``typ`` argument to not be exactly ``"frame"`` or ``"series"`` (:issue:`59124`) - Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. 
(:issue:`58638`) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 2916e4d98cce4..ffc2690a5efdf 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1648,7 +1648,7 @@ def _clean_na_values(na_values, keep_default_na: bool = True, floatify: bool = T if keep_default_na: v = set(v) | STR_NA_VALUES - na_values[k] = v + na_values[k] = _stringify_na_values(v, floatify) na_fvalues = {k: _floatify_na_values(v) for k, v in na_values.items()} else: if not is_list_like(na_values): diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py index 360a5feebe073..b612e60c959b1 100644 --- a/pandas/tests/io/parser/test_na_values.py +++ b/pandas/tests/io/parser/test_na_values.py @@ -812,3 +812,21 @@ def test_bool_and_nan_to_float(all_parsers): result = parser.read_csv(StringIO(data), dtype="float") expected = DataFrame.from_dict({"0": [np.nan, 1.0, 0.0]}) tm.assert_frame_equal(result, expected) + + +@xfail_pyarrow +@pytest.mark.parametrize( + "na_values", + [[-99.0, -99], [-99, -99.0]], +) +def test_na_values_dict_without_dtype(all_parsers, na_values): + parser = all_parsers + data = """A +-99 +-99 +-99.0 +-99.0""" + + result = parser.read_csv(StringIO(data), na_values=na_values) + expected = DataFrame({"A": [np.nan, np.nan, np.nan, np.nan]}) + tm.assert_frame_equal(result, expected)