From be80dbaf2afcba52e949906ad1103001503019a0 Mon Sep 17 00:00:00 2001 From: "Benjamin A. Beasley" Date: Wed, 6 Apr 2022 07:13:22 -0400 Subject: [PATCH 1/2] Fix a few test failures on big-endian systems These are all due to tests expecting little-endian dtypes, where in fact the endianness of the dtype is that of the host. --- pandas/tests/arrays/boolean/test_astype.py | 5 +- .../tests/arrays/boolean/test_construction.py | 5 +- pandas/tests/arrays/floating/test_to_numpy.py | 5 +- pandas/tests/arrays/integer/test_dtypes.py | 5 +- pandas/tests/frame/methods/test_to_records.py | 137 ++++++++++++++---- pandas/tests/io/parser/test_c_parser_only.py | 8 +- pandas/tests/tools/test_to_timedelta.py | 4 +- 7 files changed, 131 insertions(+), 38 deletions(-) diff --git a/pandas/tests/arrays/boolean/test_astype.py b/pandas/tests/arrays/boolean/test_astype.py index 57cec70262526..258d2a99efb82 100644 --- a/pandas/tests/arrays/boolean/test_astype.py +++ b/pandas/tests/arrays/boolean/test_astype.py @@ -1,3 +1,5 @@ +from sys import byteorder + import numpy as np import pytest @@ -20,7 +22,8 @@ def test_astype(): tm.assert_numpy_array_equal(result, expected) result = arr.astype("str") - expected = np.array(["True", "False", "<NA>"], dtype="<U5") + endian = {"little": "<", "big": ">"}[byteorder] + expected = np.array(["True", "False", "<NA>"], dtype=f"{endian}U5") tm.assert_numpy_array_equal(result, expected) # no missing values diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py index 64b1786cbd101..9a9afbaa0baf8 100644 --- a/pandas/tests/arrays/boolean/test_construction.py +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -1,3 +1,5 @@ +from sys import byteorder + import numpy as np import pytest @@ -273,7 +275,8 @@ def test_to_numpy(box): arr = con([True, False, None], dtype="boolean") result = arr.to_numpy(dtype="str") - expected = np.array([True, False, pd.NA], dtype="<U5") + endian = {"little": "<", "big": ">"}[byteorder] + expected = np.array([True, False, pd.NA], dtype=f"{endian}U5")
tm.assert_numpy_array_equal(result, expected) # no missing values -> can convert to bool, otherwise raises diff --git a/pandas/tests/arrays/floating/test_to_numpy.py b/pandas/tests/arrays/floating/test_to_numpy.py index 26e5687b1b4a0..e96e27d84c044 100644 --- a/pandas/tests/arrays/floating/test_to_numpy.py +++ b/pandas/tests/arrays/floating/test_to_numpy.py @@ -1,3 +1,5 @@ +from sys import byteorder + import numpy as np import pytest @@ -115,7 +117,8 @@ def test_to_numpy_string(box, dtype): arr = con([0.0, 1.0, None], dtype="Float64") result = arr.to_numpy(dtype="str") - expected = np.array([0.0, 1.0, pd.NA], dtype="<U32") + endian = {"little": "<", "big": ">"}[byteorder] + expected = np.array([0.0, 1.0, pd.NA], dtype=f"{endian}U32") tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py index 8348ff79b24ee..643d08c4b3bea 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -1,3 +1,5 @@ +from sys import byteorder + import numpy as np import pytest @@ -283,7 +285,8 @@ def test_to_numpy_na_raises(dtype): def test_astype_str(): a = pd.array([1, 2, None], dtype="Int64") - expected = np.array(["1", "2", "<NA>"], dtype="<U21") + endian = {"little": "<", "big": ">"}[byteorder] + expected = np.array(["1", "2", "<NA>"], dtype=f"{endian}U21") tm.assert_numpy_array_equal(a.astype(str), expected) tm.assert_numpy_array_equal(a.astype("str"), expected) diff --git a/pandas/tests/frame/methods/test_to_records.py b/pandas/tests/frame/methods/test_to_records.py index 1a84fb73fd524..d54e6f85f67b7 100644 --- a/pandas/tests/frame/methods/test_to_records.py +++ b/pandas/tests/frame/methods/test_to_records.py @@ -1,4 +1,5 @@ from collections import abc +from sys import byteorder import numpy as np import pytest @@ -14,6 +15,9 @@ import pandas._testing as tm +endian = {"little": "<", "big": ">"}[byteorder] + + class TestDataFrameToRecords: def test_to_records_timeseries(self): index = date_range("1/1/2000", periods=10) @@ -151,7 +155,12
@@ def test_to_records_with_categorical(self): {}, np.rec.array( [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")], - dtype=[("index", ""}[byteorder] + + @pytest.mark.parametrize( "malformed", ["1\r1\r1\r 1\r 1\r", "1\r1\r1\r 1\r 1\r11\r", "1\r1\r1\r 1\r 1\r11\r1\r"], @@ -144,9 +148,9 @@ def test_dtype_and_names_error(c_parser_only): "the dtype timedelta64 is not supported for parsing", {"dtype": {"A": "timedelta64", "B": "float64"}}, ), - ("the dtype "}[byteorder] expected = Series( - [np.timedelta64(1000000000, "ns"), timedelta_NaT], dtype=" Date: Thu, 7 Apr 2022 08:34:08 -0400 Subject: [PATCH 2/2] Set ENDIAN in pandas._testing Setting an ENDIAN constant here to represent the character for host-endianness to use for numpy dtypes ("<" or ">") saves an import and a little duplicated code across a number of test modules. --- pandas/_testing/__init__.py | 3 + pandas/tests/arrays/boolean/test_astype.py | 5 +- .../tests/arrays/boolean/test_construction.py | 5 +- pandas/tests/arrays/floating/test_to_numpy.py | 5 +- pandas/tests/arrays/integer/test_dtypes.py | 5 +- pandas/tests/frame/methods/test_to_records.py | 126 ++++++++++-------- pandas/tests/io/parser/test_c_parser_only.py | 11 +- pandas/tests/tools/test_to_timedelta.py | 5 +- 8 files changed, 82 insertions(+), 83 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index fc48317114e23..0a62ee956be61 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -8,6 +8,7 @@ import os import re import string +from sys import byteorder from typing import ( TYPE_CHECKING, Callable, @@ -168,6 +169,8 @@ np.uint32, ] +ENDIAN = {"little": "<", "big": ">"}[byteorder] + NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA, Decimal("NaN")] NP_NAT_OBJECTS = [ cls("NaT", unit) diff --git a/pandas/tests/arrays/boolean/test_astype.py b/pandas/tests/arrays/boolean/test_astype.py index 258d2a99efb82..932e903c0e448 100644 --- a/pandas/tests/arrays/boolean/test_astype.py +++ 
b/pandas/tests/arrays/boolean/test_astype.py @@ -1,5 +1,3 @@ -from sys import byteorder - import numpy as np import pytest @@ -22,8 +20,7 @@ def test_astype(): tm.assert_numpy_array_equal(result, expected) result = arr.astype("str") - endian = {"little": "<", "big": ">"}[byteorder] - expected = np.array(["True", "False", "<NA>"], dtype=f"{endian}U5") + expected = np.array(["True", "False", "<NA>"], dtype=f"{tm.ENDIAN}U5") tm.assert_numpy_array_equal(result, expected) # no missing values diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py index 9a9afbaa0baf8..d26eea19c06e9 100644 --- a/pandas/tests/arrays/boolean/test_construction.py +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -1,5 +1,3 @@ -from sys import byteorder - import numpy as np import pytest @@ -275,8 +273,7 @@ def test_to_numpy(box): arr = con([True, False, None], dtype="boolean") result = arr.to_numpy(dtype="str") - endian = {"little": "<", "big": ">"}[byteorder] - expected = np.array([True, False, pd.NA], dtype=f"{endian}U5") + expected = np.array([True, False, pd.NA], dtype=f"{tm.ENDIAN}U5") tm.assert_numpy_array_equal(result, expected) # no missing values -> can convert to bool, otherwise raises diff --git a/pandas/tests/arrays/floating/test_to_numpy.py b/pandas/tests/arrays/floating/test_to_numpy.py index e96e27d84c044..2ed52439adf53 100644 --- a/pandas/tests/arrays/floating/test_to_numpy.py +++ b/pandas/tests/arrays/floating/test_to_numpy.py @@ -1,5 +1,3 @@ -from sys import byteorder - import numpy as np import pytest @@ -117,8 +115,7 @@ def test_to_numpy_string(box, dtype): arr = con([0.0, 1.0, None], dtype="Float64") result = arr.to_numpy(dtype="str") - endian = {"little": "<", "big": ">"}[byteorder] - expected = np.array([0.0, 1.0, pd.NA], dtype=f"{endian}U32") + expected = np.array([0.0, 1.0, pd.NA], dtype=f"{tm.ENDIAN}U32") tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/arrays/integer/test_dtypes.py
b/pandas/tests/arrays/integer/test_dtypes.py index 643d08c4b3bea..1566476c32989 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -1,5 +1,3 @@ -from sys import byteorder - import numpy as np import pytest @@ -285,8 +283,7 @@ def test_to_numpy_na_raises(dtype): def test_astype_str(): a = pd.array([1, 2, None], dtype="Int64") - endian = {"little": "<", "big": ">"}[byteorder] - expected = np.array(["1", "2", "<NA>"], dtype=f"{endian}U21") + expected = np.array(["1", "2", "<NA>"], dtype=f"{tm.ENDIAN}U21") tm.assert_numpy_array_equal(a.astype(str), expected) tm.assert_numpy_array_equal(a.astype("str"), expected) diff --git a/pandas/tests/frame/methods/test_to_records.py b/pandas/tests/frame/methods/test_to_records.py index d54e6f85f67b7..6332ffd181eba 100644 --- a/pandas/tests/frame/methods/test_to_records.py +++ b/pandas/tests/frame/methods/test_to_records.py @@ -1,5 +1,4 @@ from collections import abc -from sys import byteorder import numpy as np import pytest @@ -15,9 +14,6 @@ import pandas._testing as tm -endian = {"little": "<", "big": ">"}[byteorder] - - class TestDataFrameToRecords: def test_to_records_timeseries(self): index = date_range("1/1/2000", periods=10) @@ -156,9 +152,9 @@ def test_to_records_with_categorical(self): np.rec.array( [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")], dtype=[ - ("index", f"{endian}i8"), - ("A", f"{endian}i8"), - ("B", f"{endian}f8"), + ("index", f"{tm.ENDIAN}i8"), + ("A", f"{tm.ENDIAN}i8"), + ("B", f"{tm.ENDIAN}f8"), ("C", "O"), ], ), @@ -169,35 +165,35 @@ def test_to_records_with_categorical(self): np.rec.array( [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")], dtype=[ - ("index", f"{endian}i8"), - ("A", f"{endian}i8"), - ("B", f"{endian}f8"), + ("index", f"{tm.ENDIAN}i8"), + ("A", f"{tm.ENDIAN}i8"), + ("B", f"{tm.ENDIAN}f8"), ("C", "O"), ], ), ), # Column dtype applied across the board. Index unaffected.
( - {"column_dtypes": f"{endian}U4"}, + {"column_dtypes": f"{tm.ENDIAN}U4"}, np.rec.array( [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")], dtype=[ - ("index", f"{endian}i8"), - ("A", f"{endian}U4"), - ("B", f"{endian}U4"), - ("C", f"{endian}U4"), + ("index", f"{tm.ENDIAN}i8"), + ("A", f"{tm.ENDIAN}U4"), + ("B", f"{tm.ENDIAN}U4"), + ("C", f"{tm.ENDIAN}U4"), ], ), ), # Index dtype applied across the board. Columns unaffected. ( - {"index_dtypes": f"{endian}U1"}, + {"index_dtypes": f"{tm.ENDIAN}U1"}, np.rec.array( [("0", 1, 0.2, "a"), ("1", 2, 1.5, "bc")], dtype=[ - ("index", f"{endian}U1"), - ("A", f"{endian}i8"), - ("B", f"{endian}f8"), + ("index", f"{tm.ENDIAN}U1"), + ("A", f"{tm.ENDIAN}i8"), + ("B", f"{tm.ENDIAN}f8"), ("C", "O"), ], ), @@ -208,10 +204,10 @@ def test_to_records_with_categorical(self): np.rec.array( [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")], dtype=[ - ("index", f"{endian}i8"), - ("A", f"{endian}U"), - ("B", f"{endian}U"), - ("C", f"{endian}U"), + ("index", f"{tm.ENDIAN}i8"), + ("A", f"{tm.ENDIAN}U"), + ("B", f"{tm.ENDIAN}U"), + ("C", f"{tm.ENDIAN}U"), ], ), ), @@ -221,23 +217,29 @@ def test_to_records_with_categorical(self): np.rec.array( [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")], dtype=[ - ("index", f"{endian}i8"), - ("A", f"{endian}U"), - ("B", f"{endian}U"), - ("C", f"{endian}U"), + ("index", f"{tm.ENDIAN}i8"), + ("A", f"{tm.ENDIAN}U"), + ("B", f"{tm.ENDIAN}U"), + ("C", f"{tm.ENDIAN}U"), ], ), ), # Pass in a dictionary (name-only). 
( - {"column_dtypes": {"A": np.int8, "B": np.float32, "C": f"{endian}U2"}}, + { + "column_dtypes": { + "A": np.int8, + "B": np.float32, + "C": f"{tm.ENDIAN}U2", + } + }, np.rec.array( [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")], dtype=[ - ("index", f"{endian}i8"), + ("index", f"{tm.ENDIAN}i8"), ("A", "i1"), - ("B", f"{endian}f4"), - ("C", f"{endian}U2"), + ("B", f"{tm.ENDIAN}f4"), + ("C", f"{tm.ENDIAN}U2"), ], ), ), @@ -248,18 +250,22 @@ def test_to_records_with_categorical(self): [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")], dtype=[ ("index", "i2"), - ("A", f"{endian}i8"), - ("B", f"{endian}f8"), + ("A", f"{tm.ENDIAN}i8"), + ("B", f"{tm.ENDIAN}f8"), ("C", "O"), ], ), ), # Ignore index mappings if index is not True. ( - {"index": False, "index_dtypes": f"{endian}U2"}, + {"index": False, "index_dtypes": f"{tm.ENDIAN}U2"}, np.rec.array( [(1, 0.2, "a"), (2, 1.5, "bc")], - dtype=[("A", f"{endian}i8"), ("B", f"{endian}f8"), ("C", "O")], + dtype=[ + ("A", f"{tm.ENDIAN}i8"), + ("B", f"{tm.ENDIAN}f8"), + ("C", "O"), + ], ), ), # Non-existent names / indices in mapping should not error. 
@@ -269,8 +275,8 @@ def test_to_records_with_categorical(self): [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")], dtype=[ ("index", "i2"), - ("A", f"{endian}i8"), - ("B", f"{endian}f8"), + ("A", f"{tm.ENDIAN}i8"), + ("B", f"{tm.ENDIAN}f8"), ("C", "O"), ], ), @@ -281,9 +287,9 @@ def test_to_records_with_categorical(self): np.rec.array( [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")], dtype=[ - ("index", f"{endian}i8"), + ("index", f"{tm.ENDIAN}i8"), ("A", "i1"), - ("B", f"{endian}f4"), + ("B", f"{tm.ENDIAN}f4"), ("C", "O"), ], ), @@ -294,9 +300,9 @@ def test_to_records_with_categorical(self): np.rec.array( [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")], dtype=[ - ("index", f"{endian}i8"), + ("index", f"{tm.ENDIAN}i8"), ("A", "i1"), - ("B", f"{endian}f4"), + ("B", f"{tm.ENDIAN}f4"), ("C", "O"), ], ), @@ -305,14 +311,14 @@ def test_to_records_with_categorical(self): ( { "column_dtypes": {"A": np.int8, "B": np.float32}, - "index_dtypes": f"{endian}U2", + "index_dtypes": f"{tm.ENDIAN}U2", }, np.rec.array( [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")], dtype=[ - ("index", f"{endian}U2"), + ("index", f"{tm.ENDIAN}U2"), ("A", "i1"), - ("B", f"{endian}f4"), + ("B", f"{tm.ENDIAN}f4"), ("C", "O"), ], ), @@ -363,7 +369,11 @@ def test_to_records_dtype(self, kwargs, expected): {"column_dtypes": "float64", "index_dtypes": {0: "int32", 1: "int8"}}, np.rec.array( [(1, 2, 3.0), (4, 5, 6.0), (7, 8, 9.0)], - dtype=[("a", f"{endian}i4"), ("b", "i1"), ("c", f"{endian}f8")], + dtype=[ + ("a", f"{tm.ENDIAN}i4"), + ("b", "i1"), + ("c", f"{tm.ENDIAN}f8"), + ], ), ), # MultiIndex in the columns. 
@@ -375,16 +385,16 @@ def test_to_records_dtype(self, kwargs, expected): ), ), { - "column_dtypes": {0: f"{endian}U1", 2: "float32"}, + "column_dtypes": {0: f"{tm.ENDIAN}U1", 2: "float32"}, "index_dtypes": "float32", }, np.rec.array( [(0.0, "1", 2, 3.0), (1.0, "4", 5, 6.0), (2.0, "7", 8, 9.0)], dtype=[ - ("index", f"{endian}f4"), - ("('a', 'd')", f"{endian}U1"), - ("('b', 'e')", f"{endian}i8"), - ("('c', 'f')", f"{endian}f4"), + ("index", f"{tm.ENDIAN}f4"), + ("('a', 'd')", f"{tm.ENDIAN}U1"), + ("('b', 'e')", f"{tm.ENDIAN}i8"), + ("('c', 'f')", f"{tm.ENDIAN}f4"), ], ), ), @@ -401,7 +411,7 @@ def test_to_records_dtype(self, kwargs, expected): ), { "column_dtypes": "float64", - "index_dtypes": {0: f"{endian}U2", 1: "int8"}, + "index_dtypes": {0: f"{tm.ENDIAN}U2", 1: "int8"}, }, np.rec.array( [ @@ -410,11 +420,11 @@ def test_to_records_dtype(self, kwargs, expected): ("f", -6, 7, 8, 9.0), ], dtype=[ - ("c", f"{endian}U2"), + ("c", f"{tm.ENDIAN}U2"), ("d", "i1"), - ("('a', 'd')", f"{endian}f8"), - ("('b', 'e')", f"{endian}f8"), - ("('c', 'f')", f"{endian}f8"), + ("('a', 'd')", f"{tm.ENDIAN}f8"), + ("('b', 'e')", f"{tm.ENDIAN}f8"), + ("('c', 'f')", f"{tm.ENDIAN}f8"), ], ), ), @@ -444,16 +454,16 @@ def keys(self): dtype_mappings = { "column_dtypes": DictLike(**{"A": np.int8, "B": np.float32}), - "index_dtypes": f"{endian}U2", + "index_dtypes": f"{tm.ENDIAN}U2", } result = df.to_records(**dtype_mappings) expected = np.rec.array( [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")], dtype=[ - ("index", f"{endian}U2"), + ("index", f"{tm.ENDIAN}U2"), ("A", "i1"), - ("B", f"{endian}f4"), + ("B", f"{tm.ENDIAN}f4"), ("C", "O"), ], ) diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index e9420cd84ee76..9a81790ca3bb0 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -12,7 +12,6 @@ ) import mmap import os -from sys import byteorder import tarfile import numpy as np @@ 
-32,9 +31,6 @@ import pandas._testing as tm -endian = {"little": "<", "big": ">"}[byteorder] - - @pytest.mark.parametrize( "malformed", ["1\r1\r1\r 1\r 1\r", "1\r1\r1\r 1\r 1\r11\r", "1\r1\r1\r 1\r 1\r11\r1\r"], @@ -148,9 +144,12 @@ def test_dtype_and_names_error(c_parser_only): "the dtype timedelta64 is not supported for parsing", {"dtype": {"A": "timedelta64", "B": "float64"}}, ), - (f"the dtype {endian}U8 is not supported for parsing", {"dtype": {"A": "U8"}}), + ( + f"the dtype {tm.ENDIAN}U8 is not supported for parsing", + {"dtype": {"A": "U8"}}, + ), ], - ids=["dt64-0", "dt64-1", "td64", f"{endian}U8"], + ids=["dt64-0", "dt64-1", "td64", f"{tm.ENDIAN}U8"], ) def test_unsupported_dtype(c_parser_only, match, kwargs): parser = c_parser_only diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index cc888fff2603f..6c11ec42858c0 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -2,7 +2,6 @@ time, timedelta, ) -from sys import byteorder import numpy as np import pytest @@ -198,9 +197,9 @@ def test_to_timedelta_on_missing_values(self): timedelta_NaT = np.timedelta64("NaT") actual = to_timedelta(Series(["00:00:01", np.nan])) - endian = {"little": "<", "big": ">"}[byteorder] expected = Series( - [np.timedelta64(1000000000, "ns"), timedelta_NaT], dtype=f"{endian}m8[ns]" + [np.timedelta64(1000000000, "ns"), timedelta_NaT], + dtype=f"{tm.ENDIAN}m8[ns]", ) tm.assert_series_equal(actual, expected)