Fix a few test failures on big-endian systems

musicinmybrain · musicinmybrain · commit c214e08bb6d5 · 2022-04-07T08:10:19.000-04:00
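These failures all come from tests that hard-code little-endian dtype strings
(e.g. "<U5", "<i8"), when in fact the byte order of the resulting dtype is that
of the host. The expected dtypes are now built from sys.byteorder instead.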
diff --git a/pandas/tests/arrays/boolean/test_astype.py b/pandas/tests/arrays/boolean/test_astype.py
@@ -1,3 +1,5 @@
+from sys import byteorder
+
 import numpy as np
 import pytest
 
@@ -20,7 +22,8 @@ def test_astype():
     tm.assert_numpy_array_equal(result, expected)
 
     result = arr.astype("str")
-    expected = np.array(["True", "False", "<NA>"], dtype="<U5")
+    endian = {"little": "<", "big": ">"}[byteorder]
+    expected = np.array(["True", "False", "<NA>"], dtype=f"{endian}U5")
     tm.assert_numpy_array_equal(result, expected)
 
     # no missing values
diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py
@@ -1,3 +1,5 @@
+from sys import byteorder
+
 import numpy as np
 import pytest
 
@@ -273,7 +275,8 @@ def test_to_numpy(box):
 
     arr = con([True, False, None], dtype="boolean")
     result = arr.to_numpy(dtype="str")
-    expected = np.array([True, False, pd.NA], dtype="<U5")
+    endian = {"little": "<", "big": ">"}[byteorder]
+    expected = np.array([True, False, pd.NA], dtype=f"{endian}U5")
     tm.assert_numpy_array_equal(result, expected)
 
     # no missing values -> can convert to bool, otherwise raises
diff --git a/pandas/tests/arrays/floating/test_to_numpy.py b/pandas/tests/arrays/floating/test_to_numpy.py
@@ -1,3 +1,5 @@
+from sys import byteorder
+
 import numpy as np
 import pytest
 
@@ -115,7 +117,8 @@ def test_to_numpy_string(box, dtype):
     arr = con([0.0, 1.0, None], dtype="Float64")
 
     result = arr.to_numpy(dtype="str")
-    expected = np.array([0.0, 1.0, pd.NA], dtype="<U32")
+    endian = {"little": "<", "big": ">"}[byteorder]
+    expected = np.array([0.0, 1.0, pd.NA], dtype=f"{endian}U32")
     tm.assert_numpy_array_equal(result, expected)
 
 
diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py
@@ -1,3 +1,5 @@
+from sys import byteorder
+
 import numpy as np
 import pytest
 
@@ -283,7 +285,8 @@ def test_to_numpy_na_raises(dtype):
 
 def test_astype_str():
     a = pd.array([1, 2, None], dtype="Int64")
-    expected = np.array(["1", "2", "<NA>"], dtype="<U21")
+    endian = {"little": "<", "big": ">"}[byteorder]
+    expected = np.array(["1", "2", "<NA>"], dtype=f"{endian}U21")
 
     tm.assert_numpy_array_equal(a.astype(str), expected)
     tm.assert_numpy_array_equal(a.astype("str"), expected)
diff --git a/pandas/tests/frame/methods/test_to_records.py b/pandas/tests/frame/methods/test_to_records.py
@@ -1,4 +1,5 @@
 from collections import abc
+from sys import byteorder
 
 import numpy as np
 import pytest
@@ -14,6 +15,9 @@
 import pandas._testing as tm
 
 
+endian = {"little": "<", "big": ">"}[byteorder]
+
+
 class TestDataFrameToRecords:
     def test_to_records_timeseries(self):
         index = date_range("1/1/2000", periods=10)
@@ -151,106 +155,166 @@ def test_to_records_with_categorical(self):
                 {},
                 np.rec.array(
                     [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
-                    dtype=[("index", "<i8"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
+                    dtype=[
+                        ("index", f"{endian}i8"),
+                        ("A", f"{endian}i8"),
+                        ("B", f"{endian}f8"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Should have no effect in this case.
             (
                 {"index": True},
                 np.rec.array(
                     [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
-                    dtype=[("index", "<i8"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
+                    dtype=[
+                        ("index", f"{endian}i8"),
+                        ("A", f"{endian}i8"),
+                        ("B", f"{endian}f8"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Column dtype applied across the board. Index unaffected.
             (
-                {"column_dtypes": "<U4"},
+                {"column_dtypes": f"{endian}U4"},
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<i8"), ("A", "<U4"), ("B", "<U4"), ("C", "<U4")],
+                    dtype=[
+                        ("index", f"{endian}i8"),
+                        ("A", f"{endian}U4"),
+                        ("B", f"{endian}U4"),
+                        ("C", f"{endian}U4"),
+                    ],
                 ),
             ),
             # Index dtype applied across the board. Columns unaffected.
             (
-                {"index_dtypes": "<U1"},
+                {"index_dtypes": f"{endian}U1"},
                 np.rec.array(
                     [("0", 1, 0.2, "a"), ("1", 2, 1.5, "bc")],
-                    dtype=[("index", "<U1"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
+                    dtype=[
+                        ("index", f"{endian}U1"),
+                        ("A", f"{endian}i8"),
+                        ("B", f"{endian}f8"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Pass in a type instance.
             (
                 {"column_dtypes": str},
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<i8"), ("A", "<U"), ("B", "<U"), ("C", "<U")],
+                    dtype=[
+                        ("index", f"{endian}i8"),
+                        ("A", f"{endian}U"),
+                        ("B", f"{endian}U"),
+                        ("C", f"{endian}U"),
+                    ],
                 ),
             ),
             # Pass in a dtype instance.
             (
                 {"column_dtypes": np.dtype("unicode")},
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<i8"), ("A", "<U"), ("B", "<U"), ("C", "<U")],
+                    dtype=[
+                        ("index", f"{endian}i8"),
+                        ("A", f"{endian}U"),
+                        ("B", f"{endian}U"),
+                        ("C", f"{endian}U"),
+                    ],
                 ),
             ),
             # Pass in a dictionary (name-only).
             (
-                {"column_dtypes": {"A": np.int8, "B": np.float32, "C": "<U2"}},
+                {"column_dtypes": {"A": np.int8, "B": np.float32, "C": f"{endian}U2"}},
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<i8"), ("A", "i1"), ("B", "<f4"), ("C", "<U2")],
+                    dtype=[
+                        ("index", f"{endian}i8"),
+                        ("A", "i1"),
+                        ("B", f"{endian}f4"),
+                        ("C", f"{endian}U2"),
+                    ],
                 ),
             ),
             # Pass in a dictionary (indices-only).
             (
                 {"index_dtypes": {0: "int16"}},
                 np.rec.array(
                     [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
-                    dtype=[("index", "i2"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
+                    dtype=[
+                        ("index", "i2"),
+                        ("A", f"{endian}i8"),
+                        ("B", f"{endian}f8"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Ignore index mappings if index is not True.
             (
-                {"index": False, "index_dtypes": "<U2"},
+                {"index": False, "index_dtypes": f"{endian}U2"},
                 np.rec.array(
                     [(1, 0.2, "a"), (2, 1.5, "bc")],
-                    dtype=[("A", "<i8"), ("B", "<f8"), ("C", "O")],
+                    dtype=[("A", f"{endian}i8"), ("B", f"{endian}f8"), ("C", "O")],
                 ),
             ),
             # Non-existent names / indices in mapping should not error.
             (
                 {"index_dtypes": {0: "int16", "not-there": "float32"}},
                 np.rec.array(
                     [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
-                    dtype=[("index", "i2"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
+                    dtype=[
+                        ("index", "i2"),
+                        ("A", f"{endian}i8"),
+                        ("B", f"{endian}f8"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Names / indices not in mapping default to array dtype.
             (
                 {"column_dtypes": {"A": np.int8, "B": np.float32}},
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<i8"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
+                    dtype=[
+                        ("index", f"{endian}i8"),
+                        ("A", "i1"),
+                        ("B", f"{endian}f4"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Names / indices not in dtype mapping default to array dtype.
             (
                 {"column_dtypes": {"A": np.dtype("int8"), "B": np.dtype("float32")}},
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<i8"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
+                    dtype=[
+                        ("index", f"{endian}i8"),
+                        ("A", "i1"),
+                        ("B", f"{endian}f4"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Mixture of everything.
             (
                 {
                     "column_dtypes": {"A": np.int8, "B": np.float32},
-                    "index_dtypes": "<U2",
+                    "index_dtypes": f"{endian}U2",
                 },
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<U2"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
+                    dtype=[
+                        ("index", f"{endian}U2"),
+                        ("A", "i1"),
+                        ("B", f"{endian}f4"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Invalid dype values.
@@ -299,7 +363,7 @@ def test_to_records_dtype(self, kwargs, expected):
                 {"column_dtypes": "float64", "index_dtypes": {0: "int32", 1: "int8"}},
                 np.rec.array(
                     [(1, 2, 3.0), (4, 5, 6.0), (7, 8, 9.0)],
-                    dtype=[("a", "<i4"), ("b", "i1"), ("c", "<f8")],
+                    dtype=[("a", f"{endian}i4"), ("b", "i1"), ("c", f"{endian}f8")],
                 ),
             ),
             # MultiIndex in the columns.
@@ -310,14 +374,17 @@ def test_to_records_dtype(self, kwargs, expected):
                         [("a", "d"), ("b", "e"), ("c", "f")]
                     ),
                 ),
-                {"column_dtypes": {0: "<U1", 2: "float32"}, "index_dtypes": "float32"},
+                {
+                    "column_dtypes": {0: f"{endian}U1", 2: "float32"},
+                    "index_dtypes": "float32",
+                },
                 np.rec.array(
                     [(0.0, "1", 2, 3.0), (1.0, "4", 5, 6.0), (2.0, "7", 8, 9.0)],
                     dtype=[
-                        ("index", "<f4"),
-                        ("('a', 'd')", "<U1"),
-                        ("('b', 'e')", "<i8"),
-                        ("('c', 'f')", "<f4"),
+                        ("index", f"{endian}f4"),
+                        ("('a', 'd')", f"{endian}U1"),
+                        ("('b', 'e')", f"{endian}i8"),
+                        ("('c', 'f')", f"{endian}f4"),
                     ],
                 ),
             ),
@@ -332,19 +399,22 @@ def test_to_records_dtype(self, kwargs, expected):
                         [("d", -4), ("d", -5), ("f", -6)], names=list("cd")
                     ),
                 ),
-                {"column_dtypes": "float64", "index_dtypes": {0: "<U2", 1: "int8"}},
+                {
+                    "column_dtypes": "float64",
+                    "index_dtypes": {0: f"{endian}U2", 1: "int8"},
+                },
                 np.rec.array(
                     [
                         ("d", -4, 1.0, 2.0, 3.0),
                         ("d", -5, 4.0, 5.0, 6.0),
                         ("f", -6, 7, 8, 9.0),
                     ],
                     dtype=[
-                        ("c", "<U2"),
+                        ("c", f"{endian}U2"),
                         ("d", "i1"),
-                        ("('a', 'd')", "<f8"),
-                        ("('b', 'e')", "<f8"),
-                        ("('c', 'f')", "<f8"),
+                        ("('a', 'd')", f"{endian}f8"),
+                        ("('b', 'e')", f"{endian}f8"),
+                        ("('c', 'f')", f"{endian}f8"),
                     ],
                 ),
             ),
@@ -374,13 +444,18 @@ def keys(self):
 
         dtype_mappings = {
             "column_dtypes": DictLike(**{"A": np.int8, "B": np.float32}),
-            "index_dtypes": "<U2",
+            "index_dtypes": f"{endian}U2",
         }
 
         result = df.to_records(**dtype_mappings)
         expected = np.rec.array(
             [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-            dtype=[("index", "<U2"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
+            dtype=[
+                ("index", f"{endian}U2"),
+                ("A", "i1"),
+                ("B", f"{endian}f4"),
+                ("C", "O"),
+            ],
         )
         tm.assert_almost_equal(result, expected)
 
diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py
@@ -12,6 +12,7 @@
 )
 import mmap
 import os
+from sys import byteorder
 import tarfile
 
 import numpy as np
@@ -31,6 +32,9 @@
 import pandas._testing as tm
 
 
+endian = {"little": "<", "big": ">"}[byteorder]
+
+
 @pytest.mark.parametrize(
     "malformed",
     ["1\r1\r1\r 1\r 1\r", "1\r1\r1\r 1\r 1\r11\r", "1\r1\r1\r 1\r 1\r11\r1\r"],
@@ -144,9 +148,9 @@ def test_dtype_and_names_error(c_parser_only):
             "the dtype timedelta64 is not supported for parsing",
             {"dtype": {"A": "timedelta64", "B": "float64"}},
         ),
-        ("the dtype <U8 is not supported for parsing", {"dtype": {"A": "U8"}}),
+        (f"the dtype {endian}U8 is not supported for parsing", {"dtype": {"A": "U8"}}),
     ],
-    ids=["dt64-0", "dt64-1", "td64", "<U8"],
+    ids=["dt64-0", "dt64-1", "td64", f"{endian}U8"],
 )
 def test_unsupported_dtype(c_parser_only, match, kwargs):
     parser = c_parser_only
diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py
@@ -6,6 +6,7 @@
     timedelta,
 )
 import operator
+from sys import byteorder
 
 import numpy as np
 import pytest
diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py

@@ -6,6 +6,7 @@
     timedelta,
 )
 import operator
+from sys import byteorder
 
 import numpy as np
 import pytest