From 5b9cd4b81f284c5fa1248b7178cac754cae23cf8 Mon Sep 17 00:00:00 2001
From: Jonas Haag <jonas@lophus.org>
Date: Fri, 17 Jun 2022 12:07:51 +0200
Subject: [PATCH 01/17] Fast byteswap

---
 pandas/io/sas/sas.pyx     | 82 +++++++++++++++++++++++++++++++++++++--
 pandas/io/sas/sas7bdat.py | 46 ++++++++++++++++------
 2 files changed, 114 insertions(+), 14 deletions(-)

diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx
index 9fcef64e07133..1bcc0f037b309 100644
--- a/pandas/io/sas/sas.pyx
+++ b/pandas/io/sas/sas.pyx
@@ -1,13 +1,19 @@
 # cython: profile=False
 # cython: boundscheck=False, initializedcheck=False
 from cython cimport Py_ssize_t
+from libc.stdint cimport (
+    int64_t,
+    uint8_t,
+    uint16_t,
+    uint32_t,
+    uint64_t,
+)
+from libc.string cimport memcpy
+
 import numpy as np
 
 import pandas.io.sas.sas_constants as const
 
-ctypedef signed long long   int64_t
-ctypedef unsigned char      uint8_t
-ctypedef unsigned short     uint16_t
 
 # rle_decompress decompresses data using a Run Length Encoding
 # algorithm.  It is partially documented here:
@@ -433,3 +439,73 @@ cdef class Parser:
         self.current_row_on_page_index += 1
         self.current_row_in_chunk_index += 1
         self.current_row_in_file_index += 1
+
+
+def read_float_with_byteswap(const uint8_t *data, bint byteswap):
+    cdef float res = (<float*>data)[0]
+    if byteswap:
+        res = _byteswap_float(res)
+    return res
+
+
+def read_double_with_byteswap(const uint8_t *data, bint byteswap):
+    cdef double res = (<double*>data)[0]
+    if byteswap:
+        res = _byteswap_double(res)
+    return res
+
+
+def read_uint16_with_byteswap(const uint8_t *data, bint byteswap):
+    cdef uint16_t res = (<uint16_t *>data)[0]
+    if byteswap:
+        res = _byteswap2(res)
+    return res
+
+
+def read_uint32_with_byteswap(const uint8_t *data, bint byteswap):
+    cdef uint32_t res = (<uint32_t *>data)[0]
+    if byteswap:
+        res = _byteswap4(res)
+    return res
+
+
+def read_uint64_with_byteswap(const uint8_t *data, bint byteswap):
+    cdef uint64_t res = (<uint64_t *>data)[0]
+    if byteswap:
+        res = _byteswap8(res)
+    return res
+
+
+# Byteswapping
+# From https://github.com/WizardMac/ReadStat/blob/master/src/readstat_bits.
+# Copyright (c) 2013-2016 Evan Miller, Apache 2 License
+
+cdef inline uint16_t _byteswap2(uint16_t num):
+    return ((num & 0xFF00) >> 8) | ((num & 0x00FF) << 8)
+
+
+cdef inline uint32_t _byteswap4(uint32_t num):
+    num = ((num & <uint32_t>0xFFFF0000) >> 16) | ((num & <uint32_t>0x0000FFFF) << 16)
+    return ((num & <uint32_t>0xFF00FF00) >> 8) | ((num & <uint32_t>0x00FF00FF) << 8)
+
+
+cdef inline uint64_t _byteswap8(uint64_t num):
+    num = ((num & <uint64_t>0xFFFFFFFF00000000) >> 32) | ((num & <uint64_t>0x00000000FFFFFFFF) << 32)
+    num = ((num & <uint64_t>0xFFFF0000FFFF0000) >> 16) | ((num & <uint64_t>0x0000FFFF0000FFFF) << 16)
+    return ((num & <uint64_t>0xFF00FF00FF00FF00) >> 8) | ((num & <uint64_t>0x00FF00FF00FF00FF) << 8)
+
+
+cdef inline float _byteswap_float(float num):
+    cdef uint32_t answer = 0
+    memcpy(&answer, &num, 4)
+    answer = _byteswap4(answer)
+    memcpy(&num, &answer, 4)
+    return num
+
+
+cdef inline double _byteswap_double(double num):
+    cdef uint64_t answer = 0
+    memcpy(&answer, &num, 8)
+    answer = _byteswap8(answer)
+    memcpy(&num, &answer, 8)
+    return num
diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py
index 7282affe1b5e6..31ab35b475374 100644
--- a/pandas/io/sas/sas7bdat.py
+++ b/pandas/io/sas/sas7bdat.py
@@ -20,7 +20,7 @@
     datetime,
     timedelta,
 )
-import struct
+import sys
 from typing import cast
 
 import numpy as np
@@ -42,7 +42,14 @@
 )
 
 from pandas.io.common import get_handle
-from pandas.io.sas._sas import Parser
+from pandas.io.sas._sas import (
+    Parser,
+    read_double_with_byteswap,
+    read_float_with_byteswap,
+    read_uint16_with_byteswap,
+    read_uint32_with_byteswap,
+    read_uint64_with_byteswap,
+)
 import pandas.io.sas.sas_constants as const
 from pandas.io.sas.sasreader import ReaderBase
 
@@ -259,8 +266,10 @@ def _get_properties(self) -> None:
         buf = self._read_bytes(const.endianness_offset, const.endianness_length)
         if buf == b"\x01":
             self.byte_order = "<"
+            self.need_byteswap = sys.byteorder == "big"
         else:
             self.byte_order = ">"
+            self.need_byteswap = sys.byteorder == "little"
 
         # Get encoding information
         buf = self._read_bytes(const.encoding_offset, const.encoding_length)[0]
@@ -345,22 +354,37 @@ def __next__(self) -> DataFrame:
 
     # Read a single float of the given width (4 or 8).
     def _read_float(self, offset: int, width: int):
-        if width not in (4, 8):
+        if width == 4:
+            return read_float_with_byteswap(
+                self._read_bytes(offset, 4), self.need_byteswap
+            )
+        elif width == 8:
+            return read_double_with_byteswap(
+                self._read_bytes(offset, 8), self.need_byteswap
+            )
+        else:
             self.close()
             raise ValueError("invalid float width")
-        buf = self._read_bytes(offset, width)
-        fd = "f" if width == 4 else "d"
-        return struct.unpack(self.byte_order + fd, buf)[0]
 
     # Read a single signed integer of the given width (1, 2, 4 or 8).
     def _read_int(self, offset: int, width: int) -> int:
-        if width not in (1, 2, 4, 8):
+        if width == 1:
+            return self._read_bytes(offset, 1)[0]
+        elif width == 2:
+            return read_uint16_with_byteswap(
+                self._read_bytes(offset, 2), self.need_byteswap
+            )
+        elif width == 4:
+            return read_uint32_with_byteswap(
+                self._read_bytes(offset, 4), self.need_byteswap
+            )
+        elif width == 8:
+            return read_uint64_with_byteswap(
+                self._read_bytes(offset, 8), self.need_byteswap
+            )
+        else:
             self.close()
             raise ValueError("invalid int width")
-        buf = self._read_bytes(offset, width)
-        it = {1: "b", 2: "h", 4: "l", 8: "q"}[width]
-        iv = struct.unpack(self.byte_order + it, buf)[0]
-        return iv
 
     def _read_bytes(self, offset: int, length: int):
         if self._cached_page is None:

From 17c965f964114682d3b11ff7babc8d7c0db6b381 Mon Sep 17 00:00:00 2001
From: Jonas Haag <jonas@lophus.org>
Date: Fri, 17 Jun 2022 15:36:33 +0200
Subject: [PATCH 02/17] Add types

---
 pandas/io/sas/_sas.pyi | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pandas/io/sas/_sas.pyi b/pandas/io/sas/_sas.pyi
index 527193dd71e57..e8b2dce7d3266 100644
--- a/pandas/io/sas/_sas.pyi
+++ b/pandas/io/sas/_sas.pyi
@@ -3,3 +3,9 @@ from pandas.io.sas.sas7bdat import SAS7BDATReader
 class Parser:
     def __init__(self, parser: SAS7BDATReader) -> None: ...
     def read(self, nrows: int) -> None: ...
+
+def read_float_with_byteswap(data: bytes, byteswap: bool) -> float: ...
+def read_double_with_byteswap(data: bytes, byteswap: bool) -> float: ...
+def read_uint16_with_byteswap(data: bytes, byteswap: bool) -> int: ...
+def read_uint32_with_byteswap(data: bytes, byteswap: bool) -> int: ...
+def read_uint64_with_byteswap(data: bytes, byteswap: bool) -> int: ...

From 435a003c916bf5f5c02d8182d30e903d47f4f718 Mon Sep 17 00:00:00 2001
From: Jonas Haag <jonas@lophus.org>
Date: Sat, 9 Jul 2022 09:45:17 +0200
Subject: [PATCH 03/17] Review feedback: add comment explaining the fast read helpers

---
 pandas/io/sas/sas.pyx | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx
index b9e84672e611a..d75a1688075d0 100644
--- a/pandas/io/sas/sas.pyx
+++ b/pandas/io/sas/sas.pyx
@@ -439,6 +439,8 @@ cdef class Parser:
         self.current_row_in_file_index += 1
 
 
+# The following are faster versions of struct.unpack that avoid the overhead of
+# Python function calls.  They may be called up to (n_rows * n_cols) times.
 def read_float_with_byteswap(const uint8_t *data, bint byteswap):
     cdef float res = (<float*>data)[0]
     if byteswap:

From 10ab87fb7624c29e8b9aa7b475f983a48ad8af0b Mon Sep 17 00:00:00 2001
From: Jonas Haag <jonas@lophus.org>
Date: Sat, 9 Jul 2022 21:44:09 +0200
Subject: [PATCH 04/17] Slightly faster variant (one fewer bytes object construction)

---
 pandas/io/sas/_sas.pyi    | 10 +++++-----
 pandas/io/sas/sas.pyx     | 35 +++++++++++++++++++++++++----------
 pandas/io/sas/sas7bdat.py | 27 +++++++++------------------
 3 files changed, 39 insertions(+), 33 deletions(-)

diff --git a/pandas/io/sas/_sas.pyi b/pandas/io/sas/_sas.pyi
index e8b2dce7d3266..8353a9ee0a890 100644
--- a/pandas/io/sas/_sas.pyi
+++ b/pandas/io/sas/_sas.pyi
@@ -4,8 +4,8 @@ class Parser:
     def __init__(self, parser: SAS7BDATReader) -> None: ...
     def read(self, nrows: int) -> None: ...
 
-def read_float_with_byteswap(data: bytes, byteswap: bool) -> float: ...
-def read_double_with_byteswap(data: bytes, byteswap: bool) -> float: ...
-def read_uint16_with_byteswap(data: bytes, byteswap: bool) -> int: ...
-def read_uint32_with_byteswap(data: bytes, byteswap: bool) -> int: ...
-def read_uint64_with_byteswap(data: bytes, byteswap: bool) -> int: ...
+def read_float_with_byteswap(data: bytes, offset: int, byteswap: bool) -> float: ...
+def read_double_with_byteswap(data: bytes, offset: int, byteswap: bool) -> float: ...
+def read_uint16_with_byteswap(data: bytes, offset: int, byteswap: bool) -> int: ...
+def read_uint32_with_byteswap(data: bytes, offset: int, byteswap: bool) -> int: ...
+def read_uint64_with_byteswap(data: bytes, offset: int, byteswap: bool) -> int: ...
diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx
index d75a1688075d0..a4944cd3bfc3e 100644
--- a/pandas/io/sas/sas.pyx
+++ b/pandas/io/sas/sas.pyx
@@ -441,36 +441,51 @@ cdef class Parser:
 
 # The following are faster versions of struct.unpack that avoid the overhead of
 # Python function calls.  They may be called up to (n_rows * n_cols) times.
-def read_float_with_byteswap(const uint8_t *data, bint byteswap):
-    cdef float res = (<float*>data)[0]
+def read_float_with_byteswap(bytes data, Py_ssize_t offset, bint byteswap):
+    assert offset + 4 < len(data)
+    cdef:
+        const char *data_ptr = data
+        float res = (<float*>(data_ptr + offset))[0]
     if byteswap:
         res = _byteswap_float(res)
     return res
 
 
-def read_double_with_byteswap(const uint8_t *data, bint byteswap):
-    cdef double res = (<double*>data)[0]
+def read_double_with_byteswap(bytes data, Py_ssize_t offset, bint byteswap):
+    assert offset + 8 < len(data)
+    cdef:
+        const char *data_ptr = data
+        double res = (<double*>(data_ptr + offset))[0]
     if byteswap:
         res = _byteswap_double(res)
     return res
 
 
-def read_uint16_with_byteswap(const uint8_t *data, bint byteswap):
-    cdef uint16_t res = (<uint16_t *>data)[0]
+def read_uint16_with_byteswap(bytes data, Py_ssize_t offset, bint byteswap):
+    assert offset + 2 < len(data)
+    cdef:
+        const char *data_ptr = data
+        uint16_t res = (<uint16_t *>(data_ptr + offset))[0]
     if byteswap:
         res = _byteswap2(res)
     return res
 
 
-def read_uint32_with_byteswap(const uint8_t *data, bint byteswap):
-    cdef uint32_t res = (<uint32_t *>data)[0]
+def read_uint32_with_byteswap(bytes data, Py_ssize_t offset, bint byteswap):
+    assert offset + 4 < len(data)
+    cdef:
+        const char *data_ptr = data
+        uint32_t res = (<uint32_t *>(data_ptr + offset))[0]
     if byteswap:
         res = _byteswap4(res)
     return res
 
 
-def read_uint64_with_byteswap(const uint8_t *data, bint byteswap):
-    cdef uint64_t res = (<uint64_t *>data)[0]
+def read_uint64_with_byteswap(bytes data, Py_ssize_t offset, bint byteswap):
+    assert offset + 8 < len(data)
+    cdef:
+        const char *data_ptr = data
+        uint64_t res = (<uint64_t *>(data_ptr + offset))[0]
     if byteswap:
         res = _byteswap8(res)
     return res
diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py
index 31ab35b475374..72a05744d0f8e 100644
--- a/pandas/io/sas/sas7bdat.py
+++ b/pandas/io/sas/sas7bdat.py
@@ -356,11 +356,11 @@ def __next__(self) -> DataFrame:
     def _read_float(self, offset: int, width: int):
         if width == 4:
             return read_float_with_byteswap(
-                self._read_bytes(offset, 4), self.need_byteswap
+                self._cached_page, offset, self.need_byteswap
             )
         elif width == 8:
             return read_double_with_byteswap(
-                self._read_bytes(offset, 8), self.need_byteswap
+                self._cached_page, offset, self.need_byteswap
             )
         else:
             self.close()
@@ -372,34 +372,25 @@ def _read_int(self, offset: int, width: int) -> int:
             return self._read_bytes(offset, 1)[0]
         elif width == 2:
             return read_uint16_with_byteswap(
-                self._read_bytes(offset, 2), self.need_byteswap
+                self._cached_page, offset, self.need_byteswap
             )
         elif width == 4:
             return read_uint32_with_byteswap(
-                self._read_bytes(offset, 4), self.need_byteswap
+                self._cached_page, offset, self.need_byteswap
             )
         elif width == 8:
             return read_uint64_with_byteswap(
-                self._read_bytes(offset, 8), self.need_byteswap
+                self._cached_page, offset, self.need_byteswap
             )
         else:
             self.close()
             raise ValueError("invalid int width")
 
     def _read_bytes(self, offset: int, length: int):
-        if self._cached_page is None:
-            self._path_or_buf.seek(offset)
-            buf = self._path_or_buf.read(length)
-            if len(buf) < length:
-                self.close()
-                msg = f"Unable to read {length:d} bytes from file position {offset:d}."
-                raise ValueError(msg)
-            return buf
-        else:
-            if offset + length > len(self._cached_page):
-                self.close()
-                raise ValueError("The cached page is too small.")
-            return self._cached_page[offset : offset + length]
+        if offset + length > len(self._cached_page):
+            self.close()
+            raise ValueError("The cached page is too small.")
+        return self._cached_page[offset : offset + length]
 
     def _read_and_convert_header_text(self, offset: int, length: int) -> str | bytes:
         return self._convert_header_text(

From ad74f5c38526203559e8d8e63848f1b15b25ec97 Mon Sep 17 00:00:00 2001
From: Jonas Haag <jonas@lophus.org>
Date: Sun, 10 Jul 2022 22:22:06 +0200
Subject: [PATCH 05/17] Assert _cached_page is not None in _read_float/_read_int to satisfy mypy

---
 pandas/io/sas/sas7bdat.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py
index 72a05744d0f8e..832b36da34952 100644
--- a/pandas/io/sas/sas7bdat.py
+++ b/pandas/io/sas/sas7bdat.py
@@ -354,6 +354,7 @@ def __next__(self) -> DataFrame:
 
     # Read a single float of the given width (4 or 8).
     def _read_float(self, offset: int, width: int):
+        assert self._cached_page is not None
         if width == 4:
             return read_float_with_byteswap(
                 self._cached_page, offset, self.need_byteswap
@@ -368,6 +369,7 @@ def _read_float(self, offset: int, width: int):
 
     # Read a single signed integer of the given width (1, 2, 4 or 8).
     def _read_int(self, offset: int, width: int) -> int:
+        assert self._cached_page is not None
         if width == 1:
             return self._read_bytes(offset, 1)[0]
         elif width == 2:

From 9c5b4b3449a05f8b6f8464e5b96fe63d3a639a6f Mon Sep 17 00:00:00 2001
From: Jonas Haag <jonas@lophus.org>
Date: Mon, 11 Jul 2022 08:51:04 +0200
Subject: [PATCH 06/17] Assert _cached_page is not None in _read_bytes

---
 pandas/io/sas/sas7bdat.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py
index 832b36da34952..f36e63a0e000b 100644
--- a/pandas/io/sas/sas7bdat.py
+++ b/pandas/io/sas/sas7bdat.py
@@ -389,6 +389,7 @@ def _read_int(self, offset: int, width: int) -> int:
             raise ValueError("invalid int width")
 
     def _read_bytes(self, offset: int, length: int):
+        assert self._cached_page is not None
         if offset + length > len(self._cached_page):
             self.close()
             raise ValueError("The cached page is too small.")

From f3c63f08e9b5d6e54e2c2e527c91042311908f3a Mon Sep 17 00:00:00 2001
From: Jonas Haag <jonas@lophus.org>
Date: Thu, 21 Jul 2022 09:10:03 +0200
Subject: [PATCH 07/17] Use intrinsics

---
 pandas/io/sas/sas.pyx | 42 +++++++++++++++++++-----------------------
 1 file changed, 19 insertions(+), 23 deletions(-)

diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx
index d9dc639e38050..3e0474903883d 100644
--- a/pandas/io/sas/sas.pyx
+++ b/pandas/io/sas/sas.pyx
@@ -495,35 +495,31 @@ def read_uint64_with_byteswap(bytes data, Py_ssize_t offset, bint byteswap):
 
 
 # Byteswapping
-# From https://github.com/WizardMac/ReadStat/blob/master/src/readstat_bits.
-# Copyright (c) 2013-2016 Evan Miller, Apache 2 License
 
-cdef inline uint16_t _byteswap2(uint16_t num):
-    return ((num & 0xFF00) >> 8) | ((num & 0x00FF) << 8)
-
-
-cdef inline uint32_t _byteswap4(uint32_t num):
-    num = ((num & <uint32_t>0xFFFF0000) >> 16) | ((num & <uint32_t>0x0000FFFF) << 16)
-    return ((num & <uint32_t>0xFF00FF00) >> 8) | ((num & <uint32_t>0x00FF00FF) << 8)
-
-
-cdef inline uint64_t _byteswap8(uint64_t num):
-    num = ((num & <uint64_t>0xFFFFFFFF00000000) >> 32) | ((num & <uint64_t>0x00000000FFFFFFFF) << 32)
-    num = ((num & <uint64_t>0xFFFF0000FFFF0000) >> 16) | ((num & <uint64_t>0x0000FFFF0000FFFF) << 16)
-    return ((num & <uint64_t>0xFF00FF00FF00FF00) >> 8) | ((num & <uint64_t>0x00FF00FF00FF00FF) << 8)
+cdef extern from *:
+    """
+    #ifdef _MSC_VER
+        #define _byteswap2 _byteswap_ushort
+        #define _byteswap4 _byteswap_ulong
+        #define _byteswap8 _byteswap_uint64
+    #else
+        #define _byteswap2 __builtin_bswap16
+        #define _byteswap4 __builtin_bswap32
+        #define _byteswap8 __builtin_bswap64
+    #endif
+    """
+    uint16_t _byteswap2(uint16_t)
+    uint32_t _byteswap4(uint32_t)
+    uint64_t _byteswap8(uint64_t)
 
 
 cdef inline float _byteswap_float(float num):
-    cdef uint32_t answer = 0
-    memcpy(&answer, &num, 4)
-    answer = _byteswap4(answer)
-    memcpy(&num, &answer, 4)
+    cdef uint32_t *intptr = <uint32_t *>&num
+    intptr[0] = _byteswap4(intptr[0])
     return num
 
 
 cdef inline double _byteswap_double(double num):
-    cdef uint64_t answer = 0
-    memcpy(&answer, &num, 8)
-    answer = _byteswap8(answer)
-    memcpy(&num, &answer, 8)
+    cdef uint64_t *intptr = <uint64_t *>&num
+    intptr[0] = _byteswap8(intptr[0])
     return num

From c310c0d8889fbf8e9e43e32abb91e289f8c2d779 Mon Sep 17 00:00:00 2001
From: Jonas Haag <jonas@lophus.org>
Date: Sat, 10 Sep 2022 17:51:17 +0200
Subject: [PATCH 08/17] Lint: remove unused memcpy cimport

---
 pandas/io/sas/sas.pyx | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx
index 3e0474903883d..353b24d983bb7 100644
--- a/pandas/io/sas/sas.pyx
+++ b/pandas/io/sas/sas.pyx
@@ -8,7 +8,6 @@ from libc.stdint cimport (
     uint32_t,
     uint64_t,
 )
-from libc.string cimport memcpy
 
 import numpy as np
 

From 3b7ba836263ac3d069017cff3968967d6f8ff833 Mon Sep 17 00:00:00 2001
From: Jonas Haag <jonas@lophus.org>
Date: Sun, 11 Sep 2022 00:34:20 +0200
Subject: [PATCH 09/17] Add tests + move byteswap to module

---
 pandas/io/sas/_byteswap.pyi          |  5 ++
 pandas/io/sas/_sas.pyi               |  6 --
 pandas/io/sas/byteswap.pyx           | 92 ++++++++++++++++++++++++++++
 pandas/io/sas/sas.pyx                | 85 -------------------------
 pandas/io/sas/sas7bdat.py            |  4 +-
 pandas/tests/io/sas/test_byteswap.py | 48 +++++++++++++++
 setup.py                             |  2 +
 7 files changed, 149 insertions(+), 93 deletions(-)
 create mode 100644 pandas/io/sas/_byteswap.pyi
 create mode 100644 pandas/io/sas/byteswap.pyx
 create mode 100644 pandas/tests/io/sas/test_byteswap.py

diff --git a/pandas/io/sas/_byteswap.pyi b/pandas/io/sas/_byteswap.pyi
new file mode 100644
index 0000000000000..bb0dbfc6a50b1
--- /dev/null
+++ b/pandas/io/sas/_byteswap.pyi
@@ -0,0 +1,5 @@
+def read_float_with_byteswap(data: bytes, offset: int, byteswap: bool) -> float: ...
+def read_double_with_byteswap(data: bytes, offset: int, byteswap: bool) -> float: ...
+def read_uint16_with_byteswap(data: bytes, offset: int, byteswap: bool) -> int: ...
+def read_uint32_with_byteswap(data: bytes, offset: int, byteswap: bool) -> int: ...
+def read_uint64_with_byteswap(data: bytes, offset: int, byteswap: bool) -> int: ...
diff --git a/pandas/io/sas/_sas.pyi b/pandas/io/sas/_sas.pyi
index 8353a9ee0a890..527193dd71e57 100644
--- a/pandas/io/sas/_sas.pyi
+++ b/pandas/io/sas/_sas.pyi
@@ -3,9 +3,3 @@ from pandas.io.sas.sas7bdat import SAS7BDATReader
 class Parser:
     def __init__(self, parser: SAS7BDATReader) -> None: ...
     def read(self, nrows: int) -> None: ...
-
-def read_float_with_byteswap(data: bytes, offset: int, byteswap: bool) -> float: ...
-def read_double_with_byteswap(data: bytes, offset: int, byteswap: bool) -> float: ...
-def read_uint16_with_byteswap(data: bytes, offset: int, byteswap: bool) -> int: ...
-def read_uint32_with_byteswap(data: bytes, offset: int, byteswap: bool) -> int: ...
-def read_uint64_with_byteswap(data: bytes, offset: int, byteswap: bool) -> int: ...
diff --git a/pandas/io/sas/byteswap.pyx b/pandas/io/sas/byteswap.pyx
new file mode 100644
index 0000000000000..4620403910274
--- /dev/null
+++ b/pandas/io/sas/byteswap.pyx
@@ -0,0 +1,92 @@
+"""
+The following are faster versions of struct.unpack that avoid the overhead of Python function calls.
+
+In the SAS7BDAT parser, they may be called up to (n_rows * n_cols) times.
+"""
+from cython cimport Py_ssize_t
+from libc.stdint cimport (
+    uint16_t,
+    uint32_t,
+    uint64_t,
+)
+
+
+def read_float_with_byteswap(bytes data, Py_ssize_t offset, bint byteswap):
+    assert offset + 4 < len(data)
+    cdef:
+        const char *data_ptr = data
+        float res = (<float*>(data_ptr + offset))[0]
+    if byteswap:
+        res = _byteswap_float(res)
+    return res
+
+
+def read_double_with_byteswap(bytes data, Py_ssize_t offset, bint byteswap):
+    assert offset + 8 < len(data)
+    cdef:
+        const char *data_ptr = data
+        double res = (<double*>(data_ptr + offset))[0]
+    if byteswap:
+        res = _byteswap_double(res)
+    return res
+
+
+def read_uint16_with_byteswap(bytes data, Py_ssize_t offset, bint byteswap):
+    assert offset + 2 < len(data)
+    cdef:
+        const char *data_ptr = data
+        uint16_t res = (<uint16_t *>(data_ptr + offset))[0]
+    if byteswap:
+        res = _byteswap2(res)
+    return res
+
+
+def read_uint32_with_byteswap(bytes data, Py_ssize_t offset, bint byteswap):
+    assert offset + 4 < len(data)
+    cdef:
+        const char *data_ptr = data
+        uint32_t res = (<uint32_t *>(data_ptr + offset))[0]
+    if byteswap:
+        res = _byteswap4(res)
+    return res
+
+
+def read_uint64_with_byteswap(bytes data, Py_ssize_t offset, bint byteswap):
+    assert offset + 8 < len(data)
+    cdef:
+        const char *data_ptr = data
+        uint64_t res = (<uint64_t *>(data_ptr + offset))[0]
+    if byteswap:
+        res = _byteswap8(res)
+    return res
+
+
+# Byteswapping
+
+cdef extern from *:
+    """
+    #ifdef _MSC_VER
+        #define _byteswap2 _byteswap_ushort
+        #define _byteswap4 _byteswap_ulong
+        #define _byteswap8 _byteswap_uint64
+    #else
+        #define _byteswap2 __builtin_bswap16
+        #define _byteswap4 __builtin_bswap32
+        #define _byteswap8 __builtin_bswap64
+    #endif
+    """
+    uint16_t _byteswap2(uint16_t)
+    uint32_t _byteswap4(uint32_t)
+    uint64_t _byteswap8(uint64_t)
+
+
+cdef inline float _byteswap_float(float num):
+    cdef uint32_t *intptr = <uint32_t *>&num
+    intptr[0] = _byteswap4(intptr[0])
+    return num
+
+
+cdef inline double _byteswap_double(double num):
+    cdef uint64_t *intptr = <uint64_t *>&num
+    intptr[0] = _byteswap8(intptr[0])
+    return num
diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx
index 353b24d983bb7..0194ae4d24ecf 100644
--- a/pandas/io/sas/sas.pyx
+++ b/pandas/io/sas/sas.pyx
@@ -5,8 +5,6 @@ from libc.stdint cimport (
     int64_t,
     uint8_t,
     uint16_t,
-    uint32_t,
-    uint64_t,
 )
 
 import numpy as np
@@ -439,86 +437,3 @@ cdef class Parser:
         self.current_row_on_page_index += 1
         self.current_row_in_chunk_index += 1
         self.current_row_in_file_index += 1
-
-
-# The following are faster versions of struct.unpack that avoid the overhead of
-# Python function calls.  They may be called up to (n_rows * n_cols) times.
-def read_float_with_byteswap(bytes data, Py_ssize_t offset, bint byteswap):
-    assert offset + 4 < len(data)
-    cdef:
-        const char *data_ptr = data
-        float res = (<float*>(data_ptr + offset))[0]
-    if byteswap:
-        res = _byteswap_float(res)
-    return res
-
-
-def read_double_with_byteswap(bytes data, Py_ssize_t offset, bint byteswap):
-    assert offset + 8 < len(data)
-    cdef:
-        const char *data_ptr = data
-        double res = (<double*>(data_ptr + offset))[0]
-    if byteswap:
-        res = _byteswap_double(res)
-    return res
-
-
-def read_uint16_with_byteswap(bytes data, Py_ssize_t offset, bint byteswap):
-    assert offset + 2 < len(data)
-    cdef:
-        const char *data_ptr = data
-        uint16_t res = (<uint16_t *>(data_ptr + offset))[0]
-    if byteswap:
-        res = _byteswap2(res)
-    return res
-
-
-def read_uint32_with_byteswap(bytes data, Py_ssize_t offset, bint byteswap):
-    assert offset + 4 < len(data)
-    cdef:
-        const char *data_ptr = data
-        uint32_t res = (<uint32_t *>(data_ptr + offset))[0]
-    if byteswap:
-        res = _byteswap4(res)
-    return res
-
-
-def read_uint64_with_byteswap(bytes data, Py_ssize_t offset, bint byteswap):
-    assert offset + 8 < len(data)
-    cdef:
-        const char *data_ptr = data
-        uint64_t res = (<uint64_t *>(data_ptr + offset))[0]
-    if byteswap:
-        res = _byteswap8(res)
-    return res
-
-
-# Byteswapping
-
-cdef extern from *:
-    """
-    #ifdef _MSC_VER
-        #define _byteswap2 _byteswap_ushort
-        #define _byteswap4 _byteswap_ulong
-        #define _byteswap8 _byteswap_uint64
-    #else
-        #define _byteswap2 __builtin_bswap16
-        #define _byteswap4 __builtin_bswap32
-        #define _byteswap8 __builtin_bswap64
-    #endif
-    """
-    uint16_t _byteswap2(uint16_t)
-    uint32_t _byteswap4(uint32_t)
-    uint64_t _byteswap8(uint64_t)
-
-
-cdef inline float _byteswap_float(float num):
-    cdef uint32_t *intptr = <uint32_t *>&num
-    intptr[0] = _byteswap4(intptr[0])
-    return num
-
-
-cdef inline double _byteswap_double(double num):
-    cdef uint64_t *intptr = <uint64_t *>&num
-    intptr[0] = _byteswap8(intptr[0])
-    return num
diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py
index a1069b47d8812..8b03201883d06 100644
--- a/pandas/io/sas/sas7bdat.py
+++ b/pandas/io/sas/sas7bdat.py
@@ -42,14 +42,14 @@
 )
 
 from pandas.io.common import get_handle
-from pandas.io.sas._sas import (
-    Parser,
+from pandas.io.sas._byteswap import (
     read_double_with_byteswap,
     read_float_with_byteswap,
     read_uint16_with_byteswap,
     read_uint32_with_byteswap,
     read_uint64_with_byteswap,
 )
+from pandas.io.sas._sas import Parser
 import pandas.io.sas.sas_constants as const
 from pandas.io.sas.sasreader import ReaderBase
 
diff --git a/pandas/tests/io/sas/test_byteswap.py b/pandas/tests/io/sas/test_byteswap.py
new file mode 100644
index 0000000000000..84a6bf1cb6b1e
--- /dev/null
+++ b/pandas/tests/io/sas/test_byteswap.py
@@ -0,0 +1,48 @@
+import struct
+import sys
+
+from hypothesis import (
+    assume,
+    example,
+    given,
+    strategies as st,
+)
+import numpy as np
+import pytest
+
+from pandas.io.sas._byteswap import (
+    read_double_with_byteswap,
+    read_float_with_byteswap,
+    read_uint16_with_byteswap,
+    read_uint32_with_byteswap,
+    read_uint64_with_byteswap,
+)
+
+_swapped_byte_order = {"big": "<", "little": ">"}[sys.byteorder]
+
+
+@given(read_offset=st.integers(0, 11), number=st.integers(min_value=0))
+@example(number=2**16, read_offset=0)
+@example(number=2**32, read_offset=0)
+@example(number=2**64, read_offset=0)
+@pytest.mark.parametrize("int_type", ["H", "I", "Q"])
+@pytest.mark.parametrize("should_byteswap", [True, False])
+def test_int_byteswap(read_offset, number, int_type, should_byteswap):
+    int_type_nbytes = struct.calcsize(int_type)
+    assume(number < 2 ** (8 * int_type_nbytes))
+    number_bytes = struct.pack(int_type, number)
+    data = bytearray(np.random.default_rng().bytes(20))
+    data[read_offset : read_offset + int_type_nbytes] = number_bytes
+    read_uintxx_with_byteswap = {
+        "H": read_uint16_with_byteswap,
+        "I": read_uint32_with_byteswap,
+        "Q": read_uint64_with_byteswap,
+    }[int_type]
+    output_number = read_uintxx_with_byteswap(bytes(data), read_offset, should_byteswap)
+    if should_byteswap:
+        (number_bytes_swapped,) = struct.unpack(
+            _swapped_byte_order + int_type, number_bytes
+        )
+        assert output_number == number_bytes_swapped
+    else:
+        assert output_number == number
diff --git a/setup.py b/setup.py
index 12e8aa36c3794..3a7a2f3853ce5 100755
--- a/setup.py
+++ b/setup.py
@@ -226,6 +226,7 @@ class CheckSDist(sdist_class):
         "pandas/_libs/window/indexers.pyx",
         "pandas/_libs/writers.pyx",
         "pandas/io/sas/sas.pyx",
+        "pandas/io/sas/byteswap.pyx",
     ]
 
     _cpp_pyxfiles = [
@@ -570,6 +571,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
     "_libs.window.indexers": {"pyxfile": "_libs/window/indexers"},
     "_libs.writers": {"pyxfile": "_libs/writers"},
     "io.sas._sas": {"pyxfile": "io/sas/sas"},
+    "io.sas._byteswap": {"pyxfile": "io/sas/byteswap"},
 }
 
 extensions = []

From 53fbce2e5e2e39fdd3024bcd32c51306191e4af4 Mon Sep 17 00:00:00 2001
From: Jonas Haag <jonas@lophus.org>
Date: Sun, 11 Sep 2022 01:02:23 +0200
Subject: [PATCH 10/17] Add float tests + refactoring

---
 pandas/tests/io/sas/test_byteswap.py | 48 +++++++++++++++-------------
 1 file changed, 26 insertions(+), 22 deletions(-)

diff --git a/pandas/tests/io/sas/test_byteswap.py b/pandas/tests/io/sas/test_byteswap.py
index 84a6bf1cb6b1e..c11f8406b7803 100644
--- a/pandas/tests/io/sas/test_byteswap.py
+++ b/pandas/tests/io/sas/test_byteswap.py
@@ -1,6 +1,3 @@
-import struct
-import sys
-
 from hypothesis import (
     assume,
     example,
@@ -18,31 +15,38 @@
     read_uint64_with_byteswap,
 )
 
-_swapped_byte_order = {"big": "<", "little": ">"}[sys.byteorder]
-
 
 @given(read_offset=st.integers(0, 11), number=st.integers(min_value=0))
 @example(number=2**16, read_offset=0)
 @example(number=2**32, read_offset=0)
 @example(number=2**64, read_offset=0)
-@pytest.mark.parametrize("int_type", ["H", "I", "Q"])
+@pytest.mark.parametrize("int_type", [np.uint16, np.uint32, np.uint64])
 @pytest.mark.parametrize("should_byteswap", [True, False])
 def test_int_byteswap(read_offset, number, int_type, should_byteswap):
-    int_type_nbytes = struct.calcsize(int_type)
-    assume(number < 2 ** (8 * int_type_nbytes))
-    number_bytes = struct.pack(int_type, number)
-    data = bytearray(np.random.default_rng().bytes(20))
-    data[read_offset : read_offset + int_type_nbytes] = number_bytes
-    read_uintxx_with_byteswap = {
-        "H": read_uint16_with_byteswap,
-        "I": read_uint32_with_byteswap,
-        "Q": read_uint64_with_byteswap,
-    }[int_type]
-    output_number = read_uintxx_with_byteswap(bytes(data), read_offset, should_byteswap)
+    assume(number < 2 ** (8 * int_type(0).itemsize))
+    _test(number, int_type, read_offset, should_byteswap)
+
+
+@given(read_offset=st.integers(0, 11), number=st.floats())
+@pytest.mark.parametrize("float_type", [np.float32, np.float64])
+@pytest.mark.parametrize("should_byteswap", [True, False])
+def test_float_byteswap(read_offset, number, float_type, should_byteswap):
+    _test(number, float_type, read_offset, should_byteswap)
+
+
+def _test(number, number_type, read_offset, should_byteswap):
+    number = number_type([number])
+    data = np.random.default_rng().integers(0, 256, size=20, dtype="uint8")
+    data[read_offset : read_offset + number.itemsize] = number.view("uint8")
+    swap_func = {
+        np.float32: read_float_with_byteswap,
+        np.float64: read_double_with_byteswap,
+        np.uint16: read_uint16_with_byteswap,
+        np.uint32: read_uint32_with_byteswap,
+        np.uint64: read_uint64_with_byteswap,
+    }[type(number[0])]
+    output_number = swap_func(bytes(data), read_offset, should_byteswap)
     if should_byteswap:
-        (number_bytes_swapped,) = struct.unpack(
-            _swapped_byte_order + int_type, number_bytes
-        )
-        assert output_number == number_bytes_swapped
+        np.testing.assert_equal(output_number, number.byteswap())
     else:
-        assert output_number == number
+        np.testing.assert_equal(output_number, number)

From 9cbc5beebe14c0a2f11ec827d0803ff557790df2 Mon Sep 17 00:00:00 2001
From: Jonas Haag <jonas@lophus.org>
Date: Sun, 11 Sep 2022 01:05:03 +0200
Subject: [PATCH 11/17] Undo unrelated changes: restore ctypedefs in sas.pyx

---
 pandas/io/sas/sas.pyx | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx
index 0194ae4d24ecf..3ba0067331328 100644
--- a/pandas/io/sas/sas.pyx
+++ b/pandas/io/sas/sas.pyx
@@ -1,16 +1,14 @@
 # cython: profile=False
 # cython: boundscheck=False, initializedcheck=False
 from cython cimport Py_ssize_t
-from libc.stdint cimport (
-    int64_t,
-    uint8_t,
-    uint16_t,
-)
-
 import numpy as np
 
 import pandas.io.sas.sas_constants as const
 
+ctypedef signed long long   int64_t
+ctypedef unsigned char      uint8_t
+ctypedef unsigned short     uint16_t
+
 
 # rle_decompress decompresses data using a Run Length Encoding
 # algorithm.  It is partially documented here:

From 48028484fbbe617737e05015fe79a3d61ceb7764 Mon Sep 17 00:00:00 2001
From: Jonas Haag <jonas@lophus.org>
Date: Sun, 11 Sep 2022 01:05:30 +0200
Subject: [PATCH 12/17] Undo unrelated changes: drop extra blank line in sas.pyx

---
 pandas/io/sas/sas.pyx | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx
index 3ba0067331328..d8591c0b033a6 100644
--- a/pandas/io/sas/sas.pyx
+++ b/pandas/io/sas/sas.pyx
@@ -9,7 +9,6 @@ ctypedef signed long long   int64_t
 ctypedef unsigned char      uint8_t
 ctypedef unsigned short     uint16_t
 
-
 # rle_decompress decompresses data using a Run Length Encoding
 # algorithm.  It is partially documented here:
 #

From 41abe02d93707892d3d2c5c8056a02401dac246c Mon Sep 17 00:00:00 2001
From: Jonas Haag <jonas@lophus.org>
Date: Sun, 11 Sep 2022 11:00:01 +0200
Subject: [PATCH 13/17] Lint: use scalar values and tm.assert_equal in byteswap test

---
 pandas/tests/io/sas/test_byteswap.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/io/sas/test_byteswap.py b/pandas/tests/io/sas/test_byteswap.py
index c11f8406b7803..2c88907df3b1d 100644
--- a/pandas/tests/io/sas/test_byteswap.py
+++ b/pandas/tests/io/sas/test_byteswap.py
@@ -7,6 +7,8 @@
 import numpy as np
 import pytest
 
+import pandas._testing as tm
+
 from pandas.io.sas._byteswap import (
     read_double_with_byteswap,
     read_float_with_byteswap,
@@ -35,18 +37,18 @@ def test_float_byteswap(read_offset, number, float_type, should_byteswap):
 
 
 def _test(number, number_type, read_offset, should_byteswap):
-    number = number_type([number])
+    number = number_type(number)
     data = np.random.default_rng().integers(0, 256, size=20, dtype="uint8")
-    data[read_offset : read_offset + number.itemsize] = number.view("uint8")
+    data[read_offset : read_offset + number.itemsize] = number[None].view("uint8")
     swap_func = {
         np.float32: read_float_with_byteswap,
         np.float64: read_double_with_byteswap,
         np.uint16: read_uint16_with_byteswap,
         np.uint32: read_uint32_with_byteswap,
         np.uint64: read_uint64_with_byteswap,
-    }[type(number[0])]
-    output_number = swap_func(bytes(data), read_offset, should_byteswap)
+    }[type(number)]
+    output_number = number_type(swap_func(bytes(data), read_offset, should_byteswap))
     if should_byteswap:
-        np.testing.assert_equal(output_number, number.byteswap())
+        tm.assert_equal(output_number, number.byteswap())
     else:
-        np.testing.assert_equal(output_number, number)
+        tm.assert_equal(output_number, number)

From bf0976a608fe2dcca56f94c85c6b087296ee53b4 Mon Sep 17 00:00:00 2001
From: Jonas Haag <jonas@lophus.org>
Date: Thu, 15 Sep 2022 11:40:39 +0200
Subject: [PATCH 14/17] Update read_sas performance entry in whatsnew v1.6.0.rst

---
 doc/source/whatsnew/v1.6.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst
index e139af76a4926..89c6ec2b8b5fb 100644
--- a/doc/source/whatsnew/v1.6.0.rst
+++ b/doc/source/whatsnew/v1.6.0.rst
@@ -112,7 +112,7 @@ Performance improvements
 - Performance improvement in :meth:`DataFrame.loc` and :meth:`Series.loc` for tuple-based indexing of a :class:`MultiIndex` (:issue:`48384`)
 - Performance improvement for :meth:`MultiIndex.unique` (:issue:`48335`)
 - Performance improvement in ``var`` for nullable dtypes (:issue:`48379`).
-- Performance improvement to :func:`read_sas` with ``blank_missing=True`` (:issue:`48502`)
+- Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47405`, :issue:`47656`, :issue:`48502`)
 -
 
 .. ---------------------------------------------------------------------------

From c7c1a2f46e77f8a52074ba5ca17e012187748fad Mon Sep 17 00:00:00 2001
From: Jonas Haag <jonas@lophus.org>
Date: Tue, 4 Oct 2022 11:46:12 +0200
Subject: [PATCH 15/17] read_int -> read_uint

---
 pandas/io/sas/sas7bdat.py | 66 +++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py
index 23a62fe4c6def..ccb1cd8f3d9d3 100644
--- a/pandas/io/sas/sas7bdat.py
+++ b/pandas/io/sas/sas7bdat.py
@@ -310,7 +310,7 @@ def _get_properties(self) -> None:
         )
         self.date_modified = epoch + pd.to_timedelta(x, unit="s")
 
-        self.header_length = self._read_int(
+        self.header_length = self._read_uint(
             const.header_size_offset + align1, const.header_size_length
         )
 
@@ -322,10 +322,10 @@ def _get_properties(self) -> None:
         if len(self._cached_page) != self.header_length:  # type: ignore[arg-type]
             raise ValueError("The SAS7BDAT file appears to be truncated.")
 
-        self._page_length = self._read_int(
+        self._page_length = self._read_uint(
             const.page_size_offset + align1, const.page_size_length
         )
-        self._page_count = self._read_int(
+        self._page_count = self._read_uint(
             const.page_count_offset + align1, const.page_count_length
         )
 
@@ -371,8 +371,8 @@ def _read_float(self, offset: int, width: int):
             self.close()
             raise ValueError("invalid float width")
 
-    # Read a single signed integer of the given width (1, 2, 4 or 8).
-    def _read_int(self, offset: int, width: int) -> int:
+    # Read a single unsigned integer of the given width (1, 2, 4 or 8).
+    def _read_uint(self, offset: int, width: int) -> int:
         assert self._cached_page is not None
         if width == 1:
             return self._read_bytes(offset, 1)[0]
@@ -431,12 +431,12 @@ def _read_page_header(self) -> None:
         bit_offset = self._page_bit_offset
         tx = const.page_type_offset + bit_offset
         self._current_page_type = (
-            self._read_int(tx, const.page_type_length) & const.page_type_mask2
+            self._read_uint(tx, const.page_type_length) & const.page_type_mask2
         )
         tx = const.block_count_offset + bit_offset
-        self._current_page_block_count = self._read_int(tx, const.block_count_length)
+        self._current_page_block_count = self._read_uint(tx, const.block_count_length)
         tx = const.subheader_count_offset + bit_offset
-        self._current_page_subheaders_count = self._read_int(
+        self._current_page_subheaders_count = self._read_uint(
             tx, const.subheader_count_length
         )
 
@@ -477,16 +477,16 @@ def _process_subheader_pointers(
         subheader_pointer_length = self._subheader_pointer_length
         total_offset = offset + subheader_pointer_length * subheader_pointer_index
 
-        subheader_offset = self._read_int(total_offset, self._int_length)
+        subheader_offset = self._read_uint(total_offset, self._int_length)
         total_offset += self._int_length
 
-        subheader_length = self._read_int(total_offset, self._int_length)
+        subheader_length = self._read_uint(total_offset, self._int_length)
         total_offset += self._int_length
 
-        subheader_compression = self._read_int(total_offset, 1)
+        subheader_compression = self._read_uint(total_offset, 1)
         total_offset += 1
 
-        subheader_type = self._read_int(total_offset, 1)
+        subheader_type = self._read_uint(total_offset, 1)
 
         x = _SubheaderPointer(
             subheader_offset, subheader_length, subheader_compression, subheader_type
@@ -540,27 +540,27 @@ def _process_rowsize_subheader(self, offset: int, length: int) -> None:
             lcs_offset += 354
             lcp_offset += 378
 
-        self.row_length = self._read_int(
+        self.row_length = self._read_uint(
             offset + const.row_length_offset_multiplier * int_len, int_len
         )
-        self.row_count = self._read_int(
+        self.row_count = self._read_uint(
             offset + const.row_count_offset_multiplier * int_len, int_len
         )
-        self.col_count_p1 = self._read_int(
+        self.col_count_p1 = self._read_uint(
             offset + const.col_count_p1_multiplier * int_len, int_len
         )
-        self.col_count_p2 = self._read_int(
+        self.col_count_p2 = self._read_uint(
             offset + const.col_count_p2_multiplier * int_len, int_len
         )
         mx = const.row_count_on_mix_page_offset_multiplier * int_len
-        self._mix_page_row_count = self._read_int(offset + mx, int_len)
-        self._lcs = self._read_int(lcs_offset, 2)
-        self._lcp = self._read_int(lcp_offset, 2)
+        self._mix_page_row_count = self._read_uint(offset + mx, int_len)
+        self._lcs = self._read_uint(lcs_offset, 2)
+        self._lcp = self._read_uint(lcp_offset, 2)
 
     def _process_columnsize_subheader(self, offset: int, length: int) -> None:
         int_len = self._int_length
         offset += int_len
-        self.column_count = self._read_int(offset, int_len)
+        self.column_count = self._read_uint(offset, int_len)
         if self.col_count_p1 + self.col_count_p2 != self.column_count:
             print(
                 f"Warning: column count mismatch ({self.col_count_p1} + "
@@ -574,7 +574,7 @@ def _process_subheader_counts(self, offset: int, length: int) -> None:
     def _process_columntext_subheader(self, offset: int, length: int) -> None:
 
         offset += self._int_length
-        text_block_size = self._read_int(offset, const.text_block_size_length)
+        text_block_size = self._read_uint(offset, const.text_block_size_length)
 
         buf = self._read_bytes(offset, text_block_size)
         cname_raw = buf[0:text_block_size].rstrip(b"\x00 ")
@@ -638,13 +638,13 @@ def _process_columnname_subheader(self, offset: int, length: int) -> None:
                 + const.column_name_length_offset
             )
 
-            idx = self._read_int(
+            idx = self._read_uint(
                 text_subheader, const.column_name_text_subheader_length
             )
-            col_offset = self._read_int(
+            col_offset = self._read_uint(
                 col_name_offset, const.column_name_offset_length
             )
-            col_len = self._read_int(col_name_length, const.column_name_length_length)
+            col_len = self._read_uint(col_name_length, const.column_name_length_length)
 
             name_raw = self.column_names_raw[idx]
             cname = name_raw[col_offset : col_offset + col_len]
@@ -667,13 +667,13 @@ def _process_columnattributes_subheader(self, offset: int, length: int) -> None:
                 offset + 2 * int_len + const.column_type_offset + i * (int_len + 8)
             )
 
-            x = self._read_int(col_data_offset, int_len)
+            x = self._read_uint(col_data_offset, int_len)
             self._column_data_offsets.append(x)
 
-            x = self._read_int(col_data_len, const.column_data_length_length)
+            x = self._read_uint(col_data_len, const.column_data_length_length)
             self._column_data_lengths.append(x)
 
-            x = self._read_int(col_types, const.column_type_length)
+            x = self._read_uint(col_types, const.column_type_length)
             self._column_types.append(b"d" if x == 1 else b"s")
 
     def _process_columnlist_subheader(self, offset: int, length: int) -> None:
@@ -693,23 +693,23 @@ def _process_format_subheader(self, offset: int, length: int) -> None:
         col_label_offset = offset + const.column_label_offset_offset + 3 * int_len
         col_label_len = offset + const.column_label_length_offset + 3 * int_len
 
-        x = self._read_int(
+        x = self._read_uint(
             text_subheader_format, const.column_format_text_subheader_index_length
         )
         format_idx = min(x, len(self.column_names_raw) - 1)
 
-        format_start = self._read_int(
+        format_start = self._read_uint(
             col_format_offset, const.column_format_offset_length
         )
-        format_len = self._read_int(col_format_len, const.column_format_length_length)
+        format_len = self._read_uint(col_format_len, const.column_format_length_length)
 
-        label_idx = self._read_int(
+        label_idx = self._read_uint(
             text_subheader_label, const.column_label_text_subheader_index_length
         )
         label_idx = min(label_idx, len(self.column_names_raw) - 1)
 
-        label_start = self._read_int(col_label_offset, const.column_label_offset_length)
-        label_len = self._read_int(col_label_len, const.column_label_length_length)
+        label_start = self._read_uint(col_label_offset, const.column_label_offset_length)
+        label_len = self._read_uint(col_label_len, const.column_label_length_length)
 
         label_names = self.column_names_raw[label_idx]
         column_label = self._convert_header_text(

From 6a4a556c19e1795c7174b04f5ecc2a9eaed8e572 Mon Sep 17 00:00:00 2001
From: Jonas Haag <jonas@lophus.org>
Date: Tue, 4 Oct 2022 12:00:58 +0200
Subject: [PATCH 16/17] Lint

---
 pandas/io/sas/sas7bdat.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py
index ccb1cd8f3d9d3..c331064f72ee3 100644
--- a/pandas/io/sas/sas7bdat.py
+++ b/pandas/io/sas/sas7bdat.py
@@ -708,7 +708,9 @@ def _process_format_subheader(self, offset: int, length: int) -> None:
         )
         label_idx = min(label_idx, len(self.column_names_raw) - 1)
 
-        label_start = self._read_uint(col_label_offset, const.column_label_offset_length)
+        label_start = self._read_uint(
+            col_label_offset, const.column_label_offset_length
+        )
         label_len = self._read_uint(col_label_len, const.column_label_length_length)
 
         label_names = self.column_names_raw[label_idx]

From a4394348f49ab0dd0d70c48edf03bb6ee38b1569 Mon Sep 17 00:00:00 2001
From: Jonas Haag <jonas@lophus.org>
Date: Tue, 4 Oct 2022 21:39:44 +0200
Subject: [PATCH 17/17] Rename leftover _read_int call to _read_uint in sas7bdat.py

---
 pandas/io/sas/sas7bdat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py
index 5fd89319d3115..c9e1cd7940d7e 100644
--- a/pandas/io/sas/sas7bdat.py
+++ b/pandas/io/sas/sas7bdat.py
@@ -459,7 +459,7 @@ def _process_page_metadata(self) -> None:
             subheader_compression = self._read_uint(total_offset, 1)
             total_offset += 1
 
-            subheader_type = self._read_int(total_offset, 1)
+            subheader_type = self._read_uint(total_offset, 1)
 
             if (
                 subheader_length == 0