From 6ef619ec9fec9d0f12f8ffbd4338e3b23dd7a82b Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 15 Jan 2023 15:33:17 +0100
Subject: [PATCH 1/9] BUG: Series constructor overflowing for UInt64

---
 doc/source/whatsnew/v2.0.0.rst           |  1 +
 pandas/core/arrays/numeric.py            | 15 +++++++++++++++
 pandas/tests/series/test_constructors.py | 22 ++++++++++++++++++++++
 3 files changed, 38 insertions(+)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 6c8dd2f8da938..951116264daec 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -1075,6 +1075,7 @@ ExtensionArray
 - Bug in :meth:`array.PandasArray.to_numpy` raising with ``NA`` value when ``na_value`` is specified (:issue:`40638`)
 - Bug in :meth:`api.types.is_numeric_dtype` where a custom :class:`ExtensionDtype` would not return ``True`` if ``_is_numeric`` returned ``True`` (:issue:`50563`)
 - Bug in :meth:`api.types.is_integer_dtype`, :meth:`api.types.is_unsigned_integer_dtype`, :meth:`api.types.is_signed_integer_dtype`, :meth:`api.types.is_float_dtype` where a custom :class:`ExtensionDtype` would not return ``True`` if ``kind`` returned the corresponding NumPy type (:issue:`50667`)
+- Bug in :class:`Series` constructor unnecessarily overflowing for nullable unsigned integer dtypes (:issue:`38798`)
 
 Styler
 ^^^^^^
diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py
index 0a30c28acb090..bbe273bd34806 100644
--- a/pandas/core/arrays/numeric.py
+++ b/pandas/core/arrays/numeric.py
@@ -168,6 +168,7 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype
             mask = mask.copy()
         return values, mask, dtype, inferred_type
 
+    original = values
     values = np.array(values, copy=copy)
     inferred_type = None
     if is_object_dtype(values.dtype) or is_string_dtype(values.dtype):
@@ -204,6 +205,20 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype
     else:
         dtype = dtype.type
 
+    if (
+        is_integer_dtype(dtype)
+        and is_float_dtype(values.dtype)
+        and len(values) > 0
+        and np.max(values) > np.iinfo(np.intp).max
+    ):
+        # we either have floats and int targets which will raise below
+        # or ints that lose precision with float, so keep object for now
+        inferred_type = lib.infer_dtype(original, skipna=True)
+        if inferred_type not in ["floating", "mixed-integer-float"]:
+            values = np.array(original, dtype=dtype, copy=False)
+        else:
+            values = np.array(original, dtype="object", copy=False)
+
     # we copy as need to coerce here
     if mask.any():
         values = values.copy()
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 05e40e20f1226..a7c291f845744 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -49,6 +49,7 @@
 import pandas._testing as tm
 from pandas.core.api import Int64Index
 from pandas.core.arrays import (
+    IntegerArray,
     IntervalArray,
     period_array,
 )
@@ -2007,6 +2008,27 @@ def test_series_constructor_ea_int_from_string_bool(self):
         with pytest.raises(ValueError, match="invalid literal"):
             Series(["True", "False", "True", pd.NA], dtype="Int64")
 
+    @pytest.mark.parametrize("val", [1, 1.0])
+    def test_series_constructor_overflow_uint_ea(self, val):
+        # GH#38798: values near the uint64 max must round-trip exactly
+        max_val = np.iinfo(np.uint64).max - 1
+        result = Series([max_val, val], dtype="UInt64")
+        expected = Series(np.array([max_val, 1], dtype="uint64"), dtype="UInt64")
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize("val", [1, 1.0])
+    def test_series_constructor_overflow_uint_ea_with_na(self, val):
+        # GH#38798: same round-trip check with a trailing pd.NA that gets masked
+        max_val = np.iinfo(np.uint64).max - 1
+        result = Series([max_val, val, pd.NA], dtype="UInt64")
+        expected = Series(
+            IntegerArray(
+                np.array([max_val, 1, 0], dtype="uint64"),
+                np.array([0, 0, 1], dtype=np.bool_),
+            )
+        )
+        tm.assert_series_equal(result, expected)
+
 
 class TestSeriesConstructorIndexCoercion:
     def test_series_constructor_datetimelike_index_coercion(self):

From e02724520096e7a8b23b7bcd384baaf76fc1e5c1 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 15 Jan 2023 16:30:29 +0100
Subject: [PATCH 2/9] Remove xfail

---
 pandas/tests/tools/test_to_numeric.py | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py
index 1c0a8301d65cc..d3701c30aa50c 100644
--- a/pandas/tests/tools/test_to_numeric.py
+++ b/pandas/tests/tools/test_to_numeric.py
@@ -4,8 +4,6 @@
 from numpy import iinfo
 import pytest
 
-from pandas.compat import is_platform_arm
-
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -755,13 +753,7 @@ def test_to_numeric_from_nullable_string(values, nullable_string_dtype, expected
         ([1.0, 1.1], "Float64", "signed", "Float64"),
         ([1, pd.NA], "Int64", "signed", "Int8"),
         ([450, -300], "Int64", "signed", "Int16"),
-        pytest.param(
-            [np.iinfo(np.uint64).max - 1, 1],
-            "UInt64",
-            "signed",
-            "UInt64",
-            marks=pytest.mark.xfail(not is_platform_arm(), reason="GH38798"),
-        ),
+        ([np.iinfo(np.uint64).max - 1, 1], "UInt64", "signed", "UInt64"),
         ([1, 1], "Int64", "unsigned", "UInt8"),
         ([1.0, 1.0], "Float32", "unsigned", "UInt8"),
         ([1.0, 1.1], "Float64", "unsigned", "Float64"),

From 94a5c6a548611449e1e848d01c6be0e996b96725 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 15 Jan 2023 20:00:19 +0100
Subject: [PATCH 3/9] Detect precision loss by comparing the max value with the original

---
 doc/source/whatsnew/v2.0.0.rst           |  2 +-
 pandas/core/arrays/numeric.py            | 25 ++++++++++++------------
 pandas/tests/series/test_constructors.py | 12 ++++++++++++
 3 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 951116264daec..e20a6a04ce417 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -1075,7 +1075,7 @@ ExtensionArray
 - Bug in :meth:`array.PandasArray.to_numpy` raising with ``NA`` value when ``na_value`` is specified (:issue:`40638`)
 - Bug in :meth:`api.types.is_numeric_dtype` where a custom :class:`ExtensionDtype` would not return ``True`` if ``_is_numeric`` returned ``True`` (:issue:`50563`)
 - Bug in :meth:`api.types.is_integer_dtype`, :meth:`api.types.is_unsigned_integer_dtype`, :meth:`api.types.is_signed_integer_dtype`, :meth:`api.types.is_float_dtype` where a custom :class:`ExtensionDtype` would not return ``True`` if ``kind`` returned the corresponding NumPy type (:issue:`50667`)
-- Bug in :class:`Series` constructor unnecessarily overflowing for nullable unsigned integer dtypes (:issue:`38798`)
+- Bug in :class:`Series` constructor unnecessarily overflowing for nullable unsigned integer dtypes (:issue:`38798`, :issue:`25880`)
 
 Styler
 ^^^^^^
diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py
index bbe273bd34806..e4ab92335aacf 100644
--- a/pandas/core/arrays/numeric.py
+++ b/pandas/core/arrays/numeric.py
@@ -205,19 +205,18 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype
     else:
         dtype = dtype.type
 
-    if (
-        is_integer_dtype(dtype)
-        and is_float_dtype(values.dtype)
-        and len(values) > 0
-        and np.max(values) > np.iinfo(np.intp).max
-    ):
-        # we either have floats and int targets which will raise below
-        # or ints that lose precision with float, so keep object for now
-        inferred_type = lib.infer_dtype(original, skipna=True)
-        if inferred_type not in ["floating", "mixed-integer-float"]:
-            values = np.array(original, dtype=dtype, copy=False)
-        else:
-            values = np.array(original, dtype="object", copy=False)
+    if is_integer_dtype(dtype) and is_float_dtype(values.dtype) and len(values) > 0:
+        idx = np.argmax(values)
+        if values[idx] != original[idx]:
+            # We have ints that lost precision during the cast.
+            inferred_type = lib.infer_dtype(original, skipna=True)
+            if (
+                inferred_type not in ["floating", "mixed-integer-float"]
+                and not mask.any()
+            ):
+                values = np.array(original, dtype=dtype, copy=False)
+            else:
+                values = np.array(original, dtype="object", copy=False)
 
     # we copy as need to coerce here
     if mask.any():
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index a7c291f845744..8c05eeac39033 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -2029,6 +2029,18 @@ def test_series_constructor_overflow_uint_ea_with_na(self, val):
         )
         tm.assert_series_equal(result, expected)
 
+    def test_series_constructor_overflow_int_with_nan(self):
+        # GH#38798
+        max_val = np.iinfo(np.uint64).max - 1
+        result = Series([max_val, np.nan], dtype="UInt64")
+        expected = Series(
+            IntegerArray(
+                np.array([max_val, 1], dtype="uint64"),
+                np.array([0, 1], dtype=np.bool_),
+            )
+        )
+        tm.assert_series_equal(result, expected)
+
 
 class TestSeriesConstructorIndexCoercion:
     def test_series_constructor_datetimelike_index_coercion(self):

From adcaa7f9611c0f6e9a82fc8d5ad4d3996d628c17 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 15 Jan 2023 22:22:32 +0100
Subject: [PATCH 4/9] Cast the float maximum to int before comparing with the original

---
 pandas/core/arrays/numeric.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py
index e4ab92335aacf..b5ef10256a80e 100644
--- a/pandas/core/arrays/numeric.py
+++ b/pandas/core/arrays/numeric.py
@@ -207,7 +207,7 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype
 
     if is_integer_dtype(dtype) and is_float_dtype(values.dtype) and len(values) > 0:
         idx = np.argmax(values)
-        if values[idx] != original[idx]:
+        if int(values[idx]) != original[idx]:
             # We have ints that lost precision during the cast.
             inferred_type = lib.infer_dtype(original, skipna=True)
             if (

From b3707476b45c708b530f68d66371d3480647c5c9 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 15 Jan 2023 23:36:36 +0100
Subject: [PATCH 5/9] Skip the precision check when the maximum is NaN

---
 pandas/core/arrays/numeric.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py
index b5ef10256a80e..b3f78e5133324 100644
--- a/pandas/core/arrays/numeric.py
+++ b/pandas/core/arrays/numeric.py
@@ -207,7 +207,7 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype
 
     if is_integer_dtype(dtype) and is_float_dtype(values.dtype) and len(values) > 0:
         idx = np.argmax(values)
-        if int(values[idx]) != original[idx]:
+        if not np.isnan(values[idx]) and int(values[idx]) != original[idx]:
             # We have ints that lost precision during the cast.
             inferred_type = lib.infer_dtype(original, skipna=True)
             if (

From 944df67de411e1c10122e3708875350319a5925f Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 16 Jan 2023 01:07:19 +0100
Subject: [PATCH 6/9] Use nanargmax so NaN is ignored when locating the maximum

---
 pandas/core/arrays/numeric.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py
index b3f78e5133324..0bfbd68be48d3 100644
--- a/pandas/core/arrays/numeric.py
+++ b/pandas/core/arrays/numeric.py
@@ -206,7 +206,7 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype
         dtype = dtype.type
 
     if is_integer_dtype(dtype) and is_float_dtype(values.dtype) and len(values) > 0:
-        idx = np.argmax(values)
+        idx = np.nanargmax(values)
         if not np.isnan(values[idx]) and int(values[idx]) != original[idx]:
             # We have ints that lost precision during the cast.
             inferred_type = lib.infer_dtype(original, skipna=True)

From 1d91cca85c0d65cb69d17e3f50b126a88716286a Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 16 Jan 2023 01:17:35 +0100
Subject: [PATCH 7/9] Check for all na

---
 pandas/core/arrays/numeric.py            |  9 +++++++--
 pandas/tests/series/test_constructors.py | 11 +++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py
index 0bfbd68be48d3..df8c2e4dab1eb 100644
--- a/pandas/core/arrays/numeric.py
+++ b/pandas/core/arrays/numeric.py
@@ -205,9 +205,14 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype
     else:
         dtype = dtype.type
 
-    if is_integer_dtype(dtype) and is_float_dtype(values.dtype) and len(values) > 0:
+    if (
+        is_integer_dtype(dtype)
+        and is_float_dtype(values.dtype)
+        and len(values) > 0
+        and not mask.all()
+    ):
         idx = np.nanargmax(values)
-        if not np.isnan(values[idx]) and int(values[idx]) != original[idx]:
+        if int(values[idx]) != original[idx]:
             # We have ints that lost precision during the cast.
             inferred_type = lib.infer_dtype(original, skipna=True)
             if (
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 8c05eeac39033..0c48457eb656e 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -2041,6 +2041,17 @@ def test_series_constructor_overflow_int_with_nan(self):
         )
         tm.assert_series_equal(result, expected)
 
+    def test_series_constructor_ea_all_na(self):
+        # GH#38798: all-NaN input must come back fully masked
+        result = Series([np.nan, np.nan], dtype="UInt64")
+        expected = Series(
+            IntegerArray(
+                np.array([1, 1], dtype="uint64"),
+                np.array([1, 1], dtype=np.bool_),
+            )
+        )
+        tm.assert_series_equal(result, expected)
+
 
 class TestSeriesConstructorIndexCoercion:
     def test_series_constructor_datetimelike_index_coercion(self):

From 0b1e3f82f2313b052624b9275bb15e93ea63a119 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Mon, 16 Jan 2023 01:39:19 +0100
Subject: [PATCH 8/9] Fix all na case

---
 pandas/core/arrays/numeric.py | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py
index df8c2e4dab1eb..6ae2c3a2e2749 100644
--- a/pandas/core/arrays/numeric.py
+++ b/pandas/core/arrays/numeric.py
@@ -205,23 +205,21 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype
     else:
         dtype = dtype.type
 
-    if (
-        is_integer_dtype(dtype)
-        and is_float_dtype(values.dtype)
-        and len(values) > 0
-        and not mask.all()
-    ):
-        idx = np.nanargmax(values)
-        if int(values[idx]) != original[idx]:
-            # We have ints that lost precision during the cast.
-            inferred_type = lib.infer_dtype(original, skipna=True)
-            if (
-                inferred_type not in ["floating", "mixed-integer-float"]
-                and not mask.any()
-            ):
-                values = np.array(original, dtype=dtype, copy=False)
-            else:
-                values = np.array(original, dtype="object", copy=False)
+    if is_integer_dtype(dtype) and is_float_dtype(values.dtype) and len(values) > 0:
+        if mask.all():
+            values = np.ones(values.shape, dtype=dtype)
+        else:
+            idx = np.nanargmax(values)
+            if int(values[idx]) != original[idx]:
+                # We have ints that lost precision during the cast.
+                inferred_type = lib.infer_dtype(original, skipna=True)
+                if (
+                    inferred_type not in ["floating", "mixed-integer-float"]
+                    and not mask.any()
+                ):
+                    values = np.array(original, dtype=dtype, copy=False)
+                else:
+                    values = np.array(original, dtype="object", copy=False)
 
     # we copy as need to coerce here
     if mask.any():

From 9c9a68a688a28f2495f9755d6618e6525028f565 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Wed, 18 Jan 2023 12:14:06 +0100
Subject: [PATCH 9/9] Rename overflow NaN test from int to uint

---
 pandas/tests/series/test_constructors.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 0c48457eb656e..bd53e38f4ce28 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -2029,7 +2029,7 @@ def test_series_constructor_overflow_uint_ea_with_na(self, val):
         )
         tm.assert_series_equal(result, expected)
 
-    def test_series_constructor_overflow_int_with_nan(self):
+    def test_series_constructor_overflow_uint_with_nan(self):
         # GH#38798
         max_val = np.iinfo(np.uint64).max - 1
         result = Series([max_val, np.nan], dtype="UInt64")