From 3e1784de50a9305dbccb185d4b2830a4c4addbed Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Mon, 10 May 2021 16:19:47 -0700
Subject: [PATCH 01/28] API: allow nan-likes in StringArray constructor

---
 doc/source/whatsnew/v1.3.0.rst             |  1 +
 pandas/_libs/lib.pyx                       | 24 ++++++++++++++------
 pandas/core/arrays/string_.py              | 26 +++++++++++++++++-----
 pandas/tests/arrays/string_/test_string.py | 12 +++++-----
 4 files changed, 46 insertions(+), 17 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 5adc8540e6864..fd246cb554d7f 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -612,6 +612,7 @@ Other API changes
 - Partially initialized :class:`CategoricalDtype` (i.e. those with ``categories=None`` objects will no longer compare as equal to fully initialized dtype objects.
 - Accessing ``_constructor_expanddim`` on a :class:`DataFrame` and ``_constructor_sliced`` on a :class:`Series` now raise an ``AttributeError``. Previously a ``NotImplementedError`` was raised (:issue:`38782`)
 - Added new ``engine`` and ``**engine_kwargs`` parameters to :meth:`DataFrame.to_sql` to support other future "SQL engines". Currently we still only use ``SQLAlchemy`` under the hood, but more engines are planned to be supported such as ``turbodbc`` (:issue:`36893`)
+- :class:`StringArray` now accepts nan-likes(``None``, ``nan``, ``NaT``, ``NA``, Decimal("NaN")) in its constructor in addition to strings.
 
 Build
 =====
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index e1cb744c7033c..fcb6d39bfc91f 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -679,11 +679,14 @@ cpdef ndarray[object] ensure_string_array(
         arr,
         object na_value=np.nan,
         bint convert_na_value=True,
+        bint coerce=True,
         bint copy=True,
         bint skipna=True,
 ):
     """
-    Returns a new numpy array with object dtype and only strings and na values.
+    Checks that all elements in numpy are string or null and returns a new numpy array
+    with object dtype and only strings and na values if so. Otherwise,
+    raise a ValueError.
 
     Parameters
     ----------
@@ -693,6 +696,9 @@ cpdef ndarray[object] ensure_string_array(
         The value to use for na. For example, np.nan or pd.NA.
     convert_na_value : bool, default True
         If False, existing na values will be used unchanged in the new array.
+    coerce : bool, default True
+        Whether to coerce non-null non-string elements to strings.
+        Will raise ValueError otherwise.
     copy : bool, default True
         Whether to ensure that a new array is returned.
     skipna : bool, default True
@@ -724,7 +730,10 @@ cpdef ndarray[object] ensure_string_array(
             continue
 
         if not checknull(val):
-            result[i] = str(val)
+            if coerce:
+                result[i] = str(val)
+            else:
+                raise ValueError("Non-string element encountered in array.")
         else:
             if convert_na_value:
                 val = na_value
@@ -1835,10 +1844,6 @@ cdef class StringValidator(Validator):
     cdef inline bint is_array_typed(self) except -1:
         return issubclass(self.dtype.type, np.str_)
 
-    cdef bint is_valid_null(self, object value) except -1:
-        # We deliberately exclude None / NaN here since StringArray uses NA
-        return value is C_NA
-
 
 cpdef bint is_string_array(ndarray values, bint skipna=False):
     cdef:
@@ -2059,7 +2064,7 @@ def maybe_convert_numeric(
         upcasting for ints with nulls to float64.
     Returns
     -------
-    np.ndarray
+    np.ndarray or tuple of converted values and its mask
         Array of converted object values to numerical ones.
 
     Optional[np.ndarray]
@@ -2224,6 +2229,11 @@ def maybe_convert_numeric(
     if allow_null_in_int and seen.null_ and not seen.int_:
         seen.float_ = True
 
+    # This occurs since we disabled float nulls showing as null in anticipation
+    # of seeing ints that were never seen. So then, we return float
+    if allow_null_in_int and seen.null_ and not seen.int_:
+        seen.float_ = True
+
     if seen.complex_:
         return (complexes, None)
     elif seen.float_:
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 74ca5130ca322..c30d4b8ba7b41 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -144,11 +144,18 @@ class StringArray(PandasArray):
         .. warning::
 
            Currently, this expects an object-dtype ndarray
-           where the elements are Python strings or :attr:`pandas.NA`.
+           where the elements are Python strings
+           or nan-likes(``None``, ``nan``, ``NaT``, ``NA``, Decimal("NaN")).
            This may change without warning in the future. Use
            :meth:`pandas.array` with ``dtype="string"`` for a stable way of
            creating a `StringArray` from any sequence.
 
+        .. versionchanged:: 1.3
+
+           StringArray now accepts nan-likes in the constructor in addition
+           to strings, whereas it only accepted strings and :attr:`pandas.NA`
+           before.
+
     copy : bool, default False
         Whether to copy the array of data.
 
@@ -208,21 +215,30 @@ def __init__(self, values, copy=False):
         values = extract_array(values)
 
         super().__init__(values, copy=copy)
+        if not isinstance(values, type(self)):
+            self._validate()
         # error: Incompatible types in assignment (expression has type "StringDtype",
         # variable has type "PandasDtype")
         NDArrayBacked.__init__(self, self._ndarray, StringDtype())
-        if not isinstance(values, type(self)):
-            self._validate()
 
     def _validate(self):
         """Validate that we only store NA or strings."""
-        if len(self._ndarray) and not lib.is_string_array(self._ndarray, skipna=True):
-            raise ValueError("StringArray requires a sequence of strings or pandas.NA")
         if self._ndarray.dtype != "object":
             raise ValueError(
                 "StringArray requires a sequence of strings or pandas.NA. Got "
                 f"'{self._ndarray.dtype}' dtype instead."
             )
+        try:
+            lib.ensure_string_array(
+                self._ndarray, na_value=StringDtype.na_value, coerce=False, copy=False
+            ),
+            NDArrayBacked.__init__(
+                self,
+                self._ndarray,
+                StringDtype(),
+            )
+        except ValueError:
+            raise ValueError("StringArray requires a sequence of strings or pandas.NA")
 
     @classmethod
     def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False):
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 17d05ebeb0fc5..722aada176c44 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -297,13 +297,15 @@ def test_constructor_raises(cls):
         cls(np.array([]))
 
     with pytest.raises(ValueError, match=msg):
-        cls(np.array(["a", np.nan], dtype=object))
+        cls(np.array(["a", None]))
 
-    with pytest.raises(ValueError, match=msg):
-        cls(np.array(["a", None], dtype=object))
 
-    with pytest.raises(ValueError, match=msg):
-        cls(np.array(["a", pd.NaT], dtype=object))
+@pytest.mark.parametrize("na", [np.nan, pd.NaT, None, pd.NA])
+def test_constructor_nan_like(na):
+    expected = pd.arrays.StringArray(np.array(["a", pd.NA]))
+    tm.assert_extension_array_equal(
+        pd.arrays.StringArray(np.array(["a", na], dtype="object")), expected
+    )
 
 
 @pytest.mark.parametrize("copy", [True, False])

From 96ff1da535cd571cd45cb60d4cd1fdb47744f79e Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Mon, 10 May 2021 19:31:47 -0700
Subject: [PATCH 02/28] Revert stray maybe_convert_numeric edits; update lib.pyi stub and tests

---
 pandas/_libs/lib.pyi                       | 1 +
 pandas/_libs/lib.pyx                       | 7 +------
 pandas/tests/arrays/string_/test_string.py | 2 +-
 pandas/tests/dtypes/test_inference.py      | 7 ++++---
 4 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
index 9dbc47f1d40f7..22990361bc52e 100644
--- a/pandas/_libs/lib.pyi
+++ b/pandas/_libs/lib.pyi
@@ -138,6 +138,7 @@ def ensure_string_array(
     arr,
     na_value: object = np.nan,
     convert_na_value: bool = True,
+    coerce: bool = True,
     copy: bool = True,
     skipna: bool = True,
 ) -> np.ndarray: ...  # np.ndarray[object]
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index fcb6d39bfc91f..b1523421e59fd 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2064,7 +2064,7 @@ def maybe_convert_numeric(
         upcasting for ints with nulls to float64.
     Returns
     -------
-    np.ndarray or tuple of converted values and its mask
+    np.ndarray
         Array of converted object values to numerical ones.
 
     Optional[np.ndarray]
@@ -2229,11 +2229,6 @@ def maybe_convert_numeric(
     if allow_null_in_int and seen.null_ and not seen.int_:
         seen.float_ = True
 
-    # This occurs since we disabled float nulls showing as null in anticipation
-    # of seeing ints that were never seen. So then, we return float
-    if allow_null_in_int and seen.null_ and not seen.int_:
-        seen.float_ = True
-
     if seen.complex_:
         return (complexes, None)
     elif seen.float_:
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 722aada176c44..b3bc3b09e047a 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -297,7 +297,7 @@ def test_constructor_raises(cls):
         cls(np.array([]))
 
     with pytest.raises(ValueError, match=msg):
-        cls(np.array(["a", None]))
+        cls(np.array(["a", np.nan]))
 
 
 @pytest.mark.parametrize("na", [np.nan, pd.NaT, None, pd.NA])
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index 076cc155f3626..73e87c75ee621 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -1376,11 +1376,12 @@ def test_is_string_array(self):
         assert lib.is_string_array(
             np.array(["foo", "bar", pd.NA], dtype=object), skipna=True
         )
-        # NaN is not valid for string array, just NA
-        assert not lib.is_string_array(
+        assert lib.is_string_array(
             np.array(["foo", "bar", np.nan], dtype=object), skipna=True
         )
-
+        assert not lib.is_string_array(
+            np.array(["foo", "bar", np.nan], dtype=object), skipna=False
+        )
         assert not lib.is_string_array(np.array([1, 2]))
 
     def test_to_object_array_tuples(self):

From 418e1d201ad0c20b9c5119fff34567fe72158ec2 Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Tue, 11 May 2021 07:01:06 -0700
Subject: [PATCH 03/28] Remove failing test

---
 pandas/tests/arrays/string_/test_string.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index b3bc3b09e047a..7feb22f69632a 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -296,9 +296,6 @@ def test_constructor_raises(cls):
     with pytest.raises(ValueError, match=msg):
         cls(np.array([]))
 
-    with pytest.raises(ValueError, match=msg):
-        cls(np.array(["a", np.nan]))
-
 
 @pytest.mark.parametrize("na", [np.nan, pd.NaT, None, pd.NA])
 def test_constructor_nan_like(na):

From 25a6c4d2ec9287b5b0a341c3cdd583cc3659a276 Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Wed, 19 May 2021 16:23:41 -0700
Subject: [PATCH 04/28] Replace convert_na_value and bool coerce with string-valued coerce option

---
 pandas/_libs/lib.pyi               |  3 +--
 pandas/_libs/lib.pyx               | 24 ++++++++++++++----------
 pandas/core/arrays/string_.py      |  9 ++-------
 pandas/core/arrays/string_arrow.py |  2 +-
 pandas/core/dtypes/cast.py         |  4 ++--
 5 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
index 22990361bc52e..966fd0cd4c008 100644
--- a/pandas/_libs/lib.pyi
+++ b/pandas/_libs/lib.pyi
@@ -137,8 +137,7 @@ def maybe_convert_numeric(
 def ensure_string_array(
     arr,
     na_value: object = np.nan,
-    convert_na_value: bool = True,
-    coerce: bool = True,
+    coerce: str = "all,
     copy: bool = True,
     skipna: bool = True,
 ) -> np.ndarray: ...  # np.ndarray[object]
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index b1523421e59fd..fc3d73f332646 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -678,15 +678,14 @@ def astype_intsafe(ndarray[object] arr, cnp.dtype new_dtype) -> ndarray:
 cpdef ndarray[object] ensure_string_array(
         arr,
         object na_value=np.nan,
-        bint convert_na_value=True,
-        bint coerce=True,
+        coerce="all",
         bint copy=True,
         bint skipna=True,
 ):
     """
-    Checks that all elements in numpy are string or null and returns a new numpy array
-    with object dtype and only strings and na values if so. Otherwise,
-    raise a ValueError.
+    Checks that all elements in numpy array are string or null
+    and returns a new numpy array with object dtype
+    and only strings and na values if so. Otherwise, raise a ValueError.
 
     Parameters
     ----------
@@ -696,9 +695,14 @@ cpdef ndarray[object] ensure_string_array(
         The value to use for na. For example, np.nan or pd.NA.
     convert_na_value : bool, default True
         If False, existing na values will be used unchanged in the new array.
-    coerce : bool, default True
-        Whether to coerce non-null non-string elements to strings.
-        Will raise ValueError otherwise.
+    coerce : {{'all', 'null', 'non-null', None}}, default 'all'
+        Whether to coerce non-string elements to strings.
+            - 'all' will convert null values and non-null non-string values.
+            - 'null' will only convert nulls without converting other non-strings.
+            - 'non-null' will only convert non-null non-string elements to string.
+            - None will not convert anything.
+        If coerce is not all, a ValueError will be raised for values
+        that are not strings or na_value.
     copy : bool, default True
         Whether to ensure that a new array is returned.
     skipna : bool, default True
@@ -730,12 +734,12 @@ cpdef ndarray[object] ensure_string_array(
             continue
 
         if not checknull(val):
-            if coerce:
+            if coerce =="all" or coerce == "non-null":
                 result[i] = str(val)
             else:
                 raise ValueError("Non-string element encountered in array.")
         else:
-            if convert_na_value:
+            if coerce=="all" or coerce == "null":
                 val = na_value
             if skipna:
                 result[i] = val
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index c30d4b8ba7b41..289204c9aa4e5 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -230,13 +230,8 @@ def _validate(self):
             )
         try:
             lib.ensure_string_array(
-                self._ndarray, na_value=StringDtype.na_value, coerce=False, copy=False
+                self._ndarray, na_value=StringDtype.na_value, coerce="null", copy=False
             ),
-            NDArrayBacked.__init__(
-                self,
-                self._ndarray,
-                StringDtype(),
-            )
         except ValueError:
             raise ValueError("StringArray requires a sequence of strings or pandas.NA")
 
@@ -251,7 +246,7 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False):
             # avoid costly conversion to object dtype
             na_values = scalars._mask
             result = scalars._data
-            result = lib.ensure_string_array(result, copy=copy, convert_na_value=False)
+            result = lib.ensure_string_array(result, copy=copy, coerce="non-null")
             result[na_values] = StringDtype.na_value
 
         else:
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 219a8c7ec0b82..42b7bf1a52513 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -247,7 +247,7 @@ def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False)
             # numerical issues with Float32Dtype
             na_values = scalars._mask
             result = scalars._data
-            result = lib.ensure_string_array(result, copy=copy, convert_na_value=False)
+            result = lib.ensure_string_array(result, copy=copy, coerce="non-null")
             return cls(pa.array(result, mask=na_values, type=pa.string()))
 
         # convert non-na-likes to str
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 46dc97214e2f6..1e8c09136e223 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1125,7 +1125,7 @@ def astype_nansafe(
         return arr.astype(dtype, copy=copy)
 
     if issubclass(dtype.type, str):
-        return lib.ensure_string_array(arr, skipna=skipna, convert_na_value=False)
+        return lib.ensure_string_array(arr, skipna=skipna, convert_na_value="non-null")
 
     elif is_datetime64_dtype(arr):
         if dtype == np.int64:
@@ -1925,7 +1925,7 @@ def construct_1d_ndarray_preserving_na(
     """
 
     if dtype is not None and dtype.kind == "U":
-        subarr = lib.ensure_string_array(values, convert_na_value=False, copy=copy)
+        subarr = lib.ensure_string_array(values, coerce="non-null", copy=copy)
     else:
         if dtype is not None:
             _disallow_mismatched_datetimelike(values, dtype)

From 8257dbd739a4b6f12b737f89da317a24d3f8b07f Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Thu, 20 May 2021 14:32:58 -0700
Subject: [PATCH 05/28] Fix wrong keyword in astype_nansafe (convert_na_value -> coerce)

---
 pandas/core/dtypes/cast.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index c256f20527ad6..46af33b724d2a 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1125,7 +1125,7 @@ def astype_nansafe(
         return arr.astype(dtype, copy=copy)
 
     if issubclass(dtype.type, str):
-        return lib.ensure_string_array(arr, skipna=skipna, convert_na_value="non-null")
+        return lib.ensure_string_array(arr, skipna=skipna, coerce="non-null")
 
     elif is_datetime64_dtype(arr):
         if dtype == np.int64:

From 922436a78903dfa55cd1d54d4381477cad934af5 Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Fri, 21 May 2021 13:30:08 -0700
Subject: [PATCH 06/28] Fix unterminated string default in ensure_string_array stub

---
 pandas/_libs/lib.pyi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
index 1e49ce67f7cec..726b306e71fd5 100644
--- a/pandas/_libs/lib.pyi
+++ b/pandas/_libs/lib.pyi
@@ -146,7 +146,7 @@ def maybe_convert_numeric(
 def ensure_string_array(
     arr,
     na_value: object = np.nan,
-    coerce: str = "all,
+    coerce: str = "all",
     copy: bool = True,
     skipna: bool = True,
 ) -> np.ndarray: ...  # np.ndarray[object]

From 2f28086a0f23bf2b30d79ca41aaab0abb3ca370b Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Sat, 29 May 2021 11:03:33 -0700
Subject: [PATCH 07/28] Re-add StringValidator.is_valid_null accepting None, NA and NaN

---
 pandas/_libs/lib.pyx | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 8df50a32ae482..99872d2f9e91f 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -1849,7 +1849,11 @@ cdef class StringValidator(Validator):
 
     cdef inline bint is_array_typed(self) except -1:
         return issubclass(self.dtype.type, np.str_)
-
+    
+    cdef bint is_valid_null(self, object value) except -1:
+        # Override to exclude float('Nan') and complex NaN
+        return value is None or value is C_NA or np.isnan(value)
+        
 
 cpdef bint is_string_array(ndarray values, bint skipna=False):
     cdef:

From 3ee219815e619fb57edeee0c295ba36e84232e0a Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Sat, 29 May 2021 11:05:19 -0700
Subject: [PATCH 08/28] Drop stale convert_na_value docs and fix whitespace in lib.pyx

---
 pandas/_libs/lib.pyx | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 99872d2f9e91f..ce70d15c202f5 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -693,8 +693,6 @@ cpdef ndarray[object] ensure_string_array(
         The values to be converted to str, if needed.
     na_value : Any, default np.nan
         The value to use for na. For example, np.nan or pd.NA.
-    convert_na_value : bool, default True
-        If False, existing na values will be used unchanged in the new array.
     coerce : {{'all', 'null', 'non-null', None}}, default 'all'
         Whether to coerce non-string elements to strings.
             - 'all' will convert null values and non-null non-string values.
@@ -1849,11 +1847,11 @@ cdef class StringValidator(Validator):
 
     cdef inline bint is_array_typed(self) except -1:
         return issubclass(self.dtype.type, np.str_)
-    
+
     cdef bint is_valid_null(self, object value) except -1:
         # Override to exclude float('Nan') and complex NaN
         return value is None or value is C_NA or np.isnan(value)
-        
+
 
 cpdef bint is_string_array(ndarray values, bint skipna=False):
     cdef:

From 3ee55f25a94a12da069a387a150164538394d460 Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Sat, 29 May 2021 21:21:57 -0700
Subject: [PATCH 09/28] Test that NaT is rejected by the constructor and None counts as null

---
 pandas/tests/arrays/string_/test_string.py | 5 ++++-
 pandas/tests/dtypes/test_inference.py      | 6 ++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index a246be938aef0..af57aff03b073 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -289,8 +289,11 @@ def test_constructor_raises(cls):
     with pytest.raises(ValueError, match=msg):
         cls(np.array([]))
 
+    with pytest.raises(ValueError, match=msg):
+        cls(np.array(["a", pd.NaT], dtype=object))
+
 
-@pytest.mark.parametrize("na", [np.nan, pd.NaT, None, pd.NA])
+@pytest.mark.parametrize("na", [np.nan, None, pd.NA])
 def test_constructor_nan_like(na):
     expected = pd.arrays.StringArray(np.array(["a", pd.NA]))
     tm.assert_extension_array_equal(
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index 987b3accbca2e..87a1be80e3639 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -1391,6 +1391,12 @@ def test_is_string_array(self):
         assert lib.is_string_array(
             np.array(["foo", "bar", np.nan], dtype=object), skipna=True
         )
+        assert lib.is_string_array(
+            np.array(["foo", "bar", None], dtype=object), skipna=True
+        )
+        assert not lib.is_string_array(
+            np.array(["foo", "bar", None], dtype=object), skipna=False
+        )
         assert not lib.is_string_array(
             np.array(["foo", "bar", np.nan], dtype=object), skipna=False
         )

From fe4981a6337cd59ae68b1ff44ca0f9b600d2ee49 Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Sun, 30 May 2021 06:18:55 -0700
Subject: [PATCH 10/28] Stop using checknull for null detection in ensure_string_array

---
 pandas/_libs/lib.pyx | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index a1e66a575097e..08d7a68cd0dc0 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -731,7 +731,10 @@ cpdef ndarray[object] ensure_string_array(
         if isinstance(val, str):
             continue
 
-        if not checknull(val):
+        if not (val is None or val is C_NA or np.isnan(val)):
+            # We don't use checknull, since NaT, Decimal("NaN"), etc. aren't valid
+            # If they are present, they are treated like a regular Python object
+            # and will either cause an exception to be raised or be coerced.
             if coerce =="all" or coerce == "non-null":
                 result[i] = str(val)
             else:

From a66948aa7aa21d057c322895b59ea9f8c79480cd Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Sun, 30 May 2021 09:26:52 -0700
Subject: [PATCH 11/28] Use val != val instead of np.isnan in ensure_string_array

---
 pandas/_libs/lib.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 08d7a68cd0dc0..a987f47533259 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -731,7 +731,7 @@ cpdef ndarray[object] ensure_string_array(
         if isinstance(val, str):
             continue
 
-        if not (val is None or val is C_NA or np.isnan(val)):
+        if not (val is None or val is C_NA or val != val):
             # We don't use checknull, since NaT, Decimal("NaN"), etc. aren't valid
             # If they are present, they are treated like a regular Python object
             # and will either cause an exception to be raised or be coerced.

From e8527191d33ed9c4416d265b175822c19bd5b4ae Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Mon, 31 May 2021 09:29:58 -0700
Subject: [PATCH 12/28] Match np.nan by identity in string null checks

---
 pandas/_libs/lib.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index a987f47533259..f39b1fbc49cdb 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -731,7 +731,7 @@ cpdef ndarray[object] ensure_string_array(
         if isinstance(val, str):
             continue
 
-        if not (val is None or val is C_NA or val != val):
+        if not (val is None or val is C_NA or val is np.nan):
             # We don't use checknull, since NaT, Decimal("NaN"), etc. aren't valid
             # If they are present, they are treated like a regular Python object
             # and will either cause an exception to be raised or be coerced.
@@ -1853,7 +1853,7 @@ cdef class StringValidator(Validator):
 
     cdef bint is_valid_null(self, object value) except -1:
         # Override to exclude float('Nan') and complex NaN
-        return value is None or value is C_NA or np.isnan(value)
+        return value is None or value is C_NA or value is np.nan
 
 
 cpdef bint is_string_array(ndarray values, bint skipna=False):

From 91b73bb93aad90f26040c729b57d99ec26eb3941 Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Fri, 4 Jun 2021 08:28:52 -0700
Subject: [PATCH 13/28] disallow invalid nans in stringarray constructor

---
 pandas/_libs/lib.pyx          | 23 ++++++++++++++++-------
 pandas/core/arrays/string_.py |  7 +++++--
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index f39b1fbc49cdb..e3fa8eeaa9b53 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -94,6 +94,7 @@ from pandas._libs.missing cimport (
     is_null_timedelta64,
     isnaobj,
 )
+from pandas._libs.missing import checknull
 from pandas._libs.tslibs.conversion cimport convert_to_tsobject
 from pandas._libs.tslibs.nattype cimport (
     NPY_NAT,
@@ -696,10 +697,12 @@ cpdef ndarray[object] ensure_string_array(
     coerce : {{'all', 'null', 'non-null', None}}, default 'all'
         Whether to coerce non-string elements to strings.
             - 'all' will convert null values and non-null non-string values.
-            - 'null' will only convert nulls without converting other non-strings.
+            - 'strict-null' will only convert pd.NA, np.nan, or None to na_value
+              without converting other non-strings.
+            - 'null' will convert nulls to na_value w/out converting other non-strings.
             - 'non-null' will only convert non-null non-string elements to string.
             - None will not convert anything.
-        If coerce is not all, a ValueError will be raised for values
+        If coerce is not 'all', a ValueError will be raised for values
         that are not strings or na_value.
     copy : bool, default True
         Whether to ensure that a new array is returned.
@@ -714,6 +717,7 @@ cpdef ndarray[object] ensure_string_array(
     """
     cdef:
         Py_ssize_t i = 0, n = len(arr)
+        set strict_na_values = {C_NA, np.nan, None}
 
     if hasattr(arr, "to_numpy"):
         arr = arr.to_numpy()
@@ -725,22 +729,27 @@ cpdef ndarray[object] ensure_string_array(
     if copy and result is arr:
         result = result.copy()
 
+    if coerce == 'strict-null':
+        # We don't use checknull, since NaT, Decimal("NaN"), etc. aren't valid
+        # If they are present, they are treated like a regular Python object
+        # and will either cause an exception to be raised or be coerced.
+        check_null = strict_na_values.__contains__
+    else:
+        check_null = checknull
+
     for i in range(n):
         val = arr[i]
 
         if isinstance(val, str):
             continue
 
-        if not (val is None or val is C_NA or val is np.nan):
-            # We don't use checknull, since NaT, Decimal("NaN"), etc. aren't valid
-            # If they are present, they are treated like a regular Python object
-            # and will either cause an exception to be raised or be coerced.
+        if not check_null(val):
             if coerce =="all" or coerce == "non-null":
                 result[i] = str(val)
             else:
                 raise ValueError("Non-string element encountered in array.")
         else:
-            if coerce=="all" or coerce == "null":
+            if coerce=="all" or coerce == "null" or coerce == 'strict-null':
                 val = na_value
             if skipna:
                 result[i] = val
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 79ddd12476323..d0ea1aa5c5293 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -230,8 +230,11 @@ def _validate(self):
             )
         try:
             lib.ensure_string_array(
-                self._ndarray, na_value=StringDtype.na_value, coerce="null", copy=False
-            ),
+                self._ndarray,
+                na_value=StringDtype.na_value,
+                coerce="strict-null",
+                copy=False,
+            )
         except ValueError:
             raise ValueError("StringArray requires a sequence of strings or pandas.NA")
 

From 41f49d21d8da2bbdcc37d33714d009ea2b862049 Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Fri, 4 Jun 2021 12:40:56 -0700
Subject: [PATCH 14/28] add to _from_sequence and fixes

---
 doc/source/whatsnew/v1.3.0.rst     |  2 +-
 pandas/core/arrays/string_.py      | 18 ++++++++++++++----
 pandas/core/arrays/string_arrow.py | 16 +++++++++++++---
 3 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 93e27a7318f2d..4c5175b8e1bcc 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -644,7 +644,7 @@ Other API changes
 - Partially initialized :class:`CategoricalDtype` (i.e. those with ``categories=None`` objects will no longer compare as equal to fully initialized dtype objects.
 - Accessing ``_constructor_expanddim`` on a :class:`DataFrame` and ``_constructor_sliced`` on a :class:`Series` now raise an ``AttributeError``. Previously a ``NotImplementedError`` was raised (:issue:`38782`)
 - Added new ``engine`` and ``**engine_kwargs`` parameters to :meth:`DataFrame.to_sql` to support other future "SQL engines". Currently we still only use ``SQLAlchemy`` under the hood, but more engines are planned to be supported such as ``turbodbc`` (:issue:`36893`)
-- :class:`StringArray` now accepts nan-likes(``None``, ``nan``, ``NaT``, ``NA``, Decimal("NaN")) in its constructor in addition to strings.
+- :class:`StringArray` now accepts nan-likes(``None``, ``nan``, ``NA``) in its constructor in addition to strings.
 - Removed redundant ``freq`` from :class:`PeriodIndex` string representation (:issue:`41653`)
 
 
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index d0ea1aa5c5293..4d97035714ba3 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -145,7 +145,7 @@ class StringArray(PandasArray):
 
            Currently, this expects an object-dtype ndarray
            where the elements are Python strings
-           or nan-likes(``None``, ``nan``, ``NaT``, ``NA``, Decimal("NaN")).
+           or nan-likes(``None``, ``nan``, ``NA``).
            This may change without warning in the future. Use
            :meth:`pandas.array` with ``dtype="string"`` for a stable way of
            creating a `StringArray` from any sequence.
@@ -239,7 +239,9 @@ def _validate(self):
             raise ValueError("StringArray requires a sequence of strings or pandas.NA")
 
     @classmethod
-    def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False):
+    def _from_sequence(
+        cls, scalars, *, dtype: Dtype | None = None, copy=False, coerce=True
+    ):
         if dtype:
             assert dtype == "string"
 
@@ -247,15 +249,23 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False):
 
         if isinstance(scalars, BaseMaskedArray):
             # avoid costly conversion to object dtype
+            if coerce:
+                coerce = "non-null"
+            else:
+                coerce = None
             na_values = scalars._mask
             result = scalars._data
-            result = lib.ensure_string_array(result, copy=copy, coerce="non-null")
+            result = lib.ensure_string_array(result, copy=copy, coerce=coerce)
             result[na_values] = StringDtype.na_value
 
         else:
             # convert non-na-likes to str, and nan-likes to StringDtype.na_value
+            if coerce:
+                coerce = "all"
+            else:
+                coerce = "strict-null"
             result = lib.ensure_string_array(
-                scalars, na_value=StringDtype.na_value, copy=copy
+                scalars, na_value=StringDtype.na_value, copy=copy, coerce=coerce
             )
 
         # Manually creating new array avoids the validation step in the __init__, so is
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 7aeadbb4c4616..f0af7a8a43594 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -237,7 +237,9 @@ def __init__(self, values):
             )
 
     @classmethod
-    def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False):
+    def _from_sequence(
+        cls, scalars, dtype: Dtype | None = None, copy: bool = False, coerce=True
+    ):
         from pandas.core.arrays.masked import BaseMaskedArray
 
         _chk_pyarrow_available()
@@ -247,11 +249,19 @@ def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False)
             # numerical issues with Float32Dtype
             na_values = scalars._mask
             result = scalars._data
-            result = lib.ensure_string_array(result, copy=copy, coerce="non-null")
+            if coerce:
+                coerce = "non-null"
+            else:
+                coerce = None
+            result = lib.ensure_string_array(result, copy=copy, coerce=coerce)
             return cls(pa.array(result, mask=na_values, type=pa.string()))
 
         # convert non-na-likes to str
-        result = lib.ensure_string_array(scalars, copy=copy)
+        if coerce:
+            coerce = "all"
+        else:
+            coerce = "strict-null"
+        result = lib.ensure_string_array(scalars, copy=copy, coerce=coerce)
         return cls(pa.array(result, type=pa.string(), from_pandas=True))
 
     @classmethod

From 62cc5be5dde0b71c4cbb006378cbf27363a2577d Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Fri, 4 Jun 2021 16:49:58 -0700
Subject: [PATCH 15/28] address code review

---
 asv_bench/benchmarks/strings.py            | 7 +++++++
 pandas/_libs/lib.pyx                       | 2 ++
 pandas/tests/arrays/string_/test_string.py | 7 +++++++
 3 files changed, 16 insertions(+)

diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py
index 2e109e59c1c6d..765697f1cc505 100644
--- a/asv_bench/benchmarks/strings.py
+++ b/asv_bench/benchmarks/strings.py
@@ -7,6 +7,7 @@
     DataFrame,
     Series,
 )
+from pandas.core.arrays import StringArray
 
 from .pandas_vb_common import tm
 
@@ -61,6 +62,12 @@ def time_cat_frame_construction(self, dtype):
     def peakmem_cat_frame_construction(self, dtype):
         DataFrame(self.frame_cat_arr, dtype=dtype)
 
+    def time_string_array_construction(self):
+        StringArray(self.series_arr)
+
+    def peakmem_stringarray_construction(self):
+        StringArray(self.series_arr)
+
 
 class Methods(Dtypes):
     def time_center(self, dtype):
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index c68758f990ad2..b73c8cbe0f018 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -719,6 +719,8 @@ cpdef ndarray[object] ensure_string_array(
     np.ndarray[object]
         An array with the input array's elements casted to str or nan-like.
     """
+    if coerce not in {"all", "strict-null", "null", "non-null", None}:
+        raise ValueError("coerce argument is not valid")
     cdef:
         Py_ssize_t i = 0, n = len(arr)
         set strict_na_values = {C_NA, np.nan, None}
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 26770fcc1bf62..822c7f79e15dc 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -8,6 +8,7 @@
 import numpy as np
 import pytest
 
+import pandas._libs.lib as lib
 import pandas.util._test_decorators as td
 
 from pandas.core.dtypes.common import is_dtype_equal
@@ -303,6 +304,12 @@ def test_constructor_nan_like(na):
     )
 
 
+def test_invalid_coerce_raises():
+    data = np.array(["a", "b'"], dtype=object)
+    with pytest.raises(ValueError, match="coerce argument is not valid"):
+        lib.ensure_string_array(data, coerce="abcd")
+
+
 @pytest.mark.parametrize("copy", [True, False])
 def test_from_sequence_no_mutate(copy, cls, request):
     if cls is ArrowStringArray and copy is False:

From 153b6b4d2c6015c2dedcc1692a4b3d4c53408ff9 Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Fri, 4 Jun 2021 19:34:44 -0700
Subject: [PATCH 16/28] Fix failures

---
 pandas/core/construction.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index 330902b402324..85a634042658e 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -727,7 +727,7 @@ def _try_cast(
 
     elif dtype.kind == "U":
         # TODO: test cases with arr.dtype.kind in ["m", "M"]
-        return lib.ensure_string_array(arr, convert_na_value=False, copy=copy)
+        return lib.ensure_string_array(arr, coerce="non-null", copy=copy)
 
     elif dtype.kind in ["m", "M"]:
         return maybe_cast_to_datetime(arr, dtype)

From b27a839c3cae55ce9765b91cdc593684ede18a73 Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Fri, 4 Jun 2021 19:55:13 -0700
Subject: [PATCH 17/28] maybe fix benchmarks?

---
 asv_bench/benchmarks/strings.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py
index 765697f1cc505..0ac6b7643fa9d 100644
--- a/asv_bench/benchmarks/strings.py
+++ b/asv_bench/benchmarks/strings.py
@@ -62,10 +62,10 @@ def time_cat_frame_construction(self, dtype):
     def peakmem_cat_frame_construction(self, dtype):
         DataFrame(self.frame_cat_arr, dtype=dtype)
 
-    def time_string_array_construction(self):
+    def time_string_array_construction(self, dtype):
         StringArray(self.series_arr)
 
-    def peakmem_stringarray_construction(self):
+    def peakmem_stringarray_construction(self, dtype):
         StringArray(self.series_arr)
 
 

From ed5b9536d1ec30accfa23cc1726ec4fe2876fa1a Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Sat, 5 Jun 2021 12:43:42 -0700
Subject: [PATCH 18/28] Partially address code review

---
 doc/source/whatsnew/v1.3.0.rst             |  3 +--
 pandas/_libs/lib.pyx                       | 21 ++++++++++++++-------
 pandas/core/arrays/string_.py              |  8 ++++----
 pandas/tests/arrays/string_/test_string.py |  6 +++++-
 4 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 247548cf7c9f1..34ea5bf25d9ac 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -644,10 +644,9 @@ Other API changes
 - Partially initialized :class:`CategoricalDtype` (i.e. those with ``categories=None`` objects will no longer compare as equal to fully initialized dtype objects.
 - Accessing ``_constructor_expanddim`` on a :class:`DataFrame` and ``_constructor_sliced`` on a :class:`Series` now raise an ``AttributeError``. Previously a ``NotImplementedError`` was raised (:issue:`38782`)
 - Added new ``engine`` and ``**engine_kwargs`` parameters to :meth:`DataFrame.to_sql` to support other future "SQL engines". Currently we still only use ``SQLAlchemy`` under the hood, but more engines are planned to be supported such as ``turbodbc`` (:issue:`36893`)
-- :class:`StringArray` now accepts nan-likes(``None``, ``nan``, ``NA``) in its constructor in addition to strings.
+- :class:`StringArray` now accepts nan-likes(``None``, ``np.nan``) for the ``values`` parameter in its constructor in addition to strings and :attr:`pandas.NA`. (:issue:`40839`)
 - Removed redundant ``freq`` from :class:`PeriodIndex` string representation (:issue:`41653`)
 
-
 Build
 =====
 
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index abb714d6b383a..d55cc37b09f87 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -688,9 +688,7 @@ cpdef ndarray[object] ensure_string_array(
         bint skipna=True,
 ):
     """
-    Checks that all elements in numpy array are string or null
-    and returns a new numpy array with object dtype
-    and only strings and na values if so. Otherwise, raise a ValueError.
+    Returns a new numpy array with object dtype and only strings and na values.
 
     Parameters
     ----------
@@ -698,7 +696,7 @@ cpdef ndarray[object] ensure_string_array(
         The values to be converted to str, if needed.
     na_value : Any, default np.nan
         The value to use for na. For example, np.nan or pd.NA.
-    coerce : {{'all', 'null', 'non-null', None}}, default 'all'
+    coerce : {'all', 'strict-null', 'null', 'non-null', None}, default 'all'
         Whether to coerce non-string elements to strings.
             - 'all' will convert null values and non-null non-string values.
             - 'strict-null' will only convert pd.NA, np.nan, or None to na_value
@@ -717,10 +715,17 @@ cpdef ndarray[object] ensure_string_array(
     Returns
     -------
     np.ndarray[object]
-        An array with the input array's elements casted to str or nan-like.
+        An array of strings and na_value.
+
+    Raises
+    ------
+    ValueError
+        If an element is encountered that is not a string or valid NA value
+        and element is not coerced.
     """
     if coerce not in {"all", "strict-null", "null", "non-null", None}:
-        raise ValueError("coerce argument is not valid")
+        raise ValueError("coerce argument must be one of "
+                         f"'all'|'strict-null'|'null'|'non-null'|None, not {coerce}")
     cdef:
         Py_ssize_t i = 0, n = len(arr)
         set strict_na_values = {C_NA, np.nan, None}
@@ -753,7 +758,9 @@ cpdef ndarray[object] ensure_string_array(
             if coerce =="all" or coerce == "non-null":
                 result[i] = str(val)
             else:
-                raise ValueError("Non-string element encountered in array.")
+                raise ValueError(f"Element {val} is not a string or valid null."
+                                 "If you want it to be coerced to a string,"
+                                 "specify coerce='all'")
         else:
             if coerce=="all" or coerce == "null" or coerce == 'strict-null':
                 val = na_value
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 4d97035714ba3..e68b63346ce90 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -145,16 +145,16 @@ class StringArray(PandasArray):
 
            Currently, this expects an object-dtype ndarray
            where the elements are Python strings
-           or nan-likes(``None``, ``nan``, ``NA``).
+           or nan-likes(``None``, ``np.nan``, ``NA``).
            This may change without warning in the future. Use
            :meth:`pandas.array` with ``dtype="string"`` for a stable way of
            creating a `StringArray` from any sequence.
 
         .. versionchanged:: 1.3
 
-           StringArray now accepts nan-likes in the constructor in addition
-           to strings, whereas it only accepted strings and :attr:`pandas.NA`
-           before.
+           StringArray now accepts nan-likes(``None``, ``np.nan``) for the
+           ``values`` parameter in its constructor
+           in addition to strings and :attr:`pandas.NA`
 
     copy : bool, default False
         Whether to copy the array of data.
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 822c7f79e15dc..75e5203d34f55 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -306,7 +306,11 @@ def test_constructor_nan_like(na):
 
 def test_invalid_coerce_raises():
     data = np.array(["a", "b'"], dtype=object)
-    with pytest.raises(ValueError, match="coerce argument is not valid"):
+    with pytest.raises(
+        ValueError,
+        match="coerce argument must be one of "
+        "'all'|'strict-null'|'null'|'non-null'|None, not abcd",
+    ):
         lib.ensure_string_array(data, coerce="abcd")
 
 

From caa57050887a355daf53830d3899e866b98b17ce Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Sat, 5 Jun 2021 20:47:50 -0700
Subject: [PATCH 19/28] Test coerce=False

---
 pandas/tests/arrays/string_/test_string.py | 37 ++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 75e5203d34f55..0112ef2b2f5af 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -15,6 +15,7 @@
 
 import pandas as pd
 import pandas._testing as tm
+from pandas.core.arrays import BaseMaskedArray
 from pandas.core.arrays.string_arrow import (
     ArrowStringArray,
     ArrowStringDtype,
@@ -314,6 +315,42 @@ def test_invalid_coerce_raises():
         lib.ensure_string_array(data, coerce="abcd")
 
 
+@pytest.mark.parametrize(
+    "values",
+    [
+        np.array(["foo", "bar", pd.NA], dtype=object),
+        np.array(["foo", "bar", np.nan], dtype=object),
+        np.array(["foo", "bar", None], dtype=object),
+        BaseMaskedArray(
+            np.array(["foo", "bar", "garbage"]), np.array([False, False, True])
+        ),
+    ],
+)
+def test_from_sequence_no_coerce(cls, values):
+    expected = pd.arrays.StringArray(np.array(["foo", "bar", pd.NA], dtype=object))
+    result = cls._from_sequence(values, coerce=False)
+    # Use bare assert since classes are different
+    assert (result == expected).all()
+
+
+@pytest.mark.parametrize(
+    "values",
+    [
+        np.array(["foo", "bar", pd.NaT], dtype=object),
+        np.array(["foo", "bar", np.datetime64("nat")], dtype=object),
+        np.array(["foo", "bar", float("nan")], dtype=object),
+    ],
+)
+def test_from_sequence_no_coerce_invalid(cls, values):
+    with pytest.raises(
+        ValueError,
+        match="Element .* is not a string or valid null."
+        "If you want it to be coerced to a string,"
+        "specify coerce='all'",
+    ):
+        cls._from_sequence(values, coerce=False)
+
+
 @pytest.mark.parametrize("copy", [True, False])
 def test_from_sequence_no_mutate(copy, cls, request):
     if cls is ArrowStringArray and copy is False:

From 2d75031a6ecb76aa9247c9925fe1ebe9de131eb1 Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Mon, 7 Jun 2021 12:29:17 -0700
Subject: [PATCH 20/28] move benchmarks

---
 asv_bench/benchmarks/strings.py | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py
index 0ac6b7643fa9d..8ebce086b9cf5 100644
--- a/asv_bench/benchmarks/strings.py
+++ b/asv_bench/benchmarks/strings.py
@@ -3,6 +3,7 @@
 import numpy as np
 
 from pandas import (
+    NA,
     Categorical,
     DataFrame,
     Series,
@@ -26,7 +27,6 @@ def setup(self, dtype):
 
 
 class Construction:
-
     params = ["str", "string"]
     param_names = ["dtype"]
 
@@ -62,12 +62,6 @@ def time_cat_frame_construction(self, dtype):
     def peakmem_cat_frame_construction(self, dtype):
         DataFrame(self.frame_cat_arr, dtype=dtype)
 
-    def time_string_array_construction(self, dtype):
-        StringArray(self.series_arr)
-
-    def peakmem_stringarray_construction(self, dtype):
-        StringArray(self.series_arr)
-
 
 class Methods(Dtypes):
     def time_center(self, dtype):
@@ -184,7 +178,6 @@ def time_isupper(self, dtype):
 
 
 class Repeat:
-
     params = ["int", "array"]
     param_names = ["repeats"]
 
@@ -199,7 +192,6 @@ def time_repeat(self, repeats):
 
 
 class Cat:
-
     params = ([0, 3], [None, ","], [None, "-"], [0.0, 0.001, 0.15])
     param_names = ["other_cols", "sep", "na_rep", "na_frac"]
 
@@ -224,7 +216,6 @@ def time_cat(self, other_cols, sep, na_rep, na_frac):
 
 
 class Contains(Dtypes):
-
     params = (Dtypes.params, [True, False])
     param_names = ["dtype", "regex"]
 
@@ -236,7 +227,6 @@ def time_contains(self, dtype, regex):
 
 
 class Split(Dtypes):
-
     params = (Dtypes.params, [True, False])
     param_names = ["dtype", "expand"]
 
@@ -252,7 +242,6 @@ def time_rsplit(self, dtype, expand):
 
 
 class Extract(Dtypes):
-
     params = (Dtypes.params, [True, False])
     param_names = ["dtype", "expand"]
 
@@ -294,3 +283,18 @@ class Iter(Dtypes):
     def time_iter(self, dtype):
         for i in self.s:
             pass
+
+
+class StringArrayConstruction:
+    def setup(self):
+        self.series_arr = tm.rands_array(nchars=10, size=10 ** 5)
+        self.series_arr_nan = np.concatenate([self.series_arr, np.array([NA] * 1000)])
+
+    def time_string_array_construction(self):
+        StringArray(self.series_arr)
+
+    def time_string_array_with_nan_construction(self):
+        StringArray(self.series_arr_nan)
+
+    def peakmem_stringarray_construction(self):
+        StringArray(self.series_arr)

From 52a00d1b01bbc639b4bf1fb323916ca79d4f9c0d Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Mon, 7 Jun 2021 12:34:22 -0700
Subject: [PATCH 21/28] accidental formatting changes

---
 asv_bench/benchmarks/strings.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py
index 8ebce086b9cf5..82278474ab337 100644
--- a/asv_bench/benchmarks/strings.py
+++ b/asv_bench/benchmarks/strings.py
@@ -27,6 +27,7 @@ def setup(self, dtype):
 
 
 class Construction:
+
     params = ["str", "string"]
     param_names = ["dtype"]
 
@@ -178,6 +179,7 @@ def time_isupper(self, dtype):
 
 
 class Repeat:
+
     params = ["int", "array"]
     param_names = ["repeats"]
 
@@ -192,6 +194,7 @@ def time_repeat(self, repeats):
 
 
 class Cat:
+
     params = ([0, 3], [None, ","], [None, "-"], [0.0, 0.001, 0.15])
     param_names = ["other_cols", "sep", "na_rep", "na_frac"]
 
@@ -216,6 +219,7 @@ def time_cat(self, other_cols, sep, na_rep, na_frac):
 
 
 class Contains(Dtypes):
+
     params = (Dtypes.params, [True, False])
     param_names = ["dtype", "regex"]
 
@@ -227,6 +231,7 @@ def time_contains(self, dtype, regex):
 
 
 class Split(Dtypes):
+
     params = (Dtypes.params, [True, False])
     param_names = ["dtype", "expand"]
 
@@ -242,6 +247,7 @@ def time_rsplit(self, dtype, expand):
 
 
 class Extract(Dtypes):
+
     params = (Dtypes.params, [True, False])
     param_names = ["dtype", "expand"]
 

From 8dc0b66fb3f0d54baef0d1468ffbc8fc5800fec0 Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Mon, 7 Jun 2021 21:01:59 -0700
Subject: [PATCH 22/28] Fix

---
 doc/source/whatsnew/v1.3.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 34ea5bf25d9ac..13e5a37b906c7 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -644,8 +644,8 @@ Other API changes
 - Partially initialized :class:`CategoricalDtype` (i.e. those with ``categories=None`` objects will no longer compare as equal to fully initialized dtype objects.
 - Accessing ``_constructor_expanddim`` on a :class:`DataFrame` and ``_constructor_sliced`` on a :class:`Series` now raise an ``AttributeError``. Previously a ``NotImplementedError`` was raised (:issue:`38782`)
 - Added new ``engine`` and ``**engine_kwargs`` parameters to :meth:`DataFrame.to_sql` to support other future "SQL engines". Currently we still only use ``SQLAlchemy`` under the hood, but more engines are planned to be supported such as ``turbodbc`` (:issue:`36893`)
-- :class:`StringArray` now accepts nan-likes(``None``, ``np.nan``) for the ``values`` parameter in its constructor in addition to strings and :attr:`pandas.NA`. (:issue:`40839`)
 - Removed redundant ``freq`` from :class:`PeriodIndex` string representation (:issue:`41653`)
+- :class:`StringArray` now accepts nan-likes(``None``, ``np.nan``) for the ``values`` parameter in its constructor in addition to strings and :attr:`pandas.NA`. (:issue:`40839`)
 
 Build
 =====

From 66be08780e37f6d26fa2f4817e20430bac8974be Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Tue, 8 Jun 2021 10:48:56 -0700
Subject: [PATCH 23/28] missing import from conflict

---
 pandas/tests/arrays/string_/test_string.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 8bd46b89c2a3c..934c9d96da442 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -15,6 +15,7 @@
 
 import pandas as pd
 import pandas._testing as tm
+from pandas.core.arrays import BaseMaskedArray
 from pandas.core.arrays.string_arrow import ArrowStringArray
 
 

From 3c5709438457cd5c45b8cde0282c113d55f1dfcc Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Wed, 21 Jul 2021 09:51:57 -0700
Subject: [PATCH 24/28] remove old whatsnew

---
 doc/source/whatsnew/v1.3.0.rst | 77 ++++++++++++++++++----------------
 1 file changed, 41 insertions(+), 36 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 722fac61a9495..ed66861efad93 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -1,7 +1,7 @@
 .. _whatsnew_130:
 
-What's new in 1.3.0 (??)
-------------------------
+What's new in 1.3.0 (July 2, 2021)
+----------------------------------
 
 These are the changes in pandas 1.3.0. See :ref:`release` for a full changelog
 including other versions of pandas.
@@ -124,7 +124,7 @@ which has been revised and improved (:issue:`39720`, :issue:`39317`, :issue:`404
  - The methods :meth:`.Styler.highlight_null`, :meth:`.Styler.highlight_min`, and :meth:`.Styler.highlight_max` now allow custom CSS highlighting instead of the default background coloring (:issue:`40242`)
  - :meth:`.Styler.apply` now accepts functions that return an ``ndarray`` when ``axis=None``, making it now consistent with the ``axis=0`` and ``axis=1`` behavior (:issue:`39359`)
  - When incorrectly formatted CSS is given via :meth:`.Styler.apply` or :meth:`.Styler.applymap`, an error is now raised upon rendering (:issue:`39660`)
- - :meth:`.Styler.format` now accepts the keyword argument ``escape`` for optional HTML and LaTex escaping (:issue:`40388`, :issue:`41619`)
+ - :meth:`.Styler.format` now accepts the keyword argument ``escape`` for optional HTML and LaTeX escaping (:issue:`40388`, :issue:`41619`)
  - :meth:`.Styler.background_gradient` has gained the argument ``gmap`` to supply a specific gradient map for shading (:issue:`22727`)
  - :meth:`.Styler.clear` now clears :attr:`Styler.hidden_index` and :attr:`Styler.hidden_columns` as well (:issue:`40484`)
  - Added the method :meth:`.Styler.highlight_between` (:issue:`39821`)
@@ -136,7 +136,7 @@ which has been revised and improved (:issue:`39720`, :issue:`39317`, :issue:`404
  - Many features of the :class:`.Styler` class are now either partially or fully usable on a DataFrame with a non-unique indexes or columns (:issue:`41143`)
  - One has greater control of the display through separate sparsification of the index or columns using the :ref:`new styler options <options.available>`, which are also usable via :func:`option_context` (:issue:`41142`)
  - Added the option ``styler.render.max_elements`` to avoid browser overload when styling large DataFrames (:issue:`40712`)
- - Added the method :meth:`.Styler.to_latex` (:issue:`21673`), which also allows some limited CSS conversion (:issue:`40731`)
+ - Added the method :meth:`.Styler.to_latex` (:issue:`21673`, :issue:`42320`), which also allows some limited CSS conversion (:issue:`40731`)
  - Added the method :meth:`.Styler.to_html` (:issue:`13379`)
  - Added the method :meth:`.Styler.set_sticky` to make index and column headers permanently visible in scrolling HTML frames (:issue:`29072`)
 
@@ -252,7 +252,7 @@ Other enhancements
 - :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`)
 - Added support for dict-like names in :class:`MultiIndex.set_names` and :class:`MultiIndex.rename` (:issue:`20421`)
 - :func:`read_excel` can now auto-detect .xlsb files and older .xls files (:issue:`35416`, :issue:`41225`)
-- :class:`ExcelWriter` now accepts an ``if_sheet_exists`` parameter to control the behaviour of append mode when writing to existing sheets (:issue:`40230`)
+- :class:`ExcelWriter` now accepts an ``if_sheet_exists`` parameter to control the behavior of append mode when writing to existing sheets (:issue:`40230`)
 - :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.ExponentialMovingWindow.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support `Numba <http://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`38895`, :issue:`41267`)
 - :meth:`DataFrame.apply` can now accept NumPy unary operators as strings, e.g. ``df.apply("sqrt")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
 - :meth:`DataFrame.apply` can now accept non-callable DataFrame properties as strings, e.g. ``df.apply("size")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
@@ -276,7 +276,9 @@ Other enhancements
 - Add keyword ``dropna`` to :meth:`DataFrame.value_counts` to allow counting rows that include ``NA`` values (:issue:`41325`)
 - :meth:`Series.replace` will now cast results to ``PeriodDtype`` where possible instead of ``object`` dtype (:issue:`41526`)
 - Improved error message in ``corr`` and ``cov`` methods on :class:`.Rolling`, :class:`.Expanding`, and :class:`.ExponentialMovingWindow` when ``other`` is not a :class:`DataFrame` or :class:`Series` (:issue:`41741`)
+- :meth:`Series.between` can now accept ``left`` or ``right`` as arguments to ``inclusive`` to include only the left or right boundary (:issue:`40245`)
 - :meth:`DataFrame.explode` now supports exploding multiple columns. Its ``column`` argument now also accepts a list of str or tuples for exploding on multiple columns at the same time (:issue:`39240`)
+- :meth:`DataFrame.sample` now accepts the ``ignore_index`` argument to reset the index after sampling, similar to :meth:`DataFrame.drop_duplicates` and :meth:`DataFrame.sort_values` (:issue:`38581`)
 
 .. ---------------------------------------------------------------------------
 
@@ -305,7 +307,7 @@ As an example of this, given:
         original = pd.Series(cat)
         unique = original.unique()
 
-*pandas < 1.3.0*:
+*Previous behavior*:
 
 .. code-block:: ipython
 
@@ -315,7 +317,7 @@ As an example of this, given:
     In [2]: original.dtype == unique.dtype
     False
 
-*pandas >= 1.3.0*
+*New behavior*:
 
 .. ipython:: python
 
@@ -337,7 +339,7 @@ Preserve dtypes in :meth:`DataFrame.combine_first`
    df2
    combined = df1.combine_first(df2)
 
-*pandas 1.2.x*
+*Previous behavior*:
 
 .. code-block:: ipython
 
@@ -348,7 +350,7 @@ Preserve dtypes in :meth:`DataFrame.combine_first`
    C    float64
    dtype: object
 
-*pandas 1.3.0*
+*New behavior*:
 
 .. ipython:: python
 
@@ -371,7 +373,7 @@ values as measured by ``np.allclose``. Now no such casting occurs.
     df = pd.DataFrame({'key': [1, 1], 'a': [True, False], 'b': [True, True]})
     df
 
-*pandas 1.2.x*
+*Previous behavior*:
 
 .. code-block:: ipython
 
@@ -381,7 +383,7 @@ values as measured by ``np.allclose``. Now no such casting occurs.
     key
     1    True  2
 
-*pandas 1.3.0*
+*New behavior*:
 
 .. ipython:: python
 
@@ -399,7 +401,7 @@ Now, these methods will always return a float dtype. (:issue:`41137`)
 
     df = pd.DataFrame({'a': [True], 'b': [1], 'c': [1.0]})
 
-*pandas 1.2.x*
+*Previous behavior*:
 
 .. code-block:: ipython
 
@@ -408,7 +410,7 @@ Now, these methods will always return a float dtype. (:issue:`41137`)
             a  b    c
     0    True  1  1.0
 
-*pandas 1.3.0*
+*New behavior*:
 
 .. ipython:: python
 
@@ -432,7 +434,7 @@ insert the values into the existing data rather than create an entirely new arra
 In both the new and old behavior, the data in ``values`` is overwritten, but in
 the old behavior the dtype of ``df["A"]`` changed to ``int64``.
 
-*pandas 1.2.x*
+*Previous behavior*:
 
 .. code-block:: ipython
 
@@ -447,7 +449,7 @@ the old behavior the dtype of ``df["A"]`` changed to ``int64``.
 
 In pandas 1.3.0, ``df`` continues to share data with ``values``
 
-*pandas 1.3.0*
+*New behavior*:
 
 .. ipython:: python
 
@@ -474,7 +476,7 @@ never casting to the dtypes of the existing arrays.
 In the old behavior, ``5`` was cast to ``float64`` and inserted into the existing
 array backing ``df``:
 
-*pandas 1.2.x*
+*Previous behavior*:
 
 .. code-block:: ipython
 
@@ -484,7 +486,7 @@ array backing ``df``:
 
 In the new behavior, we get a new array, and retain an integer-dtyped ``5``:
 
-*pandas 1.3.0*
+*New behavior*:
 
 .. ipython:: python
 
@@ -507,7 +509,7 @@ casts to ``dtype=object`` (:issue:`38709`)
    ser2 = orig.copy()
    ser2.iloc[1] = 2.0
 
-*pandas 1.2.x*
+*Previous behavior*:
 
 .. code-block:: ipython
 
@@ -523,7 +525,7 @@ casts to ``dtype=object`` (:issue:`38709`)
    1     2.0
    dtype: object
 
-*pandas 1.3.0*
+*New behavior*:
 
 .. ipython:: python
 
@@ -705,7 +707,8 @@ Other API changes
 - Added new ``engine`` and ``**engine_kwargs`` parameters to :meth:`DataFrame.to_sql` to support other future "SQL engines". Currently we still only use ``SQLAlchemy`` under the hood, but more engines are planned to be supported such as `turbodbc <https://turbodbc.readthedocs.io/en/latest/>`_ (:issue:`36893`)
 - Removed redundant ``freq`` from :class:`PeriodIndex` string representation (:issue:`41653`)
 - :meth:`ExtensionDtype.construct_array_type` is now a required method instead of an optional one for :class:`ExtensionDtype` subclasses (:issue:`24860`)
-- :class:`StringArray` now accepts nan-likes (``None``, ``np.nan``) for the ``values`` parameter in its constructor in addition to strings and :attr:`pandas.NA`. (:issue:`40839`)
+- Calling ``hash`` on non-hashable pandas objects will now raise ``TypeError`` with the built-in error message (e.g. ``unhashable type: 'Series'``). Previously it would raise a custom message such as ``'Series' objects are mutable, thus they cannot be hashed``. Furthermore, ``isinstance(<Series>, collections.abc.Hashable)`` will now return ``False`` (:issue:`40013`)
+- :meth:`.Styler.from_custom_template` now has two new arguments for template names, and the old ``name`` argument has been removed, due to template inheritance having been introduced for better parsing (:issue:`42053`). Subclassing modifications to Styler attributes are also needed.
 
 .. _whatsnew_130.api_breaking.build:
 
@@ -787,6 +790,8 @@ For example:
     1   2
     2  12
 
+*Future behavior*:
+
 .. code-block:: ipython
 
     In [5]: gb.prod(numeric_only=False)
@@ -816,8 +821,8 @@ Other Deprecations
 - Deprecated :meth:`ExponentialMovingWindow.vol` (:issue:`39220`)
 - Using ``.astype`` to convert between ``datetime64[ns]`` dtype and :class:`DatetimeTZDtype` is deprecated and will raise in a future version, use ``obj.tz_localize`` or ``obj.dt.tz_localize`` instead (:issue:`38622`)
 - Deprecated casting ``datetime.date`` objects to ``datetime64`` when used as ``fill_value`` in :meth:`DataFrame.unstack`, :meth:`DataFrame.shift`, :meth:`Series.shift`, and :meth:`DataFrame.reindex`, pass ``pd.Timestamp(dateobj)`` instead (:issue:`39767`)
-- Deprecated :meth:`.Styler.set_na_rep` and :meth:`.Styler.set_precision` in favour of :meth:`.Styler.format` with ``na_rep`` and ``precision`` as existing and new input arguments respectively (:issue:`40134`, :issue:`40425`)
-- Deprecated :meth:`.Styler.where` in favour of using an alternative formulation with :meth:`Styler.applymap` (:issue:`40821`)
+- Deprecated :meth:`.Styler.set_na_rep` and :meth:`.Styler.set_precision` in favor of :meth:`.Styler.format` with ``na_rep`` and ``precision`` as existing and new input arguments respectively (:issue:`40134`, :issue:`40425`)
+- Deprecated :meth:`.Styler.where` in favor of using an alternative formulation with :meth:`Styler.applymap` (:issue:`40821`)
 - Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like and raises anything but ``TypeError``; ``func`` raising anything but a ``TypeError`` will raise in a future version (:issue:`40211`)
 - Deprecated arguments ``error_bad_lines`` and ``warn_bad_lines`` in :meth:`read_csv` and :meth:`read_table` in favor of argument ``on_bad_lines`` (:issue:`15122`)
 - Deprecated support for ``np.ma.mrecords.MaskedRecords`` in the :class:`DataFrame` constructor, pass ``{name: data[name] for name in data.dtype.names}`` instead (:issue:`40363`)
@@ -839,6 +844,7 @@ Other Deprecations
 - Deprecated inference of ``timedelta64[ns]``, ``datetime64[ns]``, or ``DatetimeTZDtype`` dtypes in :class:`Series` construction when data containing strings is passed and no ``dtype`` is passed (:issue:`33558`)
 - In a future version, constructing :class:`Series` or :class:`DataFrame` with ``datetime64[ns]`` data and ``DatetimeTZDtype`` will treat the data as wall-times instead of as UTC times (matching DatetimeIndex behavior). To treat the data as UTC times, use ``pd.Series(data).dt.tz_localize("UTC").dt.tz_convert(dtype.tz)`` or ``pd.Series(data.view("int64"), dtype=dtype)`` (:issue:`33401`)
 - Deprecated passing lists as ``key`` to :meth:`DataFrame.xs` and :meth:`Series.xs` (:issue:`41760`)
+- Deprecated boolean arguments of ``inclusive`` in :meth:`Series.between` to have ``{"left", "right", "neither", "both"}`` as standard argument values (:issue:`40628`)
 - Deprecated passing arguments as positional for all of the following, with exceptions noted (:issue:`41485`):
 
   - :func:`concat` (other than ``objs``)
@@ -885,7 +891,7 @@ Performance improvements
 - Performance improvement in :class:`.Styler` where render times are more than 50% reduced and now matches :meth:`DataFrame.to_html` (:issue:`39972` :issue:`39952`, :issue:`40425`)
 - The method :meth:`.Styler.set_td_classes` is now as performant as :meth:`.Styler.apply` and :meth:`.Styler.applymap`, and even more so in some cases (:issue:`40453`)
 - Performance improvement in :meth:`.ExponentialMovingWindow.mean` with ``times`` (:issue:`39784`)
-- Performance improvement in :meth:`.GroupBy.apply` when requiring the python fallback implementation (:issue:`40176`)
+- Performance improvement in :meth:`.GroupBy.apply` when requiring the Python fallback implementation (:issue:`40176`)
 - Performance improvement in the conversion of a PyArrow Boolean array to a pandas nullable Boolean array (:issue:`41051`)
 - Performance improvement for concatenation of data with type :class:`CategoricalDtype` (:issue:`40193`)
 - Performance improvement in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` with nullable data types (:issue:`37493`)
@@ -925,6 +931,7 @@ Datetimelike
 - Bug in :meth:`Timedelta.round`, :meth:`Timedelta.floor`, :meth:`Timedelta.ceil` for values near the implementation bounds of :class:`Timedelta` (:issue:`38964`)
 - Bug in :func:`date_range` incorrectly creating :class:`DatetimeIndex` containing ``NaT`` instead of raising ``OutOfBoundsDatetime`` in corner cases (:issue:`24124`)
 - Bug in :func:`infer_freq` incorrectly fails to infer 'H' frequency of :class:`DatetimeIndex` if the latter has a timezone and crosses DST boundaries (:issue:`39556`)
+- Bug in :class:`Series` backed by :class:`DatetimeArray` or :class:`TimedeltaArray` sometimes failing to set the array's ``freq`` to ``None`` (:issue:`41425`)
 
 Timedelta
 ^^^^^^^^^
@@ -955,7 +962,8 @@ Numeric
 - Bug in :class:`Series` and :class:`DataFrame` reductions with methods ``any`` and ``all`` not returning Boolean results for object data (:issue:`12863`, :issue:`35450`, :issue:`27709`)
 - Bug in :meth:`Series.clip` would fail if the Series contains NA values and has nullable int or float as a data type (:issue:`40851`)
 - Bug in :meth:`UInt64Index.where` and :meth:`UInt64Index.putmask` with an ``np.int64`` dtype ``other`` incorrectly raising ``TypeError`` (:issue:`41974`)
-
+- Bug in :meth:`DataFrame.agg()` not sorting the aggregated axis in the order of the provided aggregation functions when one or more aggregation function fails to produce results (:issue:`33634`)
+- Bug in :meth:`DataFrame.clip` not interpreting missing values as no threshold (:issue:`40420`)
 
 Conversion
 ^^^^^^^^^^
@@ -971,6 +979,12 @@ Conversion
 - Bug in :class:`DataFrame` and :class:`Series` construction with ``datetime64[ns]`` data and ``dtype=object`` resulting in ``datetime`` objects instead of :class:`Timestamp` objects (:issue:`41599`)
 - Bug in :class:`DataFrame` and :class:`Series` construction with ``timedelta64[ns]`` data and ``dtype=object`` resulting in ``np.timedelta64`` objects instead of :class:`Timedelta` objects (:issue:`41599`)
 - Bug in :class:`DataFrame` construction when given a two-dimensional object-dtype ``np.ndarray`` of :class:`Period` or :class:`Interval` objects failing to cast to :class:`PeriodDtype` or :class:`IntervalDtype`, respectively (:issue:`41812`)
+- Bug in constructing a :class:`Series` from a list and a :class:`PandasDtype` (:issue:`39357`)
+- Bug in creating a :class:`Series` from a ``range`` object that does not fit in the bounds of ``int64`` dtype (:issue:`30173`)
+- Bug in creating a :class:`Series` from a ``dict`` with all-tuple keys and an :class:`Index` that requires reindexing (:issue:`41707`)
+- Bug in :func:`.infer_dtype` not recognizing Series, Index, or array with a Period dtype (:issue:`23553`)
+- Bug in :func:`.infer_dtype` raising an error for general :class:`.ExtensionArray` objects. It will now return ``"unknown-array"`` instead of raising (:issue:`37367`)
+- Bug in :meth:`DataFrame.convert_dtypes` incorrectly raised a ``ValueError`` when called on an empty DataFrame (:issue:`40393`)
 
 Strings
 ^^^^^^^
@@ -1032,6 +1046,8 @@ Indexing
 - Bug ``.loc.__getitem__`` with a :class:`UInt64Index` and negative-integer keys raising ``OverflowError`` instead of ``KeyError`` in some cases, wrapping around to positive integers in others (:issue:`41777`)
 - Bug in :meth:`Index.get_indexer` failing to raise ``ValueError`` in some cases with invalid ``method``, ``limit``, or ``tolerance`` arguments (:issue:`41918`)
 - Bug when slicing a :class:`Series` or :class:`DataFrame` with a :class:`TimedeltaIndex` when passing an invalid string raising ``ValueError`` instead of a ``TypeError`` (:issue:`41821`)
+- Bug in :class:`Index` constructor sometimes silently ignoring a specified ``dtype`` (:issue:`38879`)
+- :meth:`Index.where` behavior now mirrors :meth:`Index.putmask` behavior, i.e. ``index.where(mask, other)`` matches ``index.putmask(~mask, other)`` (:issue:`39412`)
 
 Missing
 ^^^^^^^
@@ -1201,24 +1217,13 @@ Styler
 
 Other
 ^^^^^
-- Bug in :class:`Index` constructor sometimes silently ignoring a specified ``dtype`` (:issue:`38879`)
-- Bug in :func:`.infer_dtype` not recognizing Series, Index, or array with a Period dtype (:issue:`23553`)
-- Bug in :func:`.infer_dtype` raising an error for general :class:`.ExtensionArray` objects. It will now return ``"unknown-array"`` instead of raising (:issue:`37367`)
-- Bug in constructing a :class:`Series` from a list and a :class:`PandasDtype` (:issue:`39357`)
 - ``inspect.getmembers(Series)`` no longer raises an ``AbstractMethodError`` (:issue:`38782`)
 - Bug in :meth:`Series.where` with numeric dtype and ``other=None`` not casting to ``nan`` (:issue:`39761`)
-- :meth:`Index.where` behavior now mirrors :meth:`Index.putmask` behavior, i.e. ``index.where(mask, other)`` matches ``index.putmask(~mask, other)`` (:issue:`39412`)
 - Bug in :func:`.assert_series_equal`, :func:`.assert_frame_equal`, :func:`.assert_index_equal` and :func:`.assert_extension_array_equal` incorrectly raising when an attribute has an unrecognized NA type (:issue:`39461`)
 - Bug in :func:`.assert_index_equal` with ``exact=True`` not raising when comparing :class:`CategoricalIndex` instances with ``Int64Index`` and ``RangeIndex`` categories (:issue:`41263`)
 - Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, and :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`)
 - Bug in :func:`show_versions` where console JSON output was not proper JSON (:issue:`39701`)
 - pandas can now compile on z/OS when using `xlc <https://www.ibm.com/products/xl-cpp-compiler-zos>`_ (:issue:`35826`)
-- Bug in :meth:`DataFrame.convert_dtypes` incorrectly raised a ``ValueError`` when called on an empty DataFrame (:issue:`40393`)
-- Bug in :meth:`DataFrame.agg()` not sorting the aggregated axis in the order of the provided aggragation functions when one or more aggregation function fails to produce results (:issue:`33634`)
-- Bug in :meth:`DataFrame.clip` not interpreting missing values as no threshold (:issue:`40420`)
-- Bug in :class:`Series` backed by :class:`DatetimeArray` or :class:`TimedeltaArray` sometimes failing to set the array's ``freq`` to ``None`` (:issue:`41425`)
-- Bug in creating a :class:`Series` from a ``range`` object that does not fit in the bounds of ``int64`` dtype (:issue:`30173`)
-- Bug in creating a :class:`Series` from a ``dict`` with all-tuple keys and an :class:`Index` that requires reindexing (:issue:`41707`)
 - Bug in :func:`pandas.util.hash_pandas_object` not recognizing ``hash_key``, ``encoding`` and ``categorize`` when the input object type is a :class:`DataFrame` (:issue:`41404`)
 
 .. ---------------------------------------------------------------------------
@@ -1228,4 +1233,4 @@ Other
 Contributors
 ~~~~~~~~~~~~
 
-.. contributors:: v1.2.4..v1.3.0|HEAD
+.. contributors:: v1.2.5..v1.3.0

From 12351de548d50013fb55d17c7ab97e8b70f1fcab Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Wed, 21 Jul 2021 09:53:00 -0700
Subject: [PATCH 25/28] move whatsnew

---
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 8d96d49daba4f..21328461f9cf2 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -142,7 +142,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor
 Other API changes
 ^^^^^^^^^^^^^^^^^
 - :meth:`Index.get_indexer_for` no longer accepts keyword arguments (other than 'target'); in the past these would be silently ignored if the index was not unique (:issue:`42310`)
--
+- :class:`StringArray` now accepts nan-likes (``None``, ``np.nan``) for the ``values`` parameter in its constructor in addition to strings and :attr:`pandas.NA` (:issue:`40839`)
 
 .. ---------------------------------------------------------------------------
 

From 358000f79fe8c1c456c1a7e1090e7aaaffa223b1 Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Mon, 4 Oct 2021 13:31:41 -0700
Subject: [PATCH 26/28] typo

---
 pandas/_libs/lib.pyx | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index c580ef0269d89..09f8398212168 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -758,11 +758,11 @@ cpdef ndarray[object] ensure_string_array(
         if not check_null(val):
             if coerce =="all" or coerce == "non-null":
                 if not isinstance(val, np.floating):
-                # f"{val}" is faster than str(val)
-                result[i] = f"{val}"
-            else:
-                # f"{val}" is not always equivalent to str(val) for floats
-                result[i] = str(val)
+                    # f"{val}" is faster than str(val)
+                    result[i] = f"{val}"
+                else:
+                    # f"{val}" is not always equivalent to str(val) for floats
+                    result[i] = str(val)
             else:
                 raise ValueError(f"Element {val} is not a string or valid null."
                                  "If you want it to be coerced to a string,"

From 20817a790c8fadeca336b9ad25d8db34bfea3efd Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Mon, 27 Dec 2021 09:21:45 -0800
Subject: [PATCH 27/28] address comments

---
 pandas/_libs/lib.pyx          | 53 +++++++++++++++++++++++++++++------
 pandas/core/arrays/string_.py |  2 +-
 2 files changed, 45 insertions(+), 10 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 7974e732a5e0d..8939b2853d0ef 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -671,6 +671,14 @@ def astype_intsafe(ndarray[object] arr, cnp.dtype new_dtype) -> ndarray:
     return result
 
 
+ctypedef enum coerce_options:
+    all = 0
+    strict_null = 1
+    null = 2
+    non_null = 3
+    none = 4
+
+
 @cython.wraparound(False)
 @cython.boundscheck(False)
 cpdef ndarray[object] ensure_string_array(
@@ -689,11 +697,11 @@ cpdef ndarray[object] ensure_string_array(
         The values to be converted to str, if needed.
     na_value : Any, default np.nan
         The value to use for na. For example, np.nan or pd.NA.
-    coerce : {'all', 'null', 'non-null', None}, default 'all'
+    coerce : {'all', 'strict-null', 'null', 'non-null', None}, default 'all'
         Whether to coerce non-string elements to strings.
-            - 'all' will convert null values and non-null non-string values.
+            - 'all' will convert all non-string values.
             - 'strict-null' will only convert pd.NA, np.nan, or None to na_value
-              without converting other non-strings.
+              and will raise ValueError for other non-strings, including other nulls.
             - 'null' will convert nulls to na_value w/out converting other non-strings.
             - 'non-null' will only convert non-null non-string elements to string.
             - None will not convert anything.
@@ -715,13 +723,40 @@ cpdef ndarray[object] ensure_string_array(
     ValueError
         If an element is encountered that is not a string or valid NA value
         and element is not coerced.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> ensure_string_array(np.array([1,2,3, np.datetime64("nat")]), coerce="all")
+    array("1", "2", "3", np.nan)
+    >>> ensure_string_array(np.array([pd.NA, "a", None]), coerce="strict-null")
+    array(np.nan, "a", np.nan)
+    >>> ensure_string_array(np.array([pd.NaT, "1"]), coerce="null")
+    array(np.nan, "1")
+    >>> ensure_string_array(np.array([1,2,3]), coerce="non-null")
+    array("1", "2", "3")
+    >>> ensure_string_array(np.array(["1", "2", "3"]), coerce=None)
+    array("1", "2", "3")
     """
-    if coerce not in {"all", "strict-null", "null", "non-null", None}:
-        raise ValueError("coerce argument must be one of "
-                         f"'all'|'strict-null'|'null'|'non-null'|None, not {coerce}")
     cdef:
         Py_ssize_t i = 0, n = len(arr)
         set strict_na_values = {C_NA, np.nan, None}
+        coerce_options coerce_val
+
+    if coerce == "all":
+        coerce_val = all
+    elif coerce == "strict-null":
+        coerce_val = strict_null
+    elif coerce == "null":
+        coerce_val = null
+    elif coerce == "non-null":
+        coerce_val = non_null
+    elif coerce is None:
+        coerce_val = none
+    else:
+        raise ValueError("coerce argument must be one of "
+                         f"'all'|'strict-null'|'null'|'non-null'|None, not {coerce}")
 
     if hasattr(arr, "to_numpy"):
 
@@ -741,7 +776,7 @@ cpdef ndarray[object] ensure_string_array(
     if copy and result is arr:
         result = result.copy()
 
-    if coerce == 'strict-null':
+    if coerce_val == strict_null:
         # We don't use checknull, since NaT, Decimal("NaN"), etc. aren't valid
         # If they are present, they are treated like a regular Python object
         # and will either cause an exception to be raised or be coerced.
@@ -756,7 +791,7 @@ cpdef ndarray[object] ensure_string_array(
             continue
 
         if not check_null(val):
-            if coerce =="all" or coerce == "non-null":
+            if coerce_val == all or coerce_val == non_null:
                 if not isinstance(val, np.floating):
                     # f"{val}" is faster than str(val)
                     result[i] = f"{val}"
@@ -768,7 +803,7 @@ cpdef ndarray[object] ensure_string_array(
                                  "If you want it to be coerced to a string,"
                                  "specify coerce='all'")
         else:
-            if coerce=="all" or coerce == "null" or coerce == 'strict-null':
+            if coerce_val != non_null and coerce_val != none:
                 val = na_value
             if skipna:
                 result[i] = val
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index c2b659d752ad2..8fe5343e471ae 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -252,7 +252,7 @@ class StringArray(BaseStringArray, PandasArray):
            :meth:`pandas.array` with ``dtype="string"`` for a stable way of
            creating a `StringArray` from any sequence.
 
-        .. versionchanged:: 1.3
+        .. versionchanged:: 1.4.0
 
            StringArray now accepts nan-likes(``None``, ``np.nan``) for the
            ``values`` parameter in its constructor

From 33d8f9a6bb4d01b2a4c3c66cf40bdcd74e764888 Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Mon, 27 Dec 2021 12:11:55 -0800
Subject: [PATCH 28/28] accept any float nan w/ util.is_nan

---
 pandas/_libs/lib.pyx                       | 17 +++++++++++------
 pandas/tests/arrays/string_/test_string.py | 10 +++++++---
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 8939b2853d0ef..e373c8a584913 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -679,6 +679,11 @@ ctypedef enum coerce_options:
     none = 4
 
 
+def strict_check_null(x):
+    # Module-level helper: Cython does not allow defining this inside ensure_string_array.
+    return x is None or x is C_NA or util.is_nan(x)
+
+
 @cython.wraparound(False)
 @cython.boundscheck(False)
 cpdef ndarray[object] ensure_string_array(
@@ -729,15 +734,15 @@ cpdef ndarray[object] ensure_string_array(
     >>> import numpy as np
     >>> import pandas as pd
     >>> ensure_string_array(np.array([1,2,3, np.datetime64("nat")]), coerce="all")
-    array("1", "2", "3", np.nan)
+    array(['1', '2', '3', nan], dtype=object)
     >>> ensure_string_array(np.array([pd.NA, "a", None]), coerce="strict-null")
-    array(np.nan, "a", np.nan)
+    array([nan, 'a', nan], dtype=object)
     >>> ensure_string_array(np.array([pd.NaT, "1"]), coerce="null")
-    array(np.nan, "1")
+    array([nan, '1'], dtype=object)
     >>> ensure_string_array(np.array([1,2,3]), coerce="non-null")
-    array("1", "2", "3")
+    array(['1', '2', '3'], dtype=object)
     >>> ensure_string_array(np.array(["1", "2", "3"]), coerce=None)
-    array("1", "2", "3")
+    array(['1', '2', '3'], dtype=object)
     """
     cdef:
         Py_ssize_t i = 0, n = len(arr)
@@ -780,7 +785,7 @@ cpdef ndarray[object] ensure_string_array(
         # We don't use checknull, since NaT, Decimal("NaN"), etc. aren't valid
         # If they are present, they are treated like a regular Python object
         # and will either cause an exception to be raised or be coerced.
-        check_null = strict_na_values.__contains__
+        check_null = strict_check_null
     else:
         check_null = checknull
 
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 4a85daf653d53..10ff1c12d6fa8 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -2,6 +2,8 @@
 This module tests the functionality of StringArray and ArrowStringArray.
 Tests for the str accessors are in pandas/tests/strings/test_string_array.py
 """
+from decimal import Decimal
+
 import numpy as np
 import pytest
 
@@ -272,7 +274,7 @@ def test_constructor_raises(cls):
         cls(np.array(["a", pd.NaT], dtype=object))
 
 
-@pytest.mark.parametrize("na", [np.nan, None, pd.NA])
+@pytest.mark.parametrize("na", [np.nan, np.float64("nan"), float("nan"), None, pd.NA])
 def test_constructor_nan_like(na):
     expected = pd.arrays.StringArray(np.array(["a", pd.NA]))
     tm.assert_extension_array_equal(
@@ -281,7 +283,7 @@ def test_constructor_nan_like(na):
 
 
 def test_invalid_coerce_raises():
-    data = np.array(["a", "b'"], dtype=object)
+    data = np.array(["a", "b"], dtype=object)
     with pytest.raises(
         ValueError,
         match="coerce argument must be one of "
@@ -296,6 +298,8 @@ def test_invalid_coerce_raises():
         np.array(["foo", "bar", pd.NA], dtype=object),
         np.array(["foo", "bar", np.nan], dtype=object),
         np.array(["foo", "bar", None], dtype=object),
+        np.array(["foo", "bar", float("nan")], dtype=object),
+        np.array(["foo", "bar", np.float64("nan")], dtype=object),
         BaseMaskedArray(
             np.array(["foo", "bar", "garbage"]), np.array([False, False, True])
         ),
@@ -313,7 +317,7 @@ def test_from_sequence_no_coerce(cls, values):
     [
         np.array(["foo", "bar", pd.NaT], dtype=object),
         np.array(["foo", "bar", np.datetime64("nat")], dtype=object),
-        np.array(["foo", "bar", float("nan")], dtype=object),
+        np.array(["foo", "bar", Decimal("nan")], dtype=object),
     ],
 )
 def test_from_sequence_no_coerce_invalid(cls, values):