1
1
import operator
2
- from typing import TYPE_CHECKING , Type , Union
2
+ from typing import TYPE_CHECKING , Optional , Type , Union
3
3
4
4
import numpy as np
5
5
@@ -122,6 +122,9 @@ class StringArray(PandasArray):
122
122
123
123
copy : bool, default False
124
124
Whether to copy the array of data.
125
+ convert : bool, default False
126
+ If True, convert non-NA scalars to strings.
127
+ If False, raise a ValueError if any scalar is neither a string nor NA.
125
128
126
129
Attributes
127
130
----------
@@ -162,7 +165,15 @@ class StringArray(PandasArray):
162
165
['1', '1']
163
166
Length: 2, dtype: string
164
167
165
- However, instantiating StringArrays directly with non-strings will raise an error.
168
+ Instantiating StringArrays directly with non-strings will raise an error unless
169
+ ``convert=True``.
170
+
171
+ >>> pd.arrays.StringArray(['1', 1])
172
+ TypeError: Argument 'values' has incorrect type (expected numpy.ndarray, got list)
173
+ >>> pd.arrays.StringArray(['1', 1], convert=True)
174
+ <StringArray>
175
+ ['1', '1']
176
+ Length: 2, dtype: string
166
177
167
178
For comparison methods, `StringArray` returns a :class:`pandas.BooleanArray`:
168
179
@@ -175,22 +186,30 @@ class StringArray(PandasArray):
175
186
# undo the PandasArray hack
176
187
_typ = "extension"
177
188
178
- def __init__ (self , values , copy = False ):
189
+ def __init__ (self , values , copy = False , convert : bool = False ):
179
190
values = extract_array (values )
191
+ if not isinstance (values , type (self )):
192
+ if convert :
193
+ values = lib .ensure_string_array (
194
+ values , na_value = StringDtype .na_value , copy = copy
195
+ )
196
+ else :
197
+ self ._validate (values )
180
198
181
199
super ().__init__ (values , copy = copy )
182
200
self ._dtype = StringDtype ()
183
- if not isinstance (values , type (self )):
184
- self ._validate ()
185
201
186
- def _validate (self ) :
202
+ def _validate (self , values : Optional [ np . ndarray ] = None ) -> None :
187
203
"""Validate that we only store NA or strings."""
188
- if len (self ._ndarray ) and not lib .is_string_array (self ._ndarray , skipna = True ):
204
+ if values is None :
205
+ values = self ._ndarray
206
+
207
+ if len (values ) and not lib .is_string_array (values , skipna = True ):
189
208
raise ValueError ("StringArray requires a sequence of strings or pandas.NA" )
190
- if self . _ndarray .dtype != "object" :
209
+ if values .dtype != "object" :
191
210
raise ValueError (
192
211
"StringArray requires a sequence of strings or pandas.NA. Got "
193
- f"'{ self . _ndarray .dtype } ' dtype instead."
212
+ f"'{ values .dtype } ' dtype instead."
194
213
)
195
214
196
215
@classmethod
@@ -200,12 +219,7 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
200
219
201
220
result = np .asarray (scalars , dtype = "object" )
202
221
203
- # convert non-na-likes to str, and nan-likes to StringDtype.na_value
204
- result = lib .ensure_string_array (
205
- result , na_value = StringDtype .na_value , copy = copy
206
- )
207
-
208
- return cls (result )
222
+ return cls (result , copy = copy , convert = True )
209
223
210
224
@classmethod
211
225
def _from_sequence_of_strings (cls , strings , dtype = None , copy = False ):
0 commit comments