Skip to content

Commit b0ea2f1

Browse files
Split the fast-path IntegerArray constructor from the general-purpose constructor
1 parent 2c7c797 commit b0ea2f1

File tree

6 files changed

+40
-33
lines changed

6 files changed

+40
-33
lines changed

pandas/core/arrays/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@
77
from .period import PeriodArrayMixin # noqa
88
from .timedeltas import TimedeltaArrayMixin # noqa
99
from .integer import ( # noqa
10-
IntegerArray, to_integer_array)
10+
IntegerArray, integer_array)

pandas/core/arrays/integer.py

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def construct_from_string(cls, string):
7676
"'{}'".format(cls, string))
7777

7878

79-
def to_integer_array(values, dtype=None):
79+
def integer_array(values, dtype=None, copy=False):
8080
"""
8181
Infer and return an integer array of the values.
8282
@@ -94,7 +94,8 @@ def to_integer_array(values, dtype=None):
9494
------
9595
TypeError if incompatible types
9696
"""
97-
return IntegerArray(values, dtype=dtype, copy=False)
97+
values, mask = coerce_to_array(values, dtype=dtype, copy=copy)
98+
return IntegerArray(values, mask)
9899

99100

100101
def safe_cast(values, dtype, copy):
@@ -206,7 +207,7 @@ class IntegerArray(ExtensionArray, ExtensionOpsMixin):
206207
def dtype(self):
207208
return _dtypes[str(self._data.dtype)]
208209

209-
def __init__(self, values, mask=None, dtype=None, copy=False):
210+
def __init__(self, values, mask, copy=False):
210211
"""
211212
Parameters
212213
----------
@@ -219,25 +220,33 @@ def __init__(self, values, mask=None, dtype=None, copy=False):
219220
-------
220221
IntegerArray
221222
"""
222-
self._data, self._mask = coerce_to_array(
223-
values, dtype=dtype, mask=mask, copy=copy)
223+
if not (isinstance(values, np.ndarray)
224+
and np.issubdtype(values.dtype, np.integer)):
225+
raise TypeError("values should be integer numpy array")
226+
if not (isinstance(mask, np.ndarray) and mask.dtype == np.bool_):
227+
raise TypeError("mask should be boolean numpy array")
228+
229+
if copy:
230+
values = values.copy()
231+
mask = mask.copy()
232+
233+
self._data = values
234+
self._mask = mask
224235

225236
@classmethod
226237
def _from_sequence(cls, scalars, dtype=None, copy=False):
227-
return cls(scalars, dtype=dtype, copy=copy)
238+
return integer_array(scalars, dtype=dtype, copy=copy)
228239

229240
@classmethod
230241
def _from_factorized(cls, values, original):
231-
return cls(values, dtype=original.dtype)
242+
return integer_array(values, dtype=original.dtype)
232243

233244
def __getitem__(self, item):
234245
if is_integer(item):
235246
if self._mask[item]:
236247
return self.dtype.na_value
237248
return self._data[item]
238-
return type(self)(self._data[item],
239-
mask=self._mask[item],
240-
dtype=self.dtype)
249+
return type(self)(self._data[item], self._mask[item])
241250

242251
def _coerce_to_ndarray(self):
243252
"""
@@ -294,7 +303,7 @@ def take(self, indexer, allow_fill=False, fill_value=None):
294303
result[fill_mask] = fill_value
295304
mask = mask ^ fill_mask
296305

297-
return type(self)(result, mask=mask, dtype=self.dtype, copy=False)
306+
return type(self)(result, mask, copy=False)
298307

299308
def copy(self, deep=False):
300309
data, mask = self._data, self._mask
@@ -304,7 +313,7 @@ def copy(self, deep=False):
304313
else:
305314
data = data.copy()
306315
mask = mask.copy()
307-
return type(self)(data, mask, dtype=self.dtype, copy=False)
316+
return type(self)(data, mask, copy=False)
308317

309318
def __setitem__(self, key, value):
310319
_is_scalar = is_scalar(value)
@@ -356,7 +365,7 @@ def _na_value(self):
356365
def _concat_same_type(cls, to_concat):
357366
data = np.concatenate([x._data for x in to_concat])
358367
mask = np.concatenate([x._mask for x in to_concat])
359-
return cls(data, mask=mask, dtype=to_concat[0].dtype)
368+
return cls(data, mask)
360369

361370
def astype(self, dtype, copy=True):
362371
"""Cast to a NumPy array or IntegerArray with 'dtype'.
@@ -386,8 +395,7 @@ def astype(self, dtype, copy=True):
386395
if isinstance(dtype, _IntegerDtype):
387396
result = self._data.astype(dtype.numpy_dtype,
388397
casting='same_kind', copy=False)
389-
return type(self)(result, mask=self._mask,
390-
dtype=dtype, copy=False)
398+
return type(self)(result, mask=self._mask, copy=False)
391399

392400
# coerce
393401
data = self._coerce_to_ndarray()
@@ -523,7 +531,7 @@ def _maybe_mask_result(self, result, mask, other, op_name):
523531
result[mask] = np.nan
524532
return result
525533

526-
return type(self)(result, mask=mask, dtype=self.dtype, copy=False)
534+
return type(self)(result, mask, copy=False)
527535

528536
@classmethod
529537
def _create_arithmetic_method(cls, op):

pandas/core/indexes/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
280280
if not (dtype is None or is_object_dtype(dtype)):
281281

282282
# coerce to the provided dtype
283-
data = dtype.construct_array_type()(
283+
data = dtype.construct_array_type()._from_sequence(
284284
data, dtype=dtype, copy=False)
285285

286286
# coerce to the object dtype

pandas/core/series.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4095,7 +4095,7 @@ def _try_cast(arr, take_fast_path):
40954095
ordered=dtype.ordered)
40964096
elif is_extension_array_dtype(dtype):
40974097
# create an extension array from its dtype
4098-
array_type = dtype.construct_array_type()
4098+
array_type = dtype.construct_array_type()._from_sequence
40994099
subarr = array_type(subarr, dtype=dtype, copy=copy)
41004100

41014101
elif dtype is not None and raise_cast_failure:

pandas/tests/extension/base/missing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ def test_fillna_series_method(self, data_missing, method):
9494
fill_value = data_missing[1]
9595

9696
if method == 'ffill':
97-
data_missing = type(data_missing)(data_missing[::-1])
97+
data_missing = data_missing[::-1]
9898

9999
result = pd.Series(data_missing).fillna(method=method)
100100
expected = pd.Series(

pandas/tests/extension/integer/test_integer.py

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from pandas.core.dtypes.generic import ABCIndexClass
1010

1111
from pandas.core.arrays import (
12-
to_integer_array, IntegerArray)
12+
integer_array, IntegerArray)
1313
from pandas.core.arrays.integer import (
1414
Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype,
1515
UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype)
@@ -31,12 +31,12 @@ def dtype(request):
3131

3232
@pytest.fixture
3333
def data(dtype):
34-
return IntegerArray(make_data(), dtype=dtype)
34+
return integer_array(make_data(), dtype=dtype)
3535

3636

3737
@pytest.fixture
3838
def data_missing(dtype):
39-
return IntegerArray([np.nan, 1], dtype=dtype)
39+
return integer_array([np.nan, 1], dtype=dtype)
4040

4141

4242
@pytest.fixture
@@ -49,12 +49,12 @@ def gen(count):
4949

5050
@pytest.fixture
5151
def data_for_sorting(dtype):
52-
return IntegerArray([1, 2, 0], dtype=dtype)
52+
return integer_array([1, 2, 0], dtype=dtype)
5353

5454

5555
@pytest.fixture
5656
def data_missing_for_sorting(dtype):
57-
return IntegerArray([1, np.nan, 0], dtype=dtype)
57+
return integer_array([1, np.nan, 0], dtype=dtype)
5858

5959

6060
@pytest.fixture
@@ -74,7 +74,7 @@ def data_for_grouping(dtype):
7474
a = 0
7575
c = 2
7676
na = np.nan
77-
return IntegerArray([b, b, na, na, a, a, b, c], dtype=dtype)
77+
return integer_array([b, b, na, na, a, a, b, c], dtype=dtype)
7878

7979

8080
def test_dtypes(dtype):
@@ -494,8 +494,7 @@ def test_construct_index(self, all_data, dropna):
494494
else:
495495
other = all_data
496496

497-
result = pd.Index(IntegerArray(other,
498-
dtype=all_data.dtype))
497+
result = pd.Index(integer_array(other, dtype=all_data.dtype))
499498
expected = pd.Index(other, dtype=object)
500499

501500
self.assert_index_equal(result, expected)
@@ -584,14 +583,14 @@ def test_construct_cast_invalid(self, dtype):
584583
msg = "cannot safely"
585584
arr = [1.2, 2.3, 3.7]
586585
with tm.assert_raises_regex(TypeError, msg):
587-
IntegerArray(arr, dtype=dtype)
586+
integer_array(arr, dtype=dtype)
588587

589588
with tm.assert_raises_regex(TypeError, msg):
590589
pd.Series(arr).astype(dtype)
591590

592591
arr = [1.2, 2.3, 3.7, np.nan]
593592
with tm.assert_raises_regex(TypeError, msg):
594-
IntegerArray(arr, dtype=dtype)
593+
integer_array(arr, dtype=dtype)
595594

596595
with tm.assert_raises_regex(TypeError, msg):
597596
pd.Series(arr).astype(dtype)
@@ -658,7 +657,7 @@ def test_conversions(data_missing):
658657
def test_to_integer_array_error(values):
659658
# error in converting existing arrays to IntegerArrays
660659
with pytest.raises(TypeError):
661-
to_integer_array(values)
660+
integer_array(values)
662661

663662

664663
@pytest.mark.parametrize(
@@ -669,8 +668,8 @@ def test_to_integer_array_error(values):
669668
(np.array([1, np.nan]), 'int8', Int8Dtype)])
670669
def test_to_integer_array(values, to_dtype, result_dtype):
671670
# convert existing arrays to IntegerArrays
672-
result = to_integer_array(values, dtype=to_dtype)
673-
expected = IntegerArray(values, dtype=result_dtype())
671+
result = integer_array(values, dtype=to_dtype)
672+
expected = integer_array(values, dtype=result_dtype())
674673
tm.assert_extension_array_equal(result, expected)
675674

676675

0 commit comments

Comments
 (0)