Skip to content

Commit 831555e

Browse files
committed
Merge remote-tracking branch 'upstream/master' into GH38454-export-stata-value-labels
2 parents 8bb29cb + 46ed60a commit 831555e

File tree

10 files changed

+40
-57
lines changed

10 files changed

+40
-57
lines changed

ci/deps/actions-38-db.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ dependencies:
1515
- beautifulsoup4
1616
- botocore>=1.11
1717
- dask
18-
- fastparquet>=0.4.0
18+
- fastparquet>=0.4.0, <0.7.0
1919
- fsspec>=0.7.4, <2021.6.0
2020
- gcsfs>=0.6.0
2121
- geopandas

ci/deps/azure-windows-38.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ dependencies:
1515
# pandas dependencies
1616
- blosc
1717
- bottleneck
18-
- fastparquet>=0.4.0
18+
- fastparquet>=0.4.0, <0.7.0
1919
- flask
2020
- fsspec>=0.8.0, <2021.6.0
2121
- matplotlib=3.3.2

doc/source/whatsnew/v1.3.1.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Fixed regressions
2424
- Fixed regression in indexing with a ``list`` subclass incorrectly raising ``TypeError`` (:issue:`42433`, :issue:`42461`)
2525
- Fixed regression in :meth:`DataFrame.isin` and :meth:`Series.isin` raising ``TypeError`` with nullable data containing at least one missing value (:issue:`42405`)
2626
- Fixed regression in :func:`concat` between objects with bool dtype and integer dtype casting to object instead of to integer (:issue:`42092`)
27+
- Bug in :class:`Series` constructor not accepting a ``dask.Array`` (:issue:`38645`)
2728

2829
.. ---------------------------------------------------------------------------
2930

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ dependencies:
9999
- xlwt
100100
- odfpy
101101

102-
- fastparquet>=0.4.0 # pandas.read_parquet, DataFrame.to_parquet
102+
- fastparquet>=0.4.0, <0.7.0 # pandas.read_parquet, DataFrame.to_parquet
103103
- pyarrow>=0.17.0 # pandas.read_parquet, DataFrame.to_parquet, pandas.read_feather, DataFrame.to_feather
104104
- python-snappy # required by pyarrow
105105

pandas/core/construction.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -560,8 +560,11 @@ def sanitize_array(
560560
raise TypeError(f"'{type(data).__name__}' type is unordered")
561561

562562
# materialize e.g. generators, convert e.g. tuples, abc.ValueView
563-
# TODO: non-standard array-likes we can convert to ndarray more efficiently?
564-
data = list(data)
563+
if hasattr(data, "__array__"):
564+
# e.g. dask array GH#38645
565+
data = np.asarray(data)
566+
else:
567+
data = list(data)
565568

566569
if dtype is not None or len(data) == 0:
567570
subarr = _try_cast(data, dtype, copy, raise_cast_failure)

pandas/core/frame.py

Lines changed: 16 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
ColspaceArgType,
5656
CompressionOptions,
5757
Dtype,
58+
DtypeObj,
5859
FilePathOrBuffer,
5960
FillnaOptions,
6061
FloatFormatType,
@@ -4307,50 +4308,25 @@ def check_int_infer_dtype(dtypes):
43074308
if not include.isdisjoint(exclude):
43084309
raise ValueError(f"include and exclude overlap on {(include & exclude)}")
43094310

4310-
# We raise when both include and exclude are empty
4311-
# Hence, we can just shrink the columns we want to keep
4312-
keep_these = np.full(self.shape[1], True)
4313-
4314-
def extract_unique_dtypes_from_dtypes_set(
4315-
dtypes_set: frozenset[Dtype], unique_dtypes: np.ndarray
4316-
) -> list[Dtype]:
4317-
extracted_dtypes = [
4318-
unique_dtype
4319-
for unique_dtype in unique_dtypes
4320-
if (
4321-
issubclass(
4322-
# error: Argument 1 to "tuple" has incompatible type
4323-
# "FrozenSet[Union[ExtensionDtype, Union[str, Any], Type[str],
4324-
# Type[float], Type[int], Type[complex], Type[bool],
4325-
# Type[object]]]"; expected "Iterable[Union[type, Tuple[Any,
4326-
# ...]]]"
4327-
unique_dtype.type,
4328-
tuple(dtypes_set), # type: ignore[arg-type]
4329-
)
4330-
or (
4331-
np.number in dtypes_set
4332-
and getattr(unique_dtype, "_is_numeric", False)
4333-
)
4334-
)
4335-
]
4336-
return extracted_dtypes
4311+
def dtype_predicate(dtype: DtypeObj, dtypes_set) -> bool:
4312+
return issubclass(dtype.type, tuple(dtypes_set)) or (
4313+
np.number in dtypes_set and getattr(dtype, "_is_numeric", False)
4314+
)
43374315

4338-
unique_dtypes = self.dtypes.unique()
4316+
def predicate(arr: ArrayLike) -> bool:
4317+
dtype = arr.dtype
4318+
if include:
4319+
if not dtype_predicate(dtype, include):
4320+
return False
43394321

4340-
if include:
4341-
included_dtypes = extract_unique_dtypes_from_dtypes_set(
4342-
include, unique_dtypes
4343-
)
4344-
keep_these &= self.dtypes.isin(included_dtypes)
4322+
if exclude:
4323+
if dtype_predicate(dtype, exclude):
4324+
return False
43454325

4346-
if exclude:
4347-
excluded_dtypes = extract_unique_dtypes_from_dtypes_set(
4348-
exclude, unique_dtypes
4349-
)
4350-
keep_these &= ~self.dtypes.isin(excluded_dtypes)
4326+
return True
43514327

4352-
# error: "ndarray" has no attribute "values"
4353-
return self.iloc[:, keep_these.values] # type: ignore[attr-defined]
4328+
mgr = self._mgr._get_data_subset(predicate)
4329+
return type(self)(mgr).__finalize__(self)
43544330

43554331
def insert(self, loc, column, value, allow_duplicates: bool = False) -> None:
43564332
"""

pandas/core/internals/array_manager.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -474,7 +474,11 @@ def _get_data_subset(self: T, predicate: Callable) -> T:
474474
indices = [i for i, arr in enumerate(self.arrays) if predicate(arr)]
475475
arrays = [self.arrays[i] for i in indices]
476476
# TODO copy?
477-
new_axes = [self._axes[0], self._axes[1][np.array(indices, dtype="intp")]]
477+
# Note: using Index.take ensures we can retain e.g. DatetimeIndex.freq,
478+
# see test_describe_datetime_columns
479+
taker = np.array(indices, dtype="intp")
480+
new_cols = self._axes[1].take(taker)
481+
new_axes = [self._axes[0], new_cols]
478482
return type(self)(arrays, new_axes, verify_integrity=False)
479483

480484
def get_bool_data(self: T, copy: bool = False) -> T:

pandas/core/internals/concat.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -123,12 +123,16 @@ def concat_arrays(to_concat: list) -> ArrayLike:
123123
# ignore the all-NA proxies to determine the resulting dtype
124124
to_concat_no_proxy = [x for x in to_concat if not isinstance(x, NullArrayProxy)]
125125

126-
single_dtype = len({x.dtype for x in to_concat_no_proxy}) == 1
126+
dtypes = {x.dtype for x in to_concat_no_proxy}
127+
single_dtype = len(dtypes) == 1
127128

128-
if not single_dtype:
129-
target_dtype = find_common_type([arr.dtype for arr in to_concat_no_proxy])
130-
else:
129+
if single_dtype:
131130
target_dtype = to_concat_no_proxy[0].dtype
131+
elif all(x.kind in ["i", "u", "b"] and isinstance(x, np.dtype) for x in dtypes):
132+
# GH#42092
133+
target_dtype = np.find_common_type(list(dtypes), [])
134+
else:
135+
target_dtype = find_common_type([arr.dtype for arr in to_concat_no_proxy])
132136

133137
if target_dtype.kind in ["m", "M"]:
134138
# for datetimelike use DatetimeArray/TimedeltaArray concatenation

pandas/io/stata.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1723,12 +1723,7 @@ def read(
17231723
if self.dtyplist[i] is not None:
17241724
col = data.columns[i]
17251725
dtype = data[col].dtype
1726-
# error: Value of type variable "_DTypeScalar" of "dtype" cannot be
1727-
# "object"
1728-
if (
1729-
dtype != np.dtype(object) # type: ignore[type-var]
1730-
and dtype != self.dtyplist[i]
1731-
):
1726+
if dtype != np.dtype(object) and dtype != self.dtyplist[i]:
17321727
requires_type_conversion = True
17331728
data_formatted.append(
17341729
(col, Series(data[col], ix, self.dtyplist[i]))

requirements-dev.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ xlrd
6464
xlsxwriter
6565
xlwt
6666
odfpy
67-
fastparquet>=0.4.0
67+
fastparquet>=0.4.0, <0.7.0
6868
pyarrow>=0.17.0
6969
python-snappy
7070
tables>=3.6.1

0 commit comments

Comments
 (0)