Skip to content

Commit f86eb2d

Browse files
committed
Merge branch 'bug_issue16770' of github.com:ri938/pandas into bug_issue16770
2 parents b8edfc1 + 4165b31 commit f86eb2d

36 files changed

+383
-83
lines changed

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
- [ ] closes #xxxx
22
- [ ] tests added / passed
3-
- [ ] passes ``git diff upstream/master --name-only -- '*.py' | flake8 --diff``
3+
- [ ] passes ``git diff upstream/master --name-only -- '*.py' | flake8 --diff`` (On Windows, ``git diff upstream/master -u -- "*.py" | flake8 --diff`` might work as an alternative.)
44
- [ ] whatsnew entry

.travis.yml

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -35,61 +35,61 @@ matrix:
3535
language: generic
3636
env:
3737
- JOB="3.5_OSX" TEST_ARGS="--skip-slow --skip-network"
38-
- os: linux
38+
- dist: trusty
3939
env:
4040
- JOB="2.7_LOCALE" TEST_ARGS="--only-slow --skip-network" LOCALE_OVERRIDE="zh_CN.UTF-8"
4141
addons:
4242
apt:
4343
packages:
4444
- language-pack-zh-hans
45-
- os: linux
45+
- dist: trusty
4646
env:
4747
- JOB="2.7" TEST_ARGS="--skip-slow" LINT=true
4848
addons:
4949
apt:
5050
packages:
5151
- python-gtk2
52-
- os: linux
52+
- dist: trusty
5353
env:
5454
- JOB="3.5" TEST_ARGS="--skip-slow --skip-network" COVERAGE=true
5555
addons:
5656
apt:
5757
packages:
5858
- xsel
59-
- os: linux
59+
- dist: trusty
6060
env:
6161
- JOB="3.6" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true
6262
# In allow_failures
63-
- os: linux
63+
- dist: trusty
6464
env:
6565
- JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network"
6666
# In allow_failures
67-
- os: linux
67+
- dist: trusty
6868
env:
6969
- JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true
7070
# In allow_failures
71-
- os: linux
71+
- dist: trusty
7272
env:
7373
- JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate"
7474
# In allow_failures
75-
- os: linux
75+
- dist: trusty
7676
env:
7777
- JOB="3.6_DOC" DOC=true
7878
addons:
7979
apt:
8080
packages:
8181
- xsel
8282
allow_failures:
83-
- os: linux
83+
- dist: trusty
8484
env:
8585
- JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network"
86-
- os: linux
86+
- dist: trusty
8787
env:
8888
- JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true
89-
- os: linux
89+
- dist: trusty
9090
env:
9191
- JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate"
92-
- os: linux
92+
- dist: trusty
9393
env:
9494
- JOB="3.6_DOC" DOC=true
9595

ci/requirements-2.7.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ source activate pandas
44

55
echo "install 27"
66

7-
conda install -n pandas -c conda-forge feather-format jemalloc=4.4.0
7+
conda install -n pandas -c conda-forge feather-format

ci/requirements-2.7_BUILD_TEST.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ source activate pandas
44

55
echo "install 27 BUILD_TEST"
66

7-
conda install -n pandas -c conda-forge pyarrow dask jemalloc=4.4.0
7+
conda install -n pandas -c conda-forge pyarrow dask

ci/requirements-3.5.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ source activate pandas
44

55
echo "install 35"
66

7-
conda install -n pandas -c conda-forge feather-format jemalloc=4.4.0
7+
conda install -n pandas -c conda-forge feather-format

ci/requirements-3.6.run

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ html5lib
1414
jinja2
1515
sqlalchemy
1616
pymysql
17-
jemalloc=4.4.0
1817
feather-format
1918
# psycopg2 (not avail on defaults ATM)
2019
beautifulsoup4

ci/requirements-3.6_DOC.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,6 @@ echo "[install DOC_BUILD deps]"
66

77
pip install pandas-gbq
88

9-
conda install -n pandas -c conda-forge feather-format nbsphinx pandoc jemalloc=4.4.0
9+
conda install -n pandas -c conda-forge feather-format nbsphinx pandoc
1010

1111
conda install -n pandas -c r r rpy2 --yes

doc/source/contributing.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,12 @@ run this slightly modified command::
525525

526526
git diff master --name-only -- '*.py' | grep 'pandas/' | xargs flake8
527527

528+
Note that on Windows, ``grep``, ``xargs``, and other tools are likely
529+
unavailable. However, this has been shown to work on smaller commits in the
530+
standard Windows command line::
531+
532+
git diff master -u -- "*.py" | flake8 --diff
533+
528534
Backwards Compatibility
529535
~~~~~~~~~~~~~~~~~~~~~~~
530536

doc/source/whatsnew/v0.20.3.txt

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,18 @@ Performance Improvements
3737
Bug Fixes
3838
~~~~~~~~~
3939
- Fixed issue with dataframe scatter plot for categorical data that reports incorrect column key not found when categorical data is used for plotting (:issue:`16199`)
40+
- Fixed issue with :meth:`DataFrame.style` where element ids were not unique (:issue:`16780`)
41+
- Fixed a pytest marker failing downstream packages' test suites (:issue:`16680`)
42+
- Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`)
4043
- Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`)
41-
42-
44+
- Fixed a bug in failing to compute rolling computations of a column-MultiIndexed ``DataFrame`` (:issue:`16789`, :issue:`16825`)
4345

4446
Conversion
4547
^^^^^^^^^^
4648

4749
- Bug in pickle compat prior to the v0.20.x series, when ``UTC`` is a timezone in a Series/DataFrame/Index (:issue:`16608`)
48-
- Bug in Series construction when passing a Series with ``dtype='category'`` (:issue:`16524`).
50+
- Bug in ``Series`` construction when passing a ``Series`` with ``dtype='category'`` (:issue:`16524`).
51+
- Bug in ``DataFrame.astype()`` when passing a ``Series`` as the ``dtype`` kwarg (:issue:`16717`).
4952

5053
Indexing
5154
^^^^^^^^
@@ -55,8 +58,9 @@ Indexing
5558
I/O
5659
^^^
5760

58-
- Bug in :func:`read_csv`` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue:`16675`)
59-
- Bug in :func:`read_hdf`` in which reading a ``Series`` saved to an HDF file in 'fixed' format fails when an explicit ``mode='r'`` argument is supplied (:issue:`16583`)
61+
- Bug in :func:`read_csv` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue:`16675`)
62+
- Bug in :func:`read_hdf` in which reading a ``Series`` saved to an HDF file in 'fixed' format fails when an explicit ``mode='r'`` argument is supplied (:issue:`16583`)
63+
- Bug in :func:`DataFrame.to_latex` where ``bold_rows`` was wrongly specified to be ``True`` by default, whereas in reality row labels remained non-bold regardless of the value provided (:issue:`16707`)
6064

6165
Plotting
6266
^^^^^^^^
@@ -78,6 +82,8 @@ Sparse
7882
Reshaping
7983
^^^^^^^^^
8084

85+
- Bug in joining on a ``MultiIndex`` with a ``category`` dtype for a level (:issue:`16627`).
86+
- Bug in :func:`merge` when merging/joining with multiple categorical columns (:issue:`16767`)
8187

8288

8389
Numeric

doc/source/whatsnew/v0.21.0.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ Performance Improvements
9292
Bug Fixes
9393
~~~~~~~~~
9494

95+
- Fixes regression in 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`)
96+
9597
Conversion
9698
^^^^^^^^^^
9799

@@ -133,6 +135,7 @@ Reshaping
133135

134136
Numeric
135137
^^^^^^^
138+
- Bug in ``.clip()`` when ``axis=1`` and a list-like ``threshold`` is passed; previously this raised ``ValueError`` (:issue:`15390`)
136139

137140

138141
Categorical

pandas/_libs/src/reduce.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ cdef class SeriesGrouper:
419419
cdef inline _extract_result(object res):
420420
""" extract the result object, it might be a 0-dim ndarray
421421
or a len-1 0-dim, or a scalar """
422-
if hasattr(res, 'values'):
422+
if hasattr(res, 'values') and isinstance(res.values, np.ndarray):
423423
res = res.values
424424
if not np.isscalar(res):
425425
if isinstance(res, np.ndarray):

pandas/core/generic.py

Lines changed: 38 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
from pandas.compat.numpy import function as nv
5353
from pandas.compat import (map, zip, lzip, lrange, string_types,
5454
isidentifier, set_function_name, cPickle as pkl)
55+
from pandas.core.ops import _align_method_FRAME
5556
import pandas.core.nanops as nanops
5657
from pandas.util._decorators import Appender, Substitution, deprecate_kwarg
5758
from pandas.util._validators import validate_bool_kwarg
@@ -1538,7 +1539,7 @@ def to_xarray(self):
15381539
15391540
`to_latex`-specific options:
15401541
1541-
bold_rows : boolean, default True
1542+
bold_rows : boolean, default False
15421543
Make the row labels bold in the output
15431544
column_format : str, default None
15441545
The columns format as specified in `LaTeX table format
@@ -1587,7 +1588,7 @@ def to_xarray(self):
15871588
@Appender(_shared_docs['to_latex'] % _shared_doc_kwargs)
15881589
def to_latex(self, buf=None, columns=None, col_space=None, header=True,
15891590
index=True, na_rep='NaN', formatters=None, float_format=None,
1590-
sparsify=None, index_names=True, bold_rows=True,
1591+
sparsify=None, index_names=True, bold_rows=False,
15911592
column_format=None, longtable=None, escape=None,
15921593
encoding=None, decimal='.', multicolumn=None,
15931594
multicolumn_format=None, multirow=None):
@@ -3507,12 +3508,12 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs):
35073508
-------
35083509
casted : type of caller
35093510
"""
3510-
if isinstance(dtype, collections.Mapping):
3511+
if is_dict_like(dtype):
35113512
if self.ndim == 1: # i.e. Series
3512-
if len(dtype) > 1 or list(dtype.keys())[0] != self.name:
3513+
if len(dtype) > 1 or self.name not in dtype:
35133514
raise KeyError('Only the Series name can be used for '
35143515
'the key in Series dtype mappings.')
3515-
new_type = list(dtype.values())[0]
3516+
new_type = dtype[self.name]
35163517
return self.astype(new_type, copy, errors, **kwargs)
35173518
elif self.ndim > 2:
35183519
raise NotImplementedError(
@@ -4413,6 +4414,34 @@ def _clip_with_scalar(self, lower, upper, inplace=False):
44134414
else:
44144415
return result
44154416

4417+
def _clip_with_one_bound(self, threshold, method, axis, inplace):
4418+
4419+
inplace = validate_bool_kwarg(inplace, 'inplace')
4420+
if axis is not None:
4421+
axis = self._get_axis_number(axis)
4422+
4423+
if np.any(isnull(threshold)):
4424+
raise ValueError("Cannot use an NA value as a clip threshold")
4425+
4426+
# method is self.le for upper bound and self.ge for lower bound
4427+
if is_scalar(threshold) and is_number(threshold):
4428+
if method.__name__ == 'le':
4429+
return self._clip_with_scalar(None, threshold, inplace=inplace)
4430+
return self._clip_with_scalar(threshold, None, inplace=inplace)
4431+
4432+
subset = method(threshold, axis=axis) | isnull(self)
4433+
4434+
# GH #15390
4435+
# In order for where method to work, the threshold must
4436+
# be transformed to NDFrame from other array like structure.
4437+
if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold):
4438+
if isinstance(self, ABCSeries):
4439+
threshold = pd.Series(threshold, index=self.index)
4440+
else:
4441+
threshold = _align_method_FRAME(self, np.asarray(threshold),
4442+
axis)
4443+
return self.where(subset, threshold, axis=axis, inplace=inplace)
4444+
44164445
def clip(self, lower=None, upper=None, axis=None, inplace=False,
44174446
*args, **kwargs):
44184447
"""
@@ -4515,16 +4544,8 @@ def clip_upper(self, threshold, axis=None, inplace=False):
45154544
-------
45164545
clipped : same type as input
45174546
"""
4518-
if np.any(isnull(threshold)):
4519-
raise ValueError("Cannot use an NA value as a clip threshold")
4520-
4521-
if is_scalar(threshold) and is_number(threshold):
4522-
return self._clip_with_scalar(None, threshold, inplace=inplace)
4523-
4524-
inplace = validate_bool_kwarg(inplace, 'inplace')
4525-
4526-
subset = self.le(threshold, axis=axis) | isnull(self)
4527-
return self.where(subset, threshold, axis=axis, inplace=inplace)
4547+
return self._clip_with_one_bound(threshold, method=self.le,
4548+
axis=axis, inplace=inplace)
45284549

45294550
def clip_lower(self, threshold, axis=None, inplace=False):
45304551
"""
@@ -4547,16 +4568,8 @@ def clip_lower(self, threshold, axis=None, inplace=False):
45474568
-------
45484569
clipped : same type as input
45494570
"""
4550-
if np.any(isnull(threshold)):
4551-
raise ValueError("Cannot use an NA value as a clip threshold")
4552-
4553-
if is_scalar(threshold) and is_number(threshold):
4554-
return self._clip_with_scalar(threshold, None, inplace=inplace)
4555-
4556-
inplace = validate_bool_kwarg(inplace, 'inplace')
4557-
4558-
subset = self.ge(threshold, axis=axis) | isnull(self)
4559-
return self.where(subset, threshold, axis=axis, inplace=inplace)
4571+
return self._clip_with_one_bound(threshold, method=self.ge,
4572+
axis=axis, inplace=inplace)
45604573

45614574
def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
45624575
group_keys=True, squeeze=False, **kwargs):

pandas/core/indexes/category.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,9 @@ def take(self, indices, axis=0, allow_fill=True,
563563
na_value=-1)
564564
return self._create_from_codes(taken)
565565

566+
def is_dtype_equal(self, other):
567+
return self._data.is_dtype_equal(other)
568+
566569
take_nd = take
567570

568571
def map(self, mapper):

pandas/core/indexes/multi.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1383,6 +1383,9 @@ def __getitem__(self, key):
13831383
# cannot be sure whether the result will be sorted
13841384
sortorder = None
13851385

1386+
if isinstance(key, Index):
1387+
key = np.asarray(key)
1388+
13861389
new_labels = [lab[key] for lab in self.labels]
13871390

13881391
return MultiIndex(levels=self.levels, labels=new_labels,

pandas/core/reshape/merge.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1440,13 +1440,14 @@ def _factorize_keys(lk, rk, sort=True):
14401440
lk = lk.values
14411441
rk = rk.values
14421442

1443-
# if we exactly match in categories, allow us to use codes
1443+
# if we exactly match in categories, allow us to factorize on codes
14441444
if (is_categorical_dtype(lk) and
14451445
is_categorical_dtype(rk) and
14461446
lk.is_dtype_equal(rk)):
1447-
return lk.codes, rk.codes, len(lk.categories)
1448-
1449-
if is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk):
1447+
klass = libhashtable.Int64Factorizer
1448+
lk = _ensure_int64(lk.codes)
1449+
rk = _ensure_int64(rk.codes)
1450+
elif is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk):
14501451
klass = libhashtable.Int64Factorizer
14511452
lk = _ensure_int64(com._values_from_object(lk))
14521453
rk = _ensure_int64(com._values_from_object(rk))

pandas/core/window.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -836,7 +836,7 @@ def count(self):
836836

837837
return self._wrap_results(results, blocks, obj)
838838

839-
_shared_docs['apply'] = dedent("""
839+
_shared_docs['apply'] = dedent(r"""
840840
%(name)s function apply
841841
842842
Parameters
@@ -1922,7 +1922,8 @@ def dataframe_from_int_dict(data, frame_template):
19221922

19231923
# TODO: not the most efficient (perf-wise)
19241924
# though not bad code-wise
1925-
from pandas import Panel, MultiIndex, Index
1925+
from pandas import Panel, MultiIndex
1926+
19261927
with warnings.catch_warnings(record=True):
19271928
p = Panel.from_dict(results).swapaxes('items', 'major')
19281929
if len(p.major_axis) > 0:
@@ -1945,10 +1946,10 @@ def dataframe_from_int_dict(data, frame_template):
19451946
# reset our index names to arg1 names
19461947
# reset our column names to arg2 names
19471948
# careful not to mutate the original names
1948-
result.columns = Index(result.columns).set_names(
1949-
arg2.columns.name)
1949+
result.columns = result.columns.set_names(
1950+
arg2.columns.names)
19501951
result.index = result.index.set_names(
1951-
[arg1.index.name, arg1.columns.name])
1952+
arg1.index.names + arg1.columns.names)
19521953

19531954
return result
19541955

0 commit comments

Comments
 (0)