Skip to content

Commit f271005

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into less24024b
2 parents 1b109b8 + 6b31abd commit f271005

File tree

8 files changed

+142
-80
lines changed

8 files changed

+142
-80
lines changed

azure-pipelines.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ jobs:
4343
ci/incremental/install_miniconda.sh
4444
ci/incremental/setup_conda_environment.sh
4545
displayName: 'Set up environment'
46+
condition: true
4647
4748
# Do not require pandas
4849
- script: |

ci/code_checks.sh

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,12 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
158158
# RET=$(($RET + $?)) ; echo $MSG "DONE"
159159

160160
MSG='Check that no file in the repo contains tailing whitespaces' ; echo $MSG
161-
invgrep --exclude="*.svg" -RI "\s$" *
161+
set -o pipefail
162+
if [[ "$AZURE" == "true" ]]; then
163+
! grep -n --exclude="*.svg" -RI "\s$" * | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Tailing whitespaces found: " $3}'
164+
else
165+
! grep -n --exclude="*.svg" -RI "\s$" * | awk -F ":" '{print $1 ":" $2 ":Tailing whitespaces found: " $3}'
166+
fi
162167
RET=$(($RET + $?)) ; echo $MSG "DONE"
163168
fi
164169

doc/source/whatsnew/v0.24.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1318,6 +1318,7 @@ Datetimelike
13181318
- Bug in :class:`DatetimeIndex` where constructing a :class:`DatetimeIndex` from a :class:`Categorical` or :class:`CategoricalIndex` would incorrectly drop timezone information (:issue:`18664`)
13191319
- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where indexing with ``Ellipsis`` would incorrectly lose the index's ``freq`` attribute (:issue:`21282`)
13201320
- Clarified error message produced when passing an incorrect ``freq`` argument to :class:`DatetimeIndex` with ``NaT`` as the first entry in the passed data (:issue:`11587`)
1321+
- Bug in :func:`to_datetime` where ``box`` and ``utc`` arguments were ignored when passing a :class:`DataFrame` or ``dict`` of unit mappings (:issue:`23760`)
13211322

13221323
Timedelta
13231324
^^^^^^^^^
@@ -1361,6 +1362,7 @@ Timezones
13611362
- Bug in :class:`DatetimeIndex` constructor where ``NaT`` and ``dateutil.tz.tzlocal`` would raise an ``OutOfBoundsDatetime`` error (:issue:`23807`)
13621363
- Bug in :meth:`DatetimeIndex.tz_localize` and :meth:`Timestamp.tz_localize` with ``dateutil.tz.tzlocal`` near a DST transition that would return an incorrectly localized datetime (:issue:`23807`)
13631364
- Bug in :class:`Timestamp` constructor where a ``dateutil.tz.tzutc`` timezone passed with a ``datetime.datetime`` argument would be converted to a ``pytz.UTC`` timezone (:issue:`23807`)
1365+
- Bug in :func:`to_datetime` where ``utc=True`` was not respected when specifying a ``unit`` and ``errors='ignore'`` (:issue:`23758`)
13641366

13651367
Offsets
13661368
^^^^^^^

pandas/core/arrays/categorical.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -341,9 +341,6 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
341341
elif categories is not None or ordered is not None:
342342
raise ValueError("Cannot specify both `dtype` and `categories`"
343343
" or `ordered`.")
344-
345-
categories = dtype.categories
346-
347344
elif is_categorical(values):
348345
# If no "dtype" was passed, use the one from "values", but honor
349346
# the "ordered" and "categories" arguments
@@ -355,19 +352,17 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
355352
if (isinstance(values, (ABCSeries, ABCIndexClass)) and
356353
isinstance(values._values, type(self))):
357354
values = values._values.codes.copy()
358-
if categories is None:
359-
categories = dtype.categories
360355
fastpath = True
361-
362356
else:
363357
# If dtype=None and values is not categorical, create a new dtype
364358
dtype = CategoricalDtype(categories, ordered)
365359

366-
# At this point, dtype is always a CategoricalDtype
360+
# At this point, dtype is always a CategoricalDtype and you should not
361+
# use categories and ordered separately.
367362
# if dtype.categories is None, we are inferring
368363

369364
if fastpath:
370-
self._codes = coerce_indexer_dtype(values, categories)
365+
self._codes = coerce_indexer_dtype(values, dtype.categories)
371366
self._dtype = self._dtype.update_dtype(dtype)
372367
return
373368

@@ -379,7 +374,6 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
379374
if is_categorical_dtype(values):
380375
if dtype.categories is None:
381376
dtype = CategoricalDtype(values.categories, dtype.ordered)
382-
383377
elif not isinstance(values, (ABCIndexClass, ABCSeries)):
384378
# sanitize_array coerces np.nan to a string under certain versions
385379
# of numpy

pandas/core/tools/datetimes.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,12 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
204204
if box:
205205
if errors == 'ignore':
206206
from pandas import Index
207-
return Index(result, name=name)
207+
result = Index(result, name=name)
208+
# GH 23758: We may still need to localize the result with tz
209+
try:
210+
return result.tz_localize(tz)
211+
except AttributeError:
212+
return result
208213

209214
return DatetimeIndex(result, tz=tz, name=name)
210215
return result
@@ -572,7 +577,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
572577
values = convert_listlike(arg._values, True, format)
573578
result = Series(values, index=arg.index, name=arg.name)
574579
elif isinstance(arg, (ABCDataFrame, compat.MutableMapping)):
575-
result = _assemble_from_unit_mappings(arg, errors=errors)
580+
result = _assemble_from_unit_mappings(arg, errors, box, tz)
576581
elif isinstance(arg, ABCIndexClass):
577582
cache_array = _maybe_cache(arg, format, cache, convert_listlike)
578583
if not cache_array.empty:
@@ -618,7 +623,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
618623
}
619624

620625

621-
def _assemble_from_unit_mappings(arg, errors):
626+
def _assemble_from_unit_mappings(arg, errors, box, tz):
622627
"""
623628
assemble the unit specified fields from the arg (DataFrame)
624629
Return a Series for actual parsing
@@ -631,6 +636,11 @@ def _assemble_from_unit_mappings(arg, errors):
631636
- If 'raise', then invalid parsing will raise an exception
632637
- If 'coerce', then invalid parsing will be set as NaT
633638
- If 'ignore', then invalid parsing will return the input
639+
box : boolean
640+
641+
- If True, return a Series
642+
- If False, return an array
643+
tz : None or 'utc'
634644
635645
Returns
636646
-------
@@ -683,7 +693,7 @@ def coerce(values):
683693
coerce(arg[unit_rev['month']]) * 100 +
684694
coerce(arg[unit_rev['day']]))
685695
try:
686-
values = to_datetime(values, format='%Y%m%d', errors=errors)
696+
values = to_datetime(values, format='%Y%m%d', errors=errors, utc=tz)
687697
except (TypeError, ValueError) as e:
688698
raise ValueError("cannot assemble the "
689699
"datetimes: {error}".format(error=e))
@@ -698,7 +708,8 @@ def coerce(values):
698708
except (TypeError, ValueError) as e:
699709
raise ValueError("cannot assemble the datetimes [{value}]: "
700710
"{error}".format(value=value, error=e))
701-
711+
if not box:
712+
return values.values
702713
return values
703714

704715

pandas/tests/indexes/datetimes/test_tools.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -949,6 +949,33 @@ def test_dataframe_dtypes(self, cache):
949949
with pytest.raises(ValueError):
950950
to_datetime(df, cache=cache)
951951

952+
def test_dataframe_box_false(self):
953+
# GH 23760
954+
df = pd.DataFrame({'year': [2015, 2016],
955+
'month': [2, 3],
956+
'day': [4, 5]})
957+
result = pd.to_datetime(df, box=False)
958+
expected = np.array(['2015-02-04', '2016-03-05'],
959+
dtype='datetime64[ns]')
960+
tm.assert_numpy_array_equal(result, expected)
961+
962+
def test_dataframe_utc_true(self):
963+
# GH 23760
964+
df = pd.DataFrame({'year': [2015, 2016],
965+
'month': [2, 3],
966+
'day': [4, 5]})
967+
result = pd.to_datetime(df, utc=True)
968+
expected = pd.Series(np.array(['2015-02-04', '2016-03-05'],
969+
dtype='datetime64[ns]')).dt.tz_localize('UTC')
970+
tm.assert_series_equal(result, expected)
971+
972+
def test_to_datetime_errors_ignore_utc_true(self):
973+
# GH 23758
974+
result = pd.to_datetime([1], unit='s', box=True, utc=True,
975+
errors='ignore')
976+
expected = DatetimeIndex(['1970-01-01 00:00:01'], tz='UTC')
977+
tm.assert_index_equal(result, expected)
978+
952979

953980
class TestToDatetimeMisc(object):
954981
def test_to_datetime_barely_out_of_bounds(self):

pandas/tests/indexing/multiindex/test_getitem.py

Lines changed: 62 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
from pandas.compat import lrange, range, u, zip
55

66
import pandas as pd
7-
from pandas import DataFrame, Index, MultiIndex, Series, date_range
7+
from pandas import DataFrame, Index, MultiIndex, Series
88
import pandas.core.common as com
9+
from pandas.core.indexing import IndexingError
910
from pandas.util import testing as tm
1011

1112

@@ -27,29 +28,6 @@ def test_series_getitem_multiindex(access_method, level1_value, expected):
2728
tm.assert_series_equal(result, expected)
2829

2930

30-
def test_series_getitem_multiindex_xs():
31-
# GH6258
32-
dt = list(date_range('20130903', periods=3))
33-
idx = MultiIndex.from_product([list('AB'), dt])
34-
s = Series([1, 3, 4, 1, 3, 4], index=idx)
35-
36-
result = s.xs('20130903', level=1)
37-
expected = Series([1, 1], index=list('AB'))
38-
tm.assert_series_equal(result, expected)
39-
40-
41-
def test_series_getitem_multiindex_xs_by_label():
42-
# GH5684
43-
idx = MultiIndex.from_tuples([('a', 'one'), ('a', 'two'), ('b', 'one'),
44-
('b', 'two')])
45-
s = Series([1, 2, 3, 4], index=idx)
46-
s.index.set_names(['L1', 'L2'], inplace=True)
47-
result = s.xs('one', level='L2')
48-
expected = Series([1, 3], index=['a', 'b'])
49-
expected.index.set_names(['L1'], inplace=True)
50-
tm.assert_series_equal(result, expected)
51-
52-
5331
@pytest.mark.parametrize('level0_value', ['D', 'A'])
5432
def test_getitem_duplicates_multiindex(level0_value):
5533
# GH 5725 the 'A' happens to be a valid Timestamp so it doesn't raise
@@ -145,63 +123,83 @@ def test_getitem_duplicates_multiindex_non_scalar_type_object():
145123

146124

147125
def test_getitem_simple(multiindex_dataframe_random_data):
148-
frame = multiindex_dataframe_random_data
149-
df = frame.T
150-
151-
col = df['foo', 'one']
152-
tm.assert_almost_equal(col.values, df.values[:, 0])
153-
msg = r"\('foo', 'four'\)"
154-
with pytest.raises(KeyError, match=msg):
155-
df[('foo', 'four')]
156-
msg = "'foobar'"
157-
with pytest.raises(KeyError, match=msg):
158-
df['foobar']
126+
df = multiindex_dataframe_random_data.T
127+
expected = df.values[:, 0]
128+
result = df['foo', 'one'].values
129+
tm.assert_almost_equal(result, expected)
159130

160131

161-
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
162-
def test_series_getitem(multiindex_year_month_day_dataframe_random_data):
163-
ymd = multiindex_year_month_day_dataframe_random_data
164-
s = ymd['A']
165-
166-
result = s[2000, 3]
132+
@pytest.mark.parametrize('indexer,msg', [
133+
(lambda df: df[('foo', 'four')], r"\('foo', 'four'\)"),
134+
(lambda df: df['foobar'], "'foobar'")
135+
])
136+
def test_getitem_simple_key_error(
137+
multiindex_dataframe_random_data, indexer, msg):
138+
df = multiindex_dataframe_random_data.T
139+
with pytest.raises(KeyError, match=msg):
140+
indexer(df)
167141

168-
# TODO(wesm): unused?
169-
# result2 = s.loc[2000, 3]
170142

143+
@pytest.mark.parametrize('indexer', [
144+
lambda s: s[2000, 3],
145+
lambda s: s.loc[2000, 3]
146+
])
147+
def test_series_getitem(
148+
multiindex_year_month_day_dataframe_random_data, indexer):
149+
s = multiindex_year_month_day_dataframe_random_data['A']
171150
expected = s.reindex(s.index[42:65])
172151
expected.index = expected.index.droplevel(0).droplevel(0)
152+
153+
result = indexer(s)
173154
tm.assert_series_equal(result, expected)
174155

175-
result = s[2000, 3, 10]
176-
expected = s[49]
156+
157+
@pytest.mark.parametrize('indexer', [
158+
lambda s: s[2000, 3, 10],
159+
lambda s: s.loc[2000, 3, 10]
160+
])
161+
def test_series_getitem_returns_scalar(
162+
multiindex_year_month_day_dataframe_random_data, indexer):
163+
s = multiindex_year_month_day_dataframe_random_data['A']
164+
expected = s.iloc[49]
165+
166+
result = indexer(s)
177167
assert result == expected
178168

179-
# fancy
169+
170+
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
171+
@pytest.mark.parametrize('indexer', [
172+
lambda s: s.loc[[(2000, 3, 10), (2000, 3, 13)]],
173+
lambda s: s.ix[[(2000, 3, 10), (2000, 3, 13)]]
174+
])
175+
def test_series_getitem_fancy(
176+
multiindex_year_month_day_dataframe_random_data, indexer):
177+
s = multiindex_year_month_day_dataframe_random_data['A']
180178
expected = s.reindex(s.index[49:51])
181-
result = s.loc[[(2000, 3, 10), (2000, 3, 13)]]
182-
tm.assert_series_equal(result, expected)
183179

184-
result = s.ix[[(2000, 3, 10), (2000, 3, 13)]]
180+
result = indexer(s)
185181
tm.assert_series_equal(result, expected)
186182

187-
# key error
188-
msg = "356"
189-
with pytest.raises(KeyError, match=msg):
190-
s.__getitem__((2000, 3, 4))
191-
192183

193-
def test_series_getitem_corner(
194-
multiindex_year_month_day_dataframe_random_data):
195-
ymd = multiindex_year_month_day_dataframe_random_data
196-
s = ymd['A']
184+
@pytest.mark.parametrize('indexer,error,msg', [
185+
(lambda s: s.__getitem__((2000, 3, 4)), KeyError, '356'),
186+
(lambda s: s[(2000, 3, 4)], KeyError, '356'),
187+
(lambda s: s.loc[(2000, 3, 4)], IndexingError, 'Too many indexers'),
188+
(lambda s: s.__getitem__(len(s)), IndexError, 'index out of bounds'),
189+
(lambda s: s[len(s)], IndexError, 'index out of bounds'),
190+
(lambda s: s.iloc[len(s)], IndexError,
191+
'single positional indexer is out-of-bounds')
192+
])
193+
def test_series_getitem_indexing_errors(
194+
multiindex_year_month_day_dataframe_random_data, indexer, error, msg):
195+
s = multiindex_year_month_day_dataframe_random_data['A']
196+
with pytest.raises(error, match=msg):
197+
indexer(s)
197198

198-
# don't segfault, GH #495
199-
# out of bounds access
200-
msg = "index out of bounds"
201-
with pytest.raises(IndexError, match=msg):
202-
s.__getitem__(len(ymd))
203199

204-
# generator
200+
def test_series_getitem_corner_generator(
201+
multiindex_year_month_day_dataframe_random_data):
202+
s = multiindex_year_month_day_dataframe_random_data['A']
205203
result = s[(x > 0 for x in s)]
206204
expected = s[s > 0]
207205
tm.assert_series_equal(result, expected)

pandas/tests/indexing/multiindex/test_xs.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from pandas.compat import lrange, product as cart_product
55

6-
from pandas import DataFrame, Index, MultiIndex, concat
6+
from pandas import DataFrame, Index, MultiIndex, Series, concat, date_range
77
import pandas.core.common as com
88
from pandas.util import testing as tm
99

@@ -211,3 +211,27 @@ def test_xs_level_series_slice_not_implemented(
211211
msg = r'\(2000, slice\(3, 4, None\)\)'
212212
with pytest.raises(TypeError, match=msg):
213213
s[2000, 3:4]
214+
215+
216+
def test_series_getitem_multiindex_xs():
217+
# GH6258
218+
dt = list(date_range('20130903', periods=3))
219+
idx = MultiIndex.from_product([list('AB'), dt])
220+
s = Series([1, 3, 4, 1, 3, 4], index=idx)
221+
expected = Series([1, 1], index=list('AB'))
222+
223+
result = s.xs('20130903', level=1)
224+
tm.assert_series_equal(result, expected)
225+
226+
227+
def test_series_getitem_multiindex_xs_by_label():
228+
# GH5684
229+
idx = MultiIndex.from_tuples([('a', 'one'), ('a', 'two'), ('b', 'one'),
230+
('b', 'two')])
231+
s = Series([1, 2, 3, 4], index=idx)
232+
s.index.set_names(['L1', 'L2'], inplace=True)
233+
expected = Series([1, 3], index=['a', 'b'])
234+
expected.index.set_names(['L1'], inplace=True)
235+
236+
result = s.xs('one', level='L2')
237+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)