Skip to content

Commit 7c0e6f7

Browse files
committed
Merge remote-tracking branch 'refs/remotes/pydata/master' into Fix-for-pandas-dev#11317
Conflicts: doc/source/whatsnew/v0.17.1.txt Bringing it up to date with the current master
2 parents c202599 + 89b4e5b commit 7c0e6f7

File tree

16 files changed

+344
-191
lines changed

16 files changed

+344
-191
lines changed

asv_bench/benchmarks/frame_methods.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -930,6 +930,16 @@ def time_frame_xs_row(self):
930930
self.df.xs(50000)
931931

932932

933+
class frame_sort_index(object):
934+
goal_time = 0.2
935+
936+
def setup(self):
937+
self.df = DataFrame(randn(1000000, 2), columns=list('AB'))
938+
939+
def time_frame_sort_index(self):
940+
self.df.sort_index()
941+
942+
933943
class series_string_vector_slice(object):
934944
goal_time = 0.2
935945

doc/source/whatsnew/v0.17.1.txt

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,17 @@ API changes
4343
Deprecations
4444
^^^^^^^^^^^^
4545

46+
- The ``pandas.io.ga`` module which implements ``google-analytics`` support is deprecated and will be removed in a future version (:issue:`11308`)
47+
- Deprecate the ``engine`` keyword from ``.to_csv()``, which will be removed in a future version (:issue:`11274`)
48+
49+
4650
.. _whatsnew_0171.performance:
4751

4852
Performance Improvements
4953
~~~~~~~~~~~~~~~~~~~~~~~~
5054

55+
- Check for monotonicity before sorting on an index, skipping the sort when the index is already ordered (:issue:`11080`)
56+
5157
.. _whatsnew_0171.bug_fixes:
5258

5359
Bug Fixes
@@ -61,10 +67,10 @@ Bug Fixes
6167
- Bug in tz-conversions with an ambiguous time and ``.dt`` accessors (:issue:`11295`)
6268

6369

70+
- Bug in list-like indexing with a mixed-integer Index (:issue:`11320`)
6471

6572

66-
67-
73+
- Bug in ``DataFrame.plot`` where hex string colors could not be used (:issue:`10299`)
6874

6975

7076

@@ -92,3 +98,7 @@ Bug Fixes
9298
``datetime64[ns, tz]`` (:issue:`11245`).
9399

94100
- Bug in ``read_excel`` with multi-index containing integers (:issue:`11317`, :issue:`11328`)
101+
102+
- Bug in ``DataFrame.to_dict()`` where a ``np.datetime64`` object was produced instead of a ``Timestamp`` when the data contains only datetimes (:issue:`11327`)
103+
104+

pandas/core/common.py

Lines changed: 1 addition & 151 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
import re
66
import collections
77
import numbers
8-
import codecs
9-
import csv
108
import types
119
from datetime import datetime, timedelta
1210
from functools import partial
@@ -19,7 +17,7 @@
1917
import pandas.lib as lib
2018
import pandas.tslib as tslib
2119
from pandas import compat
22-
from pandas.compat import StringIO, BytesIO, range, long, u, zip, map, string_types, iteritems
20+
from pandas.compat import BytesIO, range, long, u, zip, map, string_types, iteritems
2321
from pandas.core.dtypes import CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, DatetimeTZDtypeType
2422
from pandas.core.config import get_option
2523

@@ -2808,154 +2806,6 @@ def _all_none(*args):
28082806
return True
28092807

28102808

2811-
class UTF8Recoder:
2812-
2813-
"""
2814-
Iterator that reads an encoded stream and reencodes the input to UTF-8
2815-
"""
2816-
2817-
def __init__(self, f, encoding):
2818-
self.reader = codecs.getreader(encoding)(f)
2819-
2820-
def __iter__(self):
2821-
return self
2822-
2823-
def read(self, bytes=-1):
2824-
return self.reader.read(bytes).encode('utf-8')
2825-
2826-
def readline(self):
2827-
return self.reader.readline().encode('utf-8')
2828-
2829-
def next(self):
2830-
return next(self.reader).encode("utf-8")
2831-
2832-
# Python 3 iterator
2833-
__next__ = next
2834-
2835-
2836-
def _get_handle(path, mode, encoding=None, compression=None):
2837-
"""Gets file handle for given path and mode.
2838-
NOTE: Under Python 3.2, getting a compressed file handle means reading in
2839-
the entire file, decompressing it and decoding it to ``str`` all at once
2840-
and then wrapping it in a StringIO.
2841-
"""
2842-
if compression is not None:
2843-
if encoding is not None and not compat.PY3:
2844-
msg = 'encoding + compression not yet supported in Python 2'
2845-
raise ValueError(msg)
2846-
2847-
if compression == 'gzip':
2848-
import gzip
2849-
f = gzip.GzipFile(path, mode)
2850-
elif compression == 'bz2':
2851-
import bz2
2852-
f = bz2.BZ2File(path, mode)
2853-
else:
2854-
raise ValueError('Unrecognized compression type: %s' %
2855-
compression)
2856-
if compat.PY3:
2857-
from io import TextIOWrapper
2858-
f = TextIOWrapper(f, encoding=encoding)
2859-
return f
2860-
else:
2861-
if compat.PY3:
2862-
if encoding:
2863-
f = open(path, mode, encoding=encoding)
2864-
else:
2865-
f = open(path, mode, errors='replace')
2866-
else:
2867-
f = open(path, mode)
2868-
2869-
return f
2870-
2871-
2872-
if compat.PY3: # pragma: no cover
2873-
def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds):
2874-
# ignore encoding
2875-
return csv.reader(f, dialect=dialect, **kwds)
2876-
2877-
def UnicodeWriter(f, dialect=csv.excel, encoding="utf-8", **kwds):
2878-
return csv.writer(f, dialect=dialect, **kwds)
2879-
else:
2880-
class UnicodeReader:
2881-
2882-
"""
2883-
A CSV reader which will iterate over lines in the CSV file "f",
2884-
which is encoded in the given encoding.
2885-
2886-
On Python 3, this is replaced (below) by csv.reader, which handles
2887-
unicode.
2888-
"""
2889-
2890-
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
2891-
f = UTF8Recoder(f, encoding)
2892-
self.reader = csv.reader(f, dialect=dialect, **kwds)
2893-
2894-
def next(self):
2895-
row = next(self.reader)
2896-
return [compat.text_type(s, "utf-8") for s in row]
2897-
2898-
# python 3 iterator
2899-
__next__ = next
2900-
2901-
def __iter__(self): # pragma: no cover
2902-
return self
2903-
2904-
class UnicodeWriter:
2905-
2906-
"""
2907-
A CSV writer which will write rows to CSV file "f",
2908-
which is encoded in the given encoding.
2909-
"""
2910-
2911-
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
2912-
# Redirect output to a queue
2913-
self.queue = StringIO()
2914-
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
2915-
self.stream = f
2916-
self.encoder = codecs.getincrementalencoder(encoding)()
2917-
self.quoting = kwds.get("quoting", None)
2918-
2919-
def writerow(self, row):
2920-
def _check_as_is(x):
2921-
return (self.quoting == csv.QUOTE_NONNUMERIC and
2922-
is_number(x)) or isinstance(x, str)
2923-
2924-
row = [x if _check_as_is(x)
2925-
else pprint_thing(x).encode('utf-8') for x in row]
2926-
2927-
self.writer.writerow([s for s in row])
2928-
# Fetch UTF-8 output from the queue ...
2929-
data = self.queue.getvalue()
2930-
data = data.decode("utf-8")
2931-
# ... and reencode it into the target encoding
2932-
data = self.encoder.encode(data)
2933-
# write to the target stream
2934-
self.stream.write(data)
2935-
# empty queue
2936-
self.queue.truncate(0)
2937-
2938-
def writerows(self, rows):
2939-
def _check_as_is(x):
2940-
return (self.quoting == csv.QUOTE_NONNUMERIC and
2941-
is_number(x)) or isinstance(x, str)
2942-
2943-
for i, row in enumerate(rows):
2944-
rows[i] = [x if _check_as_is(x)
2945-
else pprint_thing(x).encode('utf-8') for x in row]
2946-
2947-
self.writer.writerows([[s for s in row] for row in rows])
2948-
# Fetch UTF-8 output from the queue ...
2949-
data = self.queue.getvalue()
2950-
data = data.decode("utf-8")
2951-
# ... and reencode it into the target encoding
2952-
data = self.encoder.encode(data)
2953-
# write to the target stream
2954-
self.stream.write(data)
2955-
# empty queue
2956-
self.queue.truncate(0)
2957-
2958-
29592809
def get_dtype_kinds(l):
29602810
"""
29612811
Parameters

pandas/core/format.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
OrderedDict)
1414
from pandas.util.terminal import get_terminal_size
1515
from pandas.core.config import get_option, set_option
16+
from pandas.io.common import _get_handle, UnicodeWriter
1617
import pandas.core.common as com
1718
import pandas.lib as lib
1819
from pandas.tslib import iNaT, Timestamp, Timedelta, format_array_from_datetime
@@ -23,6 +24,7 @@
2324

2425
import itertools
2526
import csv
27+
import warnings
2628

2729
common_docstring = """
2830
Parameters
@@ -1264,7 +1266,11 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None,
12641266
tupleize_cols=False, quotechar='"', date_format=None,
12651267
doublequote=True, escapechar=None, decimal='.'):
12661268

1267-
self.engine = engine # remove for 0.13
1269+
if engine is not None:
1270+
warnings.warn("'engine' keyword is deprecated and "
1271+
"will be removed in a future version",
1272+
FutureWarning, stacklevel=3)
1273+
self.engine = engine # remove for 0.18
12681274
self.obj = obj
12691275

12701276
if path_or_buf is None:
@@ -1470,8 +1476,8 @@ def save(self):
14701476
f = self.path_or_buf
14711477
close = False
14721478
else:
1473-
f = com._get_handle(self.path_or_buf, self.mode,
1474-
encoding=self.encoding,
1479+
f = _get_handle(self.path_or_buf, self.mode,
1480+
encoding=self.encoding,
14751481
compression=self.compression)
14761482
close = True
14771483

@@ -1483,7 +1489,7 @@ def save(self):
14831489
quotechar=self.quotechar)
14841490
if self.encoding is not None:
14851491
writer_kwargs['encoding'] = self.encoding
1486-
self.writer = com.UnicodeWriter(f, **writer_kwargs)
1492+
self.writer = UnicodeWriter(f, **writer_kwargs)
14871493
else:
14881494
self.writer = csv.writer(f, **writer_kwargs)
14891495

pandas/core/frame.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -802,11 +802,12 @@ def to_dict(self, orient='dict'):
802802
elif orient.lower().startswith('sp'):
803803
return {'index': self.index.tolist(),
804804
'columns': self.columns.tolist(),
805-
'data': self.values.tolist()}
805+
'data': lib.map_infer(self.values.ravel(), _maybe_box_datetimelike)
806+
.reshape(self.values.shape).tolist()}
806807
elif orient.lower().startswith('s'):
807-
return dict((k, v) for k, v in compat.iteritems(self))
808+
return dict((k, _maybe_box_datetimelike(v)) for k, v in compat.iteritems(self))
808809
elif orient.lower().startswith('r'):
809-
return [dict((k, v) for k, v in zip(self.columns, row))
810+
return [dict((k, _maybe_box_datetimelike(v)) for k, v in zip(self.columns, row))
810811
for row in self.values]
811812
elif orient.lower().startswith('i'):
812813
return dict((k, v.to_dict()) for k, v in self.iterrows())
@@ -3157,6 +3158,15 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
31573158
else:
31583159
from pandas.core.groupby import _nargsort
31593160

3161+
# GH11080 - Check monotonic-ness before sorting an index
3162+
# if monotonic (already sorted), return None or copy() according to 'inplace'
3163+
if (ascending and labels.is_monotonic_increasing) or \
3164+
(not ascending and labels.is_monotonic_decreasing):
3165+
if inplace:
3166+
return
3167+
else:
3168+
return self.copy()
3169+
31603170
indexer = _nargsort(labels, kind=kind, ascending=ascending,
31613171
na_position=na_position)
31623172

pandas/core/index.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -982,10 +982,6 @@ def _convert_list_indexer(self, keyarr, kind=None):
982982
if kind in [None, 'iloc', 'ix'] and is_integer_dtype(keyarr) \
983983
and not self.is_floating() and not isinstance(keyarr, ABCPeriodIndex):
984984

985-
if self.inferred_type != 'integer':
986-
keyarr = np.where(keyarr < 0,
987-
len(self) + keyarr, keyarr)
988-
989985
if self.inferred_type == 'mixed-integer':
990986
indexer = self.get_indexer(keyarr)
991987
if (indexer >= 0).all():
@@ -998,6 +994,8 @@ def _convert_list_indexer(self, keyarr, kind=None):
998994
return maybe_convert_indices(indexer, len(self))
999995

1000996
elif not self.inferred_type == 'integer':
997+
keyarr = np.where(keyarr < 0,
998+
len(self) + keyarr, keyarr)
1001999
return keyarr
10021000

10031001
return None

0 commit comments

Comments
 (0)