Skip to content

Commit c18cf28

Browse files
authored
Merge branch 'master' into integer-array-from-bool
2 parents 7c24ea8 + 9821b77 commit c18cf28

File tree

277 files changed

+1536
-2508
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

277 files changed

+1536
-2508
lines changed

asv_bench/benchmarks/io/stata.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,12 @@ class Stata(BaseIO):
1212

1313
def setup(self, convert_dates):
1414
self.fname = '__test__.dta'
15-
N = 100000
16-
C = 5
15+
N = self.N = 100000
16+
C = self.C = 5
1717
self.df = DataFrame(np.random.randn(N, C),
1818
columns=['float{}'.format(i) for i in range(C)],
1919
index=date_range('20000101', periods=N, freq='H'))
20-
self.df['object'] = tm.makeStringIndex(N)
20+
self.df['object'] = tm.makeStringIndex(self.N)
2121
self.df['int8_'] = np.random.randint(np.iinfo(np.int8).min,
2222
np.iinfo(np.int8).max - 27, N)
2323
self.df['int16_'] = np.random.randint(np.iinfo(np.int16).min,
@@ -36,4 +36,14 @@ def time_write_stata(self, convert_dates):
3636
self.df.to_stata(self.fname, self.convert_dates)
3737

3838

39+
class StataMissing(Stata):
40+
def setup(self, convert_dates):
41+
super(StataMissing, self).setup(convert_dates)
42+
for i in range(10):
43+
missing_data = np.random.randn(self.N)
44+
missing_data[missing_data < 0] = np.nan
45+
self.df['missing_{0}'.format(i)] = missing_data
46+
self.df.to_stata(self.fname, self.convert_dates)
47+
48+
3949
from ..pandas_vb_common import setup # noqa: F401

ci/azure/posix.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,18 @@ jobs:
1515
PATTERN: "not slow and not network"
1616

1717
${{ if eq(parameters.name, 'Linux') }}:
18+
py35_compat:
19+
ENV_FILE: ci/deps/azure-35-compat.yaml
20+
CONDA_PY: "35"
21+
PATTERN: "not slow and not network"
22+
23+
py36_locale_slow_old_np:
24+
ENV_FILE: ci/deps/azure-36-locale.yaml
25+
CONDA_PY: "36"
26+
PATTERN: "slow"
27+
LOCALE_OVERRIDE: "zh_CN.UTF-8"
28+
EXTRA_APT: "language-pack-zh-hans"
29+
1830
py36_locale_slow:
1931
ENV_FILE: ci/deps/azure-36-locale_slow.yaml
2032
CONDA_PY: "36"

ci/azure/windows.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ jobs:
1212
ENV_FILE: ci/deps/azure-windows-36.yaml
1313
CONDA_PY: "36"
1414

15+
py37_np141:
16+
ENV_FILE: ci/deps/azure-windows-37.yaml
17+
CONDA_PY: "37"
18+
1519
steps:
1620
- task: CondaEnvironment@1
1721
inputs:

ci/code_checks.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,9 +175,9 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
175175
set -o pipefail
176176
if [[ "$AZURE" == "true" ]]; then
177177
# we exclude all c/cpp files as the c/cpp files of pandas code base are tested when Linting .c and .h files
178-
! grep -n '--exclude=*.'{svg,c,cpp,html} -RI "\s$" * | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Tailing whitespaces found: " $3}'
178+
! grep -n '--exclude=*.'{svg,c,cpp,html} --exclude-dir=env -RI "\s$" * | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Tailing whitespaces found: " $3}'
179179
else
180-
! grep -n '--exclude=*.'{svg,c,cpp,html} -RI "\s$" * | awk -F ":" '{print $1 ":" $2 ":Tailing whitespaces found: " $3}'
180+
! grep -n '--exclude=*.'{svg,c,cpp,html} --exclude-dir=env -RI "\s$" * | awk -F ":" '{print $1 ":" $2 ":Tailing whitespaces found: " $3}'
181181
fi
182182
RET=$(($RET + $?)) ; echo $MSG "DONE"
183183
fi

ci/deps/azure-35-compat.yaml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
name: pandas-dev
2+
channels:
3+
- defaults
4+
- conda-forge
5+
dependencies:
6+
- beautifulsoup4==4.4.1
7+
- bottleneck=1.2.0
8+
- cython=0.28.2
9+
- hypothesis>=3.58.0
10+
- jinja2=2.8
11+
- numexpr=2.6.1
12+
- numpy=1.12.0
13+
- openpyxl=2.4.0
14+
- pytables=3.4.2
15+
- python-dateutil=2.5.0
16+
- python=3.5*
17+
- pytz=2015.4
18+
- scipy=0.18.1
19+
- xlrd=1.0.0
20+
- xlsxwriter=0.7.7
21+
- xlwt=1.0.0
22+
# universal
23+
- pytest-xdist
24+
- pytest-mock
25+
- isort
26+
- pip:
27+
# for python 3.5, pytest>=4.0.2 is not available in conda
28+
- pytest>=4.0.2
29+
- html5lib==1.0b2

ci/deps/azure-36-locale.yaml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
name: pandas-dev
2+
channels:
3+
- defaults
4+
- conda-forge
5+
dependencies:
6+
- beautifulsoup4==4.5.1
7+
- bottleneck=1.2.0
8+
- cython=0.28.2
9+
- lxml
10+
- matplotlib=2.0.0
11+
- numpy=1.12.0
12+
- openpyxl=2.4.0
13+
- python-dateutil
14+
- python-blosc
15+
- python=3.6
16+
- pytz=2016.10
17+
- scipy
18+
- sqlalchemy=1.1.4
19+
- xlrd=1.0.0
20+
- xlsxwriter=0.9.4
21+
- xlwt=1.2.0
22+
# universal
23+
- pytest>=4.0.2
24+
- pytest-xdist
25+
- pytest-mock
26+
- hypothesis>=3.58.0
27+
- isort
28+
- pip:
29+
- html5lib==1.0b2

ci/deps/azure-macos-35.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ dependencies:
1212
- nomkl
1313
- numexpr
1414
- numpy=1.12.0
15-
- openpyxl=2.5.5
15+
- openpyxl
1616
- pyarrow
1717
- pytables
1818
- python=3.5*

ci/deps/azure-windows-37.yaml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
name: pandas-dev
2+
channels:
3+
- defaults
4+
- conda-forge
5+
dependencies:
6+
- beautifulsoup4
7+
- bottleneck
8+
- gcsfs
9+
- html5lib
10+
- jinja2
11+
- lxml
12+
- matplotlib=3.0.1
13+
- numexpr
14+
- numpy=1.14.*
15+
- openpyxl
16+
- pytables
17+
- python=3.7.*
18+
- python-dateutil
19+
- pytz
20+
- s3fs
21+
- scipy
22+
- sqlalchemy
23+
- xlrd
24+
- xlsxwriter
25+
- xlwt
26+
# universal
27+
- cython>=0.28.2
28+
- pytest>=4.0.2
29+
- pytest-xdist
30+
- pytest-mock
31+
- moto
32+
- hypothesis>=3.58.0
33+
- isort

ci/deps/travis-36-locale.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ dependencies:
1515
- numpy
1616
- openpyxl
1717
- psycopg2
18-
- pymysql
18+
- pymysql=0.7.9
1919
- pytables
2020
- python-dateutil
2121
- python=3.6*

doc/cheatsheet/README.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
The Pandas Cheat Sheet was created using Microsoft Powerpoint 2013.
22
To create the PDF version, within Powerpoint, simply do a "Save As"
3-
and pick "PDF' as the format.
3+
and pick "PDF" as the format.
44

5-
This cheat sheet was inspired by the RstudioData Wrangling Cheatsheet[1], written by Irv Lustig, Princeton Consultants[2].
5+
This cheat sheet was inspired by the RStudio Data Wrangling Cheatsheet[1], written by Irv Lustig, Princeton Consultants[2].
66

77
[1]: https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf
88
[2]: http://www.princetonoptimization.com/

doc/source/install.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ Optional Dependencies
259259
* `PyTables <http://www.pytables.org>`__: necessary for HDF5-based storage, Version 3.4.2 or higher
260260
* `pyarrow <http://arrow.apache.org/docs/python/>`__ (>= 0.9.0): necessary for feather-based storage.
261261
* `Apache Parquet <https://parquet.apache.org/>`__, either `pyarrow <http://arrow.apache.org/docs/python/>`__ (>= 0.7.0) or `fastparquet <https://fastparquet.readthedocs.io/en/latest>`__ (>= 0.2.1) for parquet-based storage. The `snappy <https://pypi.org/project/python-snappy>`__ and `brotli <https://pypi.org/project/brotlipy>`__ packages are available for compression support.
262-
* `SQLAlchemy <http://www.sqlalchemy.org>`__: for SQL database support. Version 1.0.8 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs <http://docs.sqlalchemy.org/en/latest/dialects/index.html>`__. Some common drivers are:
262+
* `SQLAlchemy <http://www.sqlalchemy.org>`__: for SQL database support. Version 1.1.4 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs <http://docs.sqlalchemy.org/en/latest/dialects/index.html>`__. Some common drivers are:
263263

264264
* `psycopg2 <http://initd.org/psycopg/>`__: for PostgreSQL
265265
* `pymysql <https://github.com/PyMySQL/PyMySQL>`__: for MySQL.

doc/source/whatsnew/v0.25.0.rst

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -123,13 +123,13 @@ If installed, we now require:
123123
+=================+=================+==========+
124124
| beautifulsoup4 | 4.4.1 | |
125125
+-----------------+-----------------+----------+
126-
| openpyxl | 2.2.6 | |
126+
| openpyxl | 2.4.0 | |
127127
+-----------------+-----------------+----------+
128-
| pymysql | 0.6.6 | |
128+
| pymysql | 0.7.9 | |
129129
+-----------------+-----------------+----------+
130130
| pytz | 2015.4 | |
131131
+-----------------+-----------------+----------+
132-
| sqlalchemy | 1.0.8 | |
132+
| sqlalchemy | 1.1.4 | |
133133
+-----------------+-----------------+----------+
134134
| xlsxwriter | 0.7.7 | |
135135
+-----------------+-----------------+----------+
@@ -175,7 +175,8 @@ Performance Improvements
175175
- Improved performance of :meth:`Series.searchsorted`. The speedup is especially large when the dtype is
176176
int8/int16/int32 and the searched key is within the integer bounds for the dtype (:issue:`22034`)
177177
- Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`)
178-
178+
- Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`)
179+
- Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`)
179180

180181
.. _whatsnew_0250.bug_fixes:
181182

@@ -208,9 +209,11 @@ Timedelta
208209
Timezones
209210
^^^^^^^^^
210211

212+
- Bug in :func:`DatetimeIndex.to_frame` where timezone aware data would be converted to timezone naive data (:issue:`25809`)
211213
- Bug in :func:`to_datetime` with ``utc=True`` and datetime strings that would apply previously parsed UTC offsets to subsequent arguments (:issue:`24992`)
212214
- Bug in :func:`Timestamp.tz_localize` and :func:`Timestamp.tz_convert` where ``freq`` was not propagated (:issue:`25241`)
213215
- Bug in :func:`Series.at` where setting :class:`Timestamp` with timezone raises ``TypeError`` (:issue:`25506`)
216+
- Bug in :func:`DataFrame.update` when updating with timezone aware data would return timezone naive data (:issue:`25807`)
214217

215218
Numeric
216219
^^^^^^^
@@ -219,6 +222,7 @@ Numeric
219222
- Bug in :meth:`to_numeric` in which numbers were being coerced to float, even though ``errors`` was not ``coerce`` (:issue:`24910`)
220223
- Bug in error messages in :meth:`DataFrame.corr` and :meth:`Series.corr`. Added the possibility of using a callable. (:issue:`25729`)
221224
- Bug in :meth:`Series.divmod` and :meth:`Series.rdivmod` which would raise an (incorrect) ``ValueError`` rather than return a pair of :class:`Series` objects as result (:issue:`25557`)
225+
- Raises a helpful exception when a non-numeric index is sent to :meth:`interpolate` with a method that requires a numeric index. (:issue:`21662`)
222226
- Fixed bug where casting all-boolean array to integer extension array failed (:issue:`25211`)
223227
-
224228
-
@@ -242,7 +246,7 @@ Strings
242246
Interval
243247
^^^^^^^^
244248

245-
-
249+
- Construction of :class:`Interval` is restricted to numeric, :class:`Timestamp` and :class:`Timedelta` endpoints (:issue:`23013`)
246250
-
247251
-
248252

@@ -280,7 +284,7 @@ I/O
280284
- :meth:`DataFrame.to_html` now raises ``TypeError`` when using an invalid type for the ``classes`` parameter instead of ``AssertionError`` (:issue:`25608`)
281285
- Bug in :meth:`DataFrame.to_string` and :meth:`DataFrame.to_latex` that would lead to incorrect output when the ``header`` keyword is used (:issue:`16718`)
282286
- Bug in :func:`read_csv` not properly interpreting the UTF8 encoded filenames on Windows on Python 3.6+ (:issue:`15086`)
283-
-
287+
- Improved performance in :meth:`pandas.read_stata` and :class:`pandas.io.stata.StataReader` when converting columns that have missing values (:issue:`25772`)
284288

285289

286290
Plotting

pandas/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@
3636

3737
from datetime import datetime
3838

39+
from pandas._config import (get_option, set_option, reset_option,
40+
describe_option, option_context, options)
41+
3942
# let init-time option registration happen
4043
import pandas.core.config_init
4144

pandas/_config/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,9 @@
22
pandas._config is considered explicitly upstream of everything else in pandas,
33
and should have no intra-pandas dependencies.
44
"""
5+
__all__ = ["config", "get_option", "set_option", "reset_option",
6+
"describe_option", "option_context", "options"]
7+
from pandas._config import config
8+
from pandas._config.config import (
9+
describe_option, get_option, option_context, options, reset_option,
10+
set_option)

pandas/core/config.py renamed to pandas/_config/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -695,7 +695,7 @@ def config_prefix(prefix):
695695
696696
Example:
697697
698-
import pandas.core.config as cf
698+
import pandas._config.config as cf
699699
with cf.config_prefix("display.font"):
700700
cf.register_option("color", "red")
701701
cf.register_option("size", " 5 pt")

pandas/_libs/interval.pyx

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,10 @@ cnp.import_array()
2020
cimport pandas._libs.util as util
2121

2222
from pandas._libs.hashtable cimport Int64Vector, Int64VectorData
23+
from pandas._libs.tslibs.util cimport is_integer_object, is_float_object
2324

2425
from pandas._libs.tslibs import Timestamp
26+
from pandas._libs.tslibs.timedeltas import Timedelta
2527
from pandas._libs.tslibs.timezones cimport tz_compare
2628

2729

@@ -104,12 +106,7 @@ cdef class IntervalMixin(object):
104106
@property
105107
def length(self):
106108
"""Return the length of the Interval"""
107-
try:
108-
return self.right - self.left
109-
except TypeError:
110-
# length not defined for some types, e.g. string
111-
msg = 'cannot compute length between {left!r} and {right!r}'
112-
raise TypeError(msg.format(left=self.left, right=self.right))
109+
return self.right - self.left
113110

114111
def _check_closed_matches(self, other, name='other'):
115112
"""Check if the closed attribute of `other` matches.
@@ -250,6 +247,10 @@ cdef class Interval(IntervalMixin):
250247
def __init__(self, left, right, str closed='right'):
251248
# note: it is faster to just do these checks than to use a special
252249
# constructor (__cinit__/__new__) to avoid them
250+
251+
self._validate_endpoint(left)
252+
self._validate_endpoint(right)
253+
253254
if closed not in _VALID_CLOSED:
254255
msg = "invalid option for 'closed': {closed}".format(closed=closed)
255256
raise ValueError(msg)
@@ -266,6 +267,14 @@ cdef class Interval(IntervalMixin):
266267
self.right = right
267268
self.closed = closed
268269

270+
def _validate_endpoint(self, endpoint):
271+
# GH 23013
272+
if not (is_integer_object(endpoint) or is_float_object(endpoint) or
273+
isinstance(endpoint, (Timestamp, Timedelta))):
274+
msg = ("Only numeric, Timestamp and Timedelta endpoints "
275+
"are allowed when constructing an Interval.")
276+
raise ValueError(msg)
277+
269278
def __hash__(self):
270279
return hash((self.left, self.right, self.closed))
271280

pandas/_libs/khash.pxd

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,17 @@ cdef extern from "khash_python.h":
5656

5757
bint kh_exist_str(kh_str_t*, khiter_t) nogil
5858

59+
ctypedef struct kh_str_starts_t:
60+
kh_str_t *table
61+
int starts[256]
62+
63+
kh_str_starts_t* kh_init_str_starts() nogil
64+
khint_t kh_put_str_starts_item(kh_str_starts_t* table, char* key,
65+
int* ret) nogil
66+
khint_t kh_get_str_starts_item(kh_str_starts_t* table, char* key) nogil
67+
void kh_destroy_str_starts(kh_str_starts_t*) nogil
68+
void kh_resize_str_starts(kh_str_starts_t*, khint_t) nogil
69+
5970
ctypedef struct kh_int64_t:
6071
khint_t n_buckets, size, n_occupied, upper_bound
6172
uint32_t *flags

0 commit comments

Comments
 (0)