
Commit f433861

Merge pull request #102 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents 8d7d569 + 4f9bc8a commit f433861

37 files changed: +806, -728 lines

doc/source/conf.py

Lines changed: 1 addition & 1 deletion
@@ -195,7 +195,7 @@

 # The theme to use for HTML and HTML Help pages. Major themes that come with
 # Sphinx are currently 'default' and 'sphinxdoc'.
-html_theme = "pandas_sphinx_theme"
+html_theme = "pydata_sphinx_theme"

 # The style sheet to use for HTML and HTML Help pages. A file of that name
 # must exist either in Sphinx' static/ path, or in one of the custom paths

doc/source/getting_started/intro_tutorials/03_subset_data.rst

Lines changed: 1 addition & 1 deletion
@@ -88,7 +88,7 @@ name of the column of interest.
 </ul>

 Each column in a :class:`DataFrame` is a :class:`Series`. As a single column is
-selected, the returned object is a pandas :class:`DataFrame`. We can verify this
+selected, the returned object is a pandas :class:`Series`. We can verify this
 by checking the type of the output:

 .. ipython:: python
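
The corrected sentence matches pandas' actual behaviour. As a quick check, a minimal sketch (using a hypothetical two-column frame, not the tutorial's dataset):

    import pandas as pd

    # Hypothetical stand-in for the tutorial's dataset.
    df = pd.DataFrame({"Name": ["Allen", "Bonnell"], "Age": [29, 58]})

    type(df["Age"])    # single label   -> pandas.core.series.Series
    type(df[["Age"]])  # list of labels -> pandas.core.frame.DataFrame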

doc/source/user_guide/scale.rst

Lines changed: 1 addition & 0 deletions
@@ -246,6 +246,7 @@ We'll import ``dask.dataframe`` and notice that the API feels similar to pandas.
 We can use Dask's ``read_parquet`` function, but provide a globstring of files to read in.

 .. ipython:: python
+   :okwarning:

    import dask.dataframe as dd

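
For context, ``dask.dataframe.read_parquet`` accepts such a globstring directly; a minimal sketch (the path below is a placeholder, not the guide's generated dataset):

    import dask.dataframe as dd

    # Placeholder glob; each matching file becomes one or more partitions.
    ddf = dd.read_parquet("data/timeseries/ts*.parquet")

    # Nothing is read until a computation is requested, e.g. by head().
    ddf.head()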

doc/source/whatsnew/v1.1.0.rst

Lines changed: 1 addition & 0 deletions
@@ -405,6 +405,7 @@ Other
 - Fixed :func:`pandas.testing.assert_series_equal` to correctly raise if left object is a different subclass with ``check_series_type=True`` (:issue:`32670`).
 - :meth:`IntegerArray.astype` now supports ``datetime64`` dtype (:issue:32538`)
 - Fixed bug in :func:`pandas.testing.assert_series_equal` where dtypes were checked for ``Interval`` and ``ExtensionArray`` operands when ``check_dtype`` was ``False`` (:issue:`32747`)
+- Bug in :meth:`DataFrame.__dir__` caused a segfault when using unicode surrogates in a column name (:issue:`25509`)

 .. ---------------------------------------------------------------------------

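As a reminder of the ``check_dtype`` switch that two of these notes refer to, a small usage sketch (behaviour hedged to this release line, not taken from the whatsnew itself):

    import pandas as pd
    from pandas.testing import assert_series_equal

    left = pd.Series([1, 2, 3], dtype="int64")
    right = pd.Series([1.0, 2.0, 3.0], dtype="float64")

    # With check_dtype=False only the values are compared, so this passes;
    # with the default check_dtype=True it raises an AssertionError.
    assert_series_equal(left, right, check_dtype=False)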

environment.yml

Lines changed: 1 addition & 1 deletion
@@ -104,5 +104,5 @@ dependencies:
   - pyreadstat  # pandas.read_spss
   - tabulate>=0.8.3  # DataFrame.to_markdown
   - pip:
-    - git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master
+    - git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master
     - git+https://github.com/numpy/numpydoc

pandas/_libs/hashtable_class_helper.pxi.in

Lines changed: 8 additions & 2 deletions
@@ -12,6 +12,9 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 from pandas._libs.tslibs.util cimport get_c_string
 from pandas._libs.missing cimport C_NA

+cdef extern from "Python.h":
+    void PyErr_Clear()
+
 {{py:

 # name, dtype, c_type
@@ -193,7 +196,7 @@ cdef class StringVector:

         append_data_string(self.data, x)

-    cdef extend(self, ndarray[:] x):
+    cdef extend(self, ndarray[object] x):
         for i in range(len(x)):
             self.append(x[i])

@@ -238,7 +241,7 @@ cdef class ObjectVector:
         self.external_view_exists = True
         return self.ao

-    cdef extend(self, ndarray[:] x):
+    cdef extend(self, ndarray[object] x):
         for i in range(len(x)):
             self.append(x[i])

@@ -790,6 +793,9 @@ cdef class StringHashTable(HashTable):
            else:
                # if ignore_na is False, we also stringify NaN/None/etc.
                v = get_c_string(<str>val)
+               if v == NULL:
+                   PyErr_Clear()
+                   v = get_c_string(<str>repr(val))
            vecs[i] = v

        # compute
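
The new ``PyErr_Clear``/``repr`` fallback exists because lone surrogates cannot be encoded to UTF-8, which is what ``get_c_string`` effectively attempts. A pure-Python sketch of the same idea (illustrative only, not the Cython code):

    def utf8_or_repr(val: str) -> bytes:
        # Try the direct UTF-8 encoding first; when it fails -- e.g. for a
        # lone surrogate in a column name -- swallow the error and encode
        # repr(val) instead, which is always encodable.
        try:
            return val.encode("utf-8")
        except UnicodeEncodeError:
            return repr(val).encode("utf-8")

    utf8_or_repr("price")   # b'price'
    utf8_or_repr("\ud83d")  # lone surrogate: falls back to b"'\\ud83d'"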

pandas/_libs/internals.pyx

Lines changed: 12 additions & 13 deletions
@@ -378,25 +378,23 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True):

         object blkno
         object group_dict = defaultdict(list)
-        int64_t[:] res_view

     n = blknos.shape[0]
-
-    if n == 0:
-        return
-
+    result = list()
     start = 0
     cur_blkno = blknos[start]

-    if group is False:
+    if n == 0:
+        pass
+    elif group is False:
         for i in range(1, n):
             if blknos[i] != cur_blkno:
-                yield cur_blkno, slice(start, i)
+                result.append((cur_blkno, slice(start, i)))

                 start = i
                 cur_blkno = blknos[i]

-        yield cur_blkno, slice(start, n)
+        result.append((cur_blkno, slice(start, n)))
     else:
         for i in range(1, n):
             if blknos[i] != cur_blkno:
@@ -409,19 +407,20 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True):

         for blkno, slices in group_dict.items():
             if len(slices) == 1:
-                yield blkno, slice(slices[0][0], slices[0][1])
+                result.append((blkno, slice(slices[0][0], slices[0][1])))
             else:
                 tot_len = sum(stop - start for start, stop in slices)
-                result = np.empty(tot_len, dtype=np.int64)
-                res_view = result
+                arr = np.empty(tot_len, dtype=np.int64)

                 i = 0
                 for start, stop in slices:
                     for diff in range(start, stop):
-                        res_view[i] = diff
+                        arr[i] = diff
                         i += 1

-                yield blkno, result
+                result.append((blkno, arr))
+
+    return result


 def get_blkno_placements(blknos, group: bool = True):
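
Aside from the switch from ``yield`` to an accumulated list, the grouping logic is easy to state in plain Python; a rough sketch of the ``group=False`` path (illustrative only, not the Cython source):

    def blkno_slices(blknos):
        # Collapse runs of identical block numbers into (blkno, slice) pairs,
        # e.g. [0, 0, 1, 1, 1, 0] ->
        # [(0, slice(0, 2)), (1, slice(2, 5)), (0, slice(5, 6))].
        result = []
        n = len(blknos)
        if n == 0:
            return result
        start = 0
        cur = blknos[0]
        for i in range(1, n):
            if blknos[i] != cur:
                result.append((cur, slice(start, i)))
                start = i
                cur = blknos[i]
        result.append((cur, slice(start, n)))
        return result

    blkno_slices([0, 0, 1, 1, 1, 0])
    # [(0, slice(0, 2)), (1, slice(2, 5)), (0, slice(5, 6))]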

pandas/_libs/parsers.pyx

Lines changed: 0 additions & 1 deletion
@@ -792,7 +792,6 @@ cdef class TextReader:
             self._tokenize_rows(1)

             header = [ self.names ]
-            data_line = 0

             if self.parser.lines < 1:
                 field_count = len(header[0])

pandas/_libs/src/ujson/python/date_conversions.c

Lines changed: 2 additions & 2 deletions
@@ -67,7 +67,7 @@ npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) {
 }

 /* Convert PyDatetime To ISO C-string. mutates len */
-char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base,
+char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base,
                       size_t *len) {
     npy_datetimestruct dts;
     int ret;
@@ -98,7 +98,7 @@ char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base,
     return result;
 }

-npy_datetime PyDateTimeToEpoch(PyDateTime_Date *dt, NPY_DATETIMEUNIT base) {
+npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base) {
     npy_datetimestruct dts;
     int ret;


pandas/_libs/src/ujson/python/date_conversions.h

Lines changed: 2 additions & 3 deletions
@@ -4,7 +4,6 @@
 #define PY_SSIZE_T_CLEAN
 #include <Python.h>
 #include <numpy/ndarraytypes.h>
-#include "datetime.h"

 // Scales value inplace from nanosecond resolution to unit resolution
 int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit);
@@ -23,10 +22,10 @@ npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base);
 // up to precision `base` e.g. base="s" yields 2020-01-03T00:00:00Z
 // while base="ns" yields "2020-01-01T00:00:00.000000000Z"
 // len is mutated to save the length of the returned string
-char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base, size_t *len);
+char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base, size_t *len);

 // Convert a Python Date/Datetime to Unix epoch with resolution base
-npy_datetime PyDateTimeToEpoch(PyDateTime_Date *dt, NPY_DATETIMEUNIT base);
+npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base);

 char *int64ToIsoDuration(int64_t value, size_t *len);


pandas/_libs/src/ujson/python/objToJSON.c

Lines changed: 4 additions & 5 deletions
@@ -1451,7 +1451,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
                 } else {
                     // datetime.* objects don't follow above rules
                     nanosecVal =
-                        PyDateTimeToEpoch((PyDateTime_Date *)item, NPY_FR_ns);
+                        PyDateTimeToEpoch(item, NPY_FR_ns);
                 }
             }
         }
@@ -1469,8 +1469,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
             if (type_num == NPY_DATETIME) {
                 cLabel = int64ToIso(nanosecVal, base, &len);
             } else {
-                cLabel = PyDateTimeToIso((PyDateTime_Date *)item,
-                                         base, &len);
+                cLabel = PyDateTimeToIso(item, base, &len);
             }
         }
         if (cLabel == NULL) {
@@ -1683,7 +1682,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
             NPY_DATETIMEUNIT base =
                 ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
             GET_TC(tc)->longValue =
-                PyDateTimeToEpoch((PyDateTime_Date *)obj, base);
+                PyDateTimeToEpoch(obj, base);
             tc->type = JT_LONG;
         }
         return;
@@ -1710,7 +1709,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
             NPY_DATETIMEUNIT base =
                 ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
             GET_TC(tc)->longValue =
-                PyDateTimeToEpoch((PyDateTime_Date *)obj, base);
+                PyDateTimeToEpoch(obj, base);
             tc->type = JT_LONG;
         }
         return;

pandas/_libs/tslibs/src/datetime/np_datetime.c

Lines changed: 4 additions & 6 deletions
@@ -21,7 +21,6 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
 #endif // NPY_NO_DEPRECATED_API

 #include <Python.h>
-#include <datetime.h>

 #include <numpy/arrayobject.h>
 #include <numpy/arrayscalars.h>
@@ -313,15 +312,14 @@ int cmp_npy_datetimestruct(const npy_datetimestruct *a,
  * object into a NumPy npy_datetimestruct. Uses tzinfo (if present)
  * to convert to UTC time.
  *
- * While the C API has PyDate_* and PyDateTime_* functions, the following
- * implementation just asks for attributes, and thus supports
- * datetime duck typing. The tzinfo time zone conversion would require
- * this style of access anyway.
+ * The following implementation just asks for attributes, and thus
+ * supports datetime duck typing. The tzinfo time zone conversion
+ * requires this style of access as well.
 *
 * Returns -1 on error, 0 on success, and 1 (with no error set)
 * if obj doesn't have the needed date or datetime attributes.
 */
-int convert_pydatetime_to_datetimestruct(PyDateTime_Date *dtobj,
+int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
                                          npy_datetimestruct *out) {
     // Assumes that obj is a valid datetime object
     PyObject *tmp;
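
The reworded comment describes attribute-based ("duck typed") access; a rough Python analogue of that idea (a sketch only, not the C routine):

    from datetime import datetime, timedelta, timezone

    def to_datetimestruct(obj):
        # Read date/time fields as plain attributes, so anything that quacks
        # like a datetime works; shift to UTC when a tzinfo is attached.
        fields = {
            "year": obj.year, "month": obj.month, "day": obj.day,
            "hour": getattr(obj, "hour", 0),
            "minute": getattr(obj, "minute", 0),
            "second": getattr(obj, "second", 0),
            "microsecond": getattr(obj, "microsecond", 0),
        }
        if getattr(obj, "tzinfo", None) is not None:
            utc = datetime(**fields) - obj.utcoffset()
            fields = {name: getattr(utc, name) for name in fields}
        return fields

    to_datetimestruct(datetime(2020, 1, 3, 12, 30, tzinfo=timezone(timedelta(hours=2))))
    # {'year': 2020, 'month': 1, 'day': 3, 'hour': 10, 'minute': 30, ...}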

pandas/_libs/tslibs/src/datetime/np_datetime.h

Lines changed: 1 addition & 2 deletions
@@ -22,7 +22,6 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
 #endif // NPY_NO_DEPRECATED_API

 #include <numpy/ndarraytypes.h>
-#include <datetime.h>

 typedef struct {
     npy_int64 days;
@@ -35,7 +34,7 @@ extern const npy_datetimestruct _NS_MAX_DTS;
 // stuff pandas needs
 // ----------------------------------------------------------------------------

-int convert_pydatetime_to_datetimestruct(PyDateTime_Date *dtobj,
+int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
                                          npy_datetimestruct *out);

 npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
