
Commit f433861

Merge pull request #102 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents 8d7d569 + 4f9bc8a commit f433861

37 files changed: +806, -728 lines

doc/source/conf.py

Lines changed: 1 addition & 1 deletion
@@ -195,7 +195,7 @@

 # The theme to use for HTML and HTML Help pages. Major themes that come with
 # Sphinx are currently 'default' and 'sphinxdoc'.
-html_theme = "pandas_sphinx_theme"
+html_theme = "pydata_sphinx_theme"

 # The style sheet to use for HTML and HTML Help pages. A file of that name
 # must exist either in Sphinx' static/ path, or in one of the custom paths

doc/source/getting_started/intro_tutorials/03_subset_data.rst

Lines changed: 1 addition & 1 deletion
@@ -88,7 +88,7 @@ name of the column of interest.
 </ul>

 Each column in a :class:`DataFrame` is a :class:`Series`. As a single column is
-selected, the returned object is a pandas :class:`DataFrame`. We can verify this
+selected, the returned object is a pandas :class:`Series`. We can verify this
 by checking the type of the output:

 .. ipython:: python
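
The corrected sentence matches pandas' actual behaviour. As a quick check, a minimal sketch (using a hypothetical two-column frame, not the tutorial's dataset):

    import pandas as pd

    # Hypothetical stand-in for the tutorial's dataset.
    df = pd.DataFrame({"Name": ["Allen", "Bonnell"], "Age": [29, 58]})

    type(df["Age"])    # single label   -> pandas.core.series.Series
    type(df[["Age"]])  # list of labels -> pandas.core.frame.DataFrame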

doc/source/user_guide/scale.rst

Lines changed: 1 addition & 0 deletions
@@ -246,6 +246,7 @@ We'll import ``dask.dataframe`` and notice that the API feels similar to pandas.
 We can use Dask's ``read_parquet`` function, but provide a globstring of files to read in.

 .. ipython:: python
+   :okwarning:

    import dask.dataframe as dd

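
For context, ``dask.dataframe.read_parquet`` accepts such a globstring directly; a minimal sketch (the path below is a placeholder, not the guide's generated dataset):

    import dask.dataframe as dd

    # Placeholder glob; each matching file becomes one or more partitions.
    ddf = dd.read_parquet("data/timeseries/ts*.parquet")

    # Nothing is read until a computation is requested, e.g. by head().
    ddf.head()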

doc/source/whatsnew/v1.1.0.rst

Lines changed: 1 addition & 0 deletions
@@ -405,6 +405,7 @@ Other
 - Fixed :func:`pandas.testing.assert_series_equal` to correctly raise if left object is a different subclass with ``check_series_type=True`` (:issue:`32670`).
 - :meth:`IntegerArray.astype` now supports ``datetime64`` dtype (:issue:32538`)
 - Fixed bug in :func:`pandas.testing.assert_series_equal` where dtypes were checked for ``Interval`` and ``ExtensionArray`` operands when ``check_dtype`` was ``False`` (:issue:`32747`)
+- Bug in :meth:`DataFrame.__dir__` caused a segfault when using unicode surrogates in a column name (:issue:`25509`)

 .. ---------------------------------------------------------------------------

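As a reminder of the ``check_dtype`` switch that two of these notes refer to, a small usage sketch (behaviour hedged to this release line, not taken from the whatsnew itself):

    import pandas as pd
    from pandas.testing import assert_series_equal

    left = pd.Series([1, 2, 3], dtype="int64")
    right = pd.Series([1.0, 2.0, 3.0], dtype="float64")

    # With check_dtype=False only the values are compared, so this passes;
    # with the default check_dtype=True it raises an AssertionError.
    assert_series_equal(left, right, check_dtype=False)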

environment.yml

Lines changed: 1 addition & 1 deletion
@@ -104,5 +104,5 @@ dependencies:
   - pyreadstat  # pandas.read_spss
   - tabulate>=0.8.3  # DataFrame.to_markdown
   - pip:
-    - git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master
+    - git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master
     - git+https://github.com/numpy/numpydoc

pandas/_libs/hashtable_class_helper.pxi.in

Lines changed: 8 additions & 2 deletions
@@ -12,6 +12,9 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 from pandas._libs.tslibs.util cimport get_c_string
 from pandas._libs.missing cimport C_NA

+cdef extern from "Python.h":
+    void PyErr_Clear()
+
 {{py:

 # name, dtype, c_type
@@ -193,7 +196,7 @@ cdef class StringVector:

         append_data_string(self.data, x)

-    cdef extend(self, ndarray[:] x):
+    cdef extend(self, ndarray[object] x):
         for i in range(len(x)):
             self.append(x[i])

@@ -238,7 +241,7 @@ cdef class ObjectVector:
         self.external_view_exists = True
         return self.ao

-    cdef extend(self, ndarray[:] x):
+    cdef extend(self, ndarray[object] x):
         for i in range(len(x)):
             self.append(x[i])

@@ -790,6 +793,9 @@ cdef class StringHashTable(HashTable):
            else:
                # if ignore_na is False, we also stringify NaN/None/etc.
                v = get_c_string(<str>val)
+               if v == NULL:
+                   PyErr_Clear()
+                   v = get_c_string(<str>repr(val))
            vecs[i] = v

        # compute
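
The new ``PyErr_Clear``/``repr`` fallback exists because lone surrogates cannot be encoded to UTF-8, which is what ``get_c_string`` effectively attempts. A pure-Python sketch of the same idea (illustrative only, not the Cython code):

    def utf8_or_repr(val: str) -> bytes:
        # Try the direct UTF-8 encoding first; when it fails -- e.g. for a
        # lone surrogate in a column name -- swallow the error and encode
        # repr(val) instead, which is always encodable.
        try:
            return val.encode("utf-8")
        except UnicodeEncodeError:
            return repr(val).encode("utf-8")

    utf8_or_repr("price")   # b'price'
    utf8_or_repr("\ud83d")  # lone surrogate: falls back to b"'\\ud83d'"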

pandas/_libs/internals.pyx

Lines changed: 12 additions & 13 deletions
@@ -378,25 +378,23 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True):

         object blkno
         object group_dict = defaultdict(list)
-        int64_t[:] res_view

     n = blknos.shape[0]
-
-    if n == 0:
-        return
-
+    result = list()
     start = 0
     cur_blkno = blknos[start]

-    if group is False:
+    if n == 0:
+        pass
+    elif group is False:
         for i in range(1, n):
             if blknos[i] != cur_blkno:
-                yield cur_blkno, slice(start, i)
+                result.append((cur_blkno, slice(start, i)))

                 start = i
                 cur_blkno = blknos[i]

-        yield cur_blkno, slice(start, n)
+        result.append((cur_blkno, slice(start, n)))
     else:
         for i in range(1, n):
             if blknos[i] != cur_blkno:
@@ -409,19 +407,20 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True):

         for blkno, slices in group_dict.items():
             if len(slices) == 1:
-                yield blkno, slice(slices[0][0], slices[0][1])
+                result.append((blkno, slice(slices[0][0], slices[0][1])))
             else:
                 tot_len = sum(stop - start for start, stop in slices)
-                result = np.empty(tot_len, dtype=np.int64)
-                res_view = result
+                arr = np.empty(tot_len, dtype=np.int64)

                 i = 0
                 for start, stop in slices:
                     for diff in range(start, stop):
-                        res_view[i] = diff
+                        arr[i] = diff
                         i += 1

-                yield blkno, result
+                result.append((blkno, arr))
+
+    return result


 def get_blkno_placements(blknos, group: bool = True):
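
Aside from the switch from ``yield`` to an accumulated list, the grouping logic is easy to state in plain Python; a rough sketch of the ``group=False`` path (illustrative only, not the Cython source):

    def blkno_slices(blknos):
        # Collapse runs of identical block numbers into (blkno, slice) pairs,
        # e.g. [0, 0, 1, 1, 1, 0] ->
        # [(0, slice(0, 2)), (1, slice(2, 5)), (0, slice(5, 6))].
        result = []
        n = len(blknos)
        if n == 0:
            return result
        start = 0
        cur = blknos[0]
        for i in range(1, n):
            if blknos[i] != cur:
                result.append((cur, slice(start, i)))
                start = i
                cur = blknos[i]
        result.append((cur, slice(start, n)))
        return result

    blkno_slices([0, 0, 1, 1, 1, 0])
    # [(0, slice(0, 2)), (1, slice(2, 5)), (0, slice(5, 6))]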

pandas/_libs/parsers.pyx

Lines changed: 0 additions & 1 deletion
@@ -792,7 +792,6 @@ cdef class TextReader:
             self._tokenize_rows(1)

             header = [ self.names ]
-            data_line = 0

             if self.parser.lines < 1:
                 field_count = len(header[0])

pandas/_libs/src/ujson/python/date_conversions.c

Lines changed: 2 additions & 2 deletions
@@ -67,7 +67,7 @@ npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) {
 }

 /* Convert PyDatetime To ISO C-string. mutates len */
-char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base,
+char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base,
                       size_t *len) {
     npy_datetimestruct dts;
     int ret;
@@ -98,7 +98,7 @@ char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base,
     return result;
 }

-npy_datetime PyDateTimeToEpoch(PyDateTime_Date *dt, NPY_DATETIMEUNIT base) {
+npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base) {
     npy_datetimestruct dts;
     int ret;


pandas/_libs/src/ujson/python/date_conversions.h

Lines changed: 2 additions & 3 deletions
@@ -4,7 +4,6 @@
 #define PY_SSIZE_T_CLEAN
 #include <Python.h>
 #include <numpy/ndarraytypes.h>
-#include "datetime.h"

 // Scales value inplace from nanosecond resolution to unit resolution
 int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit);
@@ -23,10 +22,10 @@ npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base);
 // up to precision `base` e.g. base="s" yields 2020-01-03T00:00:00Z
 // while base="ns" yields "2020-01-01T00:00:00.000000000Z"
 // len is mutated to save the length of the returned string
-char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base, size_t *len);
+char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base, size_t *len);

 // Convert a Python Date/Datetime to Unix epoch with resolution base
-npy_datetime PyDateTimeToEpoch(PyDateTime_Date *dt, NPY_DATETIMEUNIT base);
+npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base);

 char *int64ToIsoDuration(int64_t value, size_t *len);


pandas/_libs/src/ujson/python/objToJSON.c

Lines changed: 4 additions & 5 deletions
@@ -1451,7 +1451,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
                 } else {
                     // datetime.* objects don't follow above rules
                     nanosecVal =
-                        PyDateTimeToEpoch((PyDateTime_Date *)item, NPY_FR_ns);
+                        PyDateTimeToEpoch(item, NPY_FR_ns);
                 }
             }
         }
@@ -1469,8 +1469,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
             if (type_num == NPY_DATETIME) {
                 cLabel = int64ToIso(nanosecVal, base, &len);
             } else {
-                cLabel = PyDateTimeToIso((PyDateTime_Date *)item,
-                                         base, &len);
+                cLabel = PyDateTimeToIso(item, base, &len);
             }
         }
         if (cLabel == NULL) {
@@ -1683,7 +1682,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
             NPY_DATETIMEUNIT base =
                 ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
             GET_TC(tc)->longValue =
-                PyDateTimeToEpoch((PyDateTime_Date *)obj, base);
+                PyDateTimeToEpoch(obj, base);
             tc->type = JT_LONG;
         }
         return;
@@ -1710,7 +1709,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
             NPY_DATETIMEUNIT base =
                 ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
             GET_TC(tc)->longValue =
-                PyDateTimeToEpoch((PyDateTime_Date *)obj, base);
+                PyDateTimeToEpoch(obj, base);
             tc->type = JT_LONG;
         }
         return;

pandas/_libs/tslibs/src/datetime/np_datetime.c

Lines changed: 4 additions & 6 deletions
@@ -21,7 +21,6 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
 #endif // NPY_NO_DEPRECATED_API

 #include <Python.h>
-#include <datetime.h>

 #include <numpy/arrayobject.h>
 #include <numpy/arrayscalars.h>
@@ -313,15 +312,14 @@ int cmp_npy_datetimestruct(const npy_datetimestruct *a,
  * object into a NumPy npy_datetimestruct. Uses tzinfo (if present)
  * to convert to UTC time.
  *
- * While the C API has PyDate_* and PyDateTime_* functions, the following
- * implementation just asks for attributes, and thus supports
- * datetime duck typing. The tzinfo time zone conversion would require
- * this style of access anyway.
+ * The following implementation just asks for attributes, and thus
+ * supports datetime duck typing. The tzinfo time zone conversion
+ * requires this style of access as well.
 *
 * Returns -1 on error, 0 on success, and 1 (with no error set)
 * if obj doesn't have the needed date or datetime attributes.
 */
-int convert_pydatetime_to_datetimestruct(PyDateTime_Date *dtobj,
+int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
                                          npy_datetimestruct *out) {
     // Assumes that obj is a valid datetime object
     PyObject *tmp;
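
The reworded comment describes attribute-based ("duck typed") access; a rough Python analogue of that idea (a sketch only, not the C routine):

    from datetime import datetime, timedelta, timezone

    def to_datetimestruct(obj):
        # Read date/time fields as plain attributes, so anything that quacks
        # like a datetime works; shift to UTC when a tzinfo is attached.
        fields = {
            "year": obj.year, "month": obj.month, "day": obj.day,
            "hour": getattr(obj, "hour", 0),
            "minute": getattr(obj, "minute", 0),
            "second": getattr(obj, "second", 0),
            "microsecond": getattr(obj, "microsecond", 0),
        }
        if getattr(obj, "tzinfo", None) is not None:
            utc = datetime(**fields) - obj.utcoffset()
            fields = {name: getattr(utc, name) for name in fields}
        return fields

    to_datetimestruct(datetime(2020, 1, 3, 12, 30, tzinfo=timezone(timedelta(hours=2))))
    # {'year': 2020, 'month': 1, 'day': 3, 'hour': 10, 'minute': 30, ...}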

pandas/_libs/tslibs/src/datetime/np_datetime.h

Lines changed: 1 addition & 2 deletions
@@ -22,7 +22,6 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
 #endif // NPY_NO_DEPRECATED_API

 #include <numpy/ndarraytypes.h>
-#include <datetime.h>

 typedef struct {
     npy_int64 days;
@@ -35,7 +34,7 @@ extern const npy_datetimestruct _NS_MAX_DTS;
 // stuff pandas needs
 // ----------------------------------------------------------------------------

-int convert_pydatetime_to_datetimestruct(PyDateTime_Date *dtobj,
+int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
                                          npy_datetimestruct *out);

 npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
