Skip to content

Commit aca1a42

Browse files
committed
Merge pull request #4437 from cpcloud/fix-astype-calls
BUG: fix string truncation for astype(str)
2 parents 58f1137 + f0c1bd2 commit aca1a42

File tree

5 files changed: 37 additions and 10 deletions.

doc/source/release.rst

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -117,6 +117,8 @@ pandas 0.13
117117
set _ref_locs (:issue:`4403`)
118118
- Fixed an issue where hist subplots were being overwritten when they were
119119
called using the top level matplotlib API (:issue:`4408`)
120+
- Fixed a bug where calling ``Series.astype(str)`` would truncate the string
121+
(:issue:`4405`, :issue:`4437`)
120122

121123
pandas 0.12
122124
===========

pandas/core/common.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33
"""
44

55
import re
6-
from datetime import datetime
76
import codecs
87
import csv
98

@@ -1628,7 +1627,7 @@ def _is_sequence(x):
16281627
_ensure_object = algos.ensure_object
16291628

16301629

1631-
def _astype_nansafe(arr, dtype, copy = True):
1630+
def _astype_nansafe(arr, dtype, copy=True):
16321631
""" return a view if copy is False """
16331632
if not isinstance(dtype, np.dtype):
16341633
dtype = np.dtype(dtype)
@@ -1659,6 +1658,8 @@ def _astype_nansafe(arr, dtype, copy = True):
16591658
elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer):
16601659
# work around NumPy brokenness, #1987
16611660
return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)
1661+
elif issubclass(dtype.type, compat.string_types):
1662+
return lib.astype_str(arr.ravel()).reshape(arr.shape)
16621663

16631664
if copy:
16641665
return arr.astype(dtype)

pandas/core/series.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
# pylint: disable=E1101,E1103
66
# pylint: disable=W0703,W0622,W0613,W0201
77

8-
from pandas import compat
98
import operator
109
from distutils.version import LooseVersion
1110
import types

pandas/lib.pyx

Lines changed: 17 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -722,6 +722,16 @@ def astype_intsafe(ndarray[object] arr, new_dtype):
722722

723723
return result
724724

725+
cpdef ndarray[object] astype_str(ndarray arr):
726+
cdef:
727+
Py_ssize_t i, n = arr.size
728+
ndarray[object] result = np.empty(n, dtype=object)
729+
730+
for i in range(n):
731+
util.set_value_at(result, i, str(arr[i]))
732+
733+
return result
734+
725735
def clean_index_list(list obj):
726736
'''
727737
Utility used in pandas.core.index._ensure_index
@@ -838,7 +848,7 @@ def write_csv_rows(list data, list data_index, int nlevels, list cols, object wr
838848
def create_hdf_rows_2d(ndarray indexer0,
839849
object dtype,
840850
ndarray[np.uint8_t, ndim=1] mask,
841-
ndarray[np.uint8_t, ndim=1] searchable,
851+
ndarray[np.uint8_t, ndim=1] searchable,
842852
list values):
843853
""" return a list of objects ready to be converted to rec-array format """
844854

@@ -857,7 +867,7 @@ def create_hdf_rows_2d(ndarray indexer0,
857867
for i in range(n_indexer0):
858868

859869
if not mask[i]:
860-
870+
861871
tup = PyTuple_New(tup_size)
862872

863873
v = indexer0[i]
@@ -869,7 +879,7 @@ def create_hdf_rows_2d(ndarray indexer0,
869879
v = values[b][i]
870880
if searchable[b]:
871881
v = v[0]
872-
882+
873883
PyTuple_SET_ITEM(tup, b+1, v)
874884
Py_INCREF(v)
875885

@@ -882,8 +892,8 @@ def create_hdf_rows_2d(ndarray indexer0,
882892
@cython.wraparound(False)
883893
def create_hdf_rows_3d(ndarray indexer0, ndarray indexer1,
884894
object dtype,
885-
ndarray[np.uint8_t, ndim=2] mask,
886-
ndarray[np.uint8_t, ndim=1] searchable,
895+
ndarray[np.uint8_t, ndim=2] mask,
896+
ndarray[np.uint8_t, ndim=1] searchable,
887897
list values):
888898
""" return a list of objects ready to be converted to rec-array format """
889899

@@ -932,8 +942,8 @@ def create_hdf_rows_3d(ndarray indexer0, ndarray indexer1,
932942
@cython.wraparound(False)
933943
def create_hdf_rows_4d(ndarray indexer0, ndarray indexer1, ndarray indexer2,
934944
object dtype,
935-
ndarray[np.uint8_t, ndim=3] mask,
936-
ndarray[np.uint8_t, ndim=1] searchable,
945+
ndarray[np.uint8_t, ndim=3] mask,
946+
ndarray[np.uint8_t, ndim=1] searchable,
937947
list values):
938948
""" return a list of objects ready to be converted to rec-array format """
939949

pandas/tests/test_series.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import os
55
import operator
66
import unittest
7+
import string
78

89
import nose
910

@@ -2029,6 +2030,7 @@ def test_timedelta64_functions(self):
20292030
expected = Series([timedelta(1)],dtype='timedelta64[ns]')
20302031
assert_series_equal(result,expected)
20312032

2033+
20322034
def test_sub_of_datetime_from_TimeSeries(self):
20332035
from pandas.core import common as com
20342036
from datetime import datetime
@@ -3354,6 +3356,19 @@ def test_astype_datetimes(self):
33543356
s = s.astype('O')
33553357
self.assert_(s.dtype == np.object_)
33563358

3359+
def test_astype_str(self):
3360+
# GH4405
3361+
digits = string.digits
3362+
s1 = Series([digits * 10, tm.rands(63), tm.rands(64),
3363+
tm.rands(1000)])
3364+
s2 = Series([digits * 10, tm.rands(63), tm.rands(64), nan, 1.0])
3365+
types = (compat.text_type,) + (np.str_, np.unicode_)
3366+
for typ in types:
3367+
for s in (s1, s2):
3368+
res = s.astype(typ)
3369+
expec = s.map(compat.text_type)
3370+
assert_series_equal(res, expec)
3371+
33573372
def test_map(self):
33583373
index, data = tm.getMixedTypeDict()
33593374

0 commit comments

Comments
 (0)