Skip to content

loc row assignment with NaN and NaT coerces to either NaN or NaT #12499

Open
@jonathanstrong

Description

@jonathanstrong
import datetime
import pandas as pd
import pytz
data = [{'one': 0, 'two': datetime.datetime(2016, 3, 1, 3, 13, 22, 98986, tzinfo=pytz.timezone('UTC'))}]
df = pd.DataFrame(data)
df.loc[1] = [np.nan, np.datetime64('NaT')]

traceback

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-14-7cc4bcff4319> in <module>()
      4 data = [{'one': 0, 'two': datetime.datetime(2016, 3, 1, 3, 13, 22, 98986, tzinfo=pytz.timezone('UTC'))}]
      5 df = pd.DataFrame(data)
----> 6 df.loc[1] = [np.nan, np.datetime64('NaT')]

/home/jstrong/src/envs/vc4/local/lib/python2.7/site-packages/pandas/core/indexing.pyc in __setitem__(self, key, value)
    115     def __setitem__(self, key, value):
    116         indexer = self._get_setitem_indexer(key)
--> 117         self._setitem_with_indexer(indexer, value)
    118 
    119     def _has_valid_type(self, k, axis):

/home/jstrong/src/envs/vc4/local/lib/python2.7/site-packages/pandas/core/indexing.pyc in _setitem_with_indexer(self, indexer, value)
    337                         value = Series(value,index=self.obj.columns,name=indexer)
    338 
--> 339                     self.obj._data = self.obj.append(value)._data
    340                     self.obj._maybe_update_cacher(clear=True)
    341                     return self.obj

/home/jstrong/src/envs/vc4/local/lib/python2.7/site-packages/pandas/core/frame.pyc in append(self, other, ignore_index, verify_integrity)
   4229             to_concat = [self, other]
   4230         return concat(to_concat, ignore_index=ignore_index,
-> 4231                       verify_integrity=verify_integrity)
   4232 
   4233     def join(self, other, on=None, how='left', lsuffix='', rsuffix='',

/home/jstrong/src/envs/vc4/local/lib/python2.7/site-packages/pandas/tools/merge.pyc in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, copy)
    811                        verify_integrity=verify_integrity,
    812                        copy=copy)
--> 813     return op.get_result()
    814 
    815 

/home/jstrong/src/envs/vc4/local/lib/python2.7/site-packages/pandas/tools/merge.pyc in get_result(self)
    993 
    994             new_data = concatenate_block_managers(
--> 995                 mgrs_indexers, self.new_axes, concat_axis=self.axis, copy=self.copy)
    996             if not self.copy:
    997                 new_data._consolidate_inplace()

/home/jstrong/src/envs/vc4/local/lib/python2.7/site-packages/pandas/core/internals.pyc in concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy)
   4454                                                 copy=copy),
   4455                          placement=placement)
-> 4456               for placement, join_units in concat_plan]
   4457 
   4458     return BlockManager(blocks, axes)

/home/jstrong/src/envs/vc4/local/lib/python2.7/site-packages/pandas/core/internals.pyc in concatenate_join_units(join_units, concat_axis, copy)
   4551     to_concat = [ju.get_reindexed_values(empty_dtype=empty_dtype,
   4552                                          upcasted_na=upcasted_na)
-> 4553                  for ju in join_units]
   4554 
   4555     if len(to_concat) == 1:

/home/jstrong/src/envs/vc4/local/lib/python2.7/site-packages/pandas/core/internals.pyc in get_reindexed_values(self, empty_dtype, upcasted_na)
   4799 
   4800             if self.is_null and not getattr(self.block,'is_categorical',None):
-> 4801                 missing_arr = np.empty(self.shape, dtype=empty_dtype)
   4802                 if np.prod(self.shape):
   4803                     # NumPy 1.6 workaround: this statement gets strange if all

TypeError: data type not understood

The same error occurs with `df.loc[1] = [np.nan, np.nan]`. The problem seems to be `loc` assignment of a new row involving NaN/NaT for a datetime column (timezone-aware in this example). I'm not sure of the full breadth of the bug.

Expected Output

   one                              two
0  0.0 2016-03-01 03:13:22.098986+00:00
1  NaN                              NaT

output of pd.show_versions()

INSTALLED VERSIONS
------------------
commit: None
python: 2.7.6.final.0
python-bits: 64
OS: Linux
OS-release: 3.13.0-79-generic
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8

pandas: 0.17.1
nose: 1.3.7
pip: 8.0.2
setuptools: 0.6
Cython: 0.23.4
numpy: 1.10.4
scipy: 0.17.0
statsmodels: 0.6.1
IPython: 4.0.1
sphinx: None
patsy: 0.4.1
dateutil: 2.5.0
pytz: 2015.7
blosc: None
bottleneck: 1.0.0
tables: 3.2.2
numexpr: 2.4.6
matplotlib: 1.5.0+1301.g7b517da
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml: 3.4.1
bs4: 4.4.1
html5lib: None
httplib2: None
apiclient: None
sqlalchemy: 1.0.11
pymysql: None
psycopg2: 2.6.1 (dt dec pq3 ext lo64)
Jinja2: None

Metadata

Metadata

Assignees

No one assigned

    Labels

    Bug · Indexing (Related to indexing on series/frames, not to indexes themselves) · Timezones (Timezone data dtype)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions