Description
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- (optional) I have confirmed this bug exists on the master branch of pandas.
In short, with pandas 1.0.3 on Python 3.7.6, when I multiply `<NA>` (the Int64 null type) by a Series with length over 10,000, I get `ValueError: unknown type object`. Things work as expected when the length is less than or equal to 10,000.
Code Sample, a copy-pastable example
import numpy as np
import pandas as pd
%xmode Verbose
print(pd.__version__)
Int64NA = pd.Series([np.nan], dtype=pd.Int64Dtype()).iloc[0]
Int64NA * pd.Series(np.zeros(10001))
Actual output
$ ipython
Python 3.7.6 | packaged by conda-forge | (default, Mar 5 2020, 15:27:18)
Type 'copyright', 'credits' or 'license' for more information
IPython 7.13.0 -- An enhanced Interactive Python. Type '?' for help.
In [1]: import numpy as np
...: import pandas as pd
...: %xmode Verbose
...: print(pd.__version__)
...: Int64NA = pd.Series([np.nan], dtype=pd.Int64Dtype()).iloc[0]
...: Int64NA * pd.Series(np.zeros(10001))
Exception reporting mode: Verbose
1.0.3
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-1-e9964dfd52e1> in <module>
4 print(pd.__version__)
5 Int64NA = pd.Series([np.nan], dtype=pd.Int64Dtype()).iloc[0]
----> 6 Int64NA * pd.Series(np.zeros(10001))
global Int64NA = <NA>
global pd.Series = <class 'pandas.core.series.Series'>
global np.zeros = <built-in function zeros>
/opt/conda/lib/python3.7/site-packages/pandas/core/ops/common.py in new_method(self=0 0.0
1 0.0
2 0.0
3 ... 0.0
10000 0.0
Length: 10001, dtype: float64, other=<NA>)
62 other = item_from_zerodim(other)
63
---> 64 return method(self, other)
global method = undefined
self = 0 0.0
1 0.0
2 0.0
3 0.0
4 0.0
...
9996 0.0
9997 0.0
9998 0.0
9999 0.0
10000 0.0
Length: 10001, dtype: float64
other = <NA>
65
66 return new_method
/opt/conda/lib/python3.7/site-packages/pandas/core/ops/__init__.py in wrapper(left=0 0.0
1 0.0
2 0.0
3 ... 0.0
10000 0.0
Length: 10001, dtype: float64, right=<NA>)
501 lvalues = extract_array(left, extract_numpy=True)
502 rvalues = extract_array(right, extract_numpy=True)
--> 503 result = arithmetic_op(lvalues, rvalues, op, str_rep)
result = undefined
global arithmetic_op = <function arithmetic_op at 0x7f9e94b2e9e0>
lvalues = array([0., 0., 0., ..., 0., 0., 0.])
rvalues = <NA>
global op = undefined
global str_rep = undefined
504
505 return _construct_result(left, result, index=left.index, name=res_name)
/opt/conda/lib/python3.7/site-packages/pandas/core/ops/array_ops.py in arithmetic_op(left=array([0., 0., 0., ..., 0., 0., 0.]), right=<NA>, op=<function rmul>, str_rep='*')
195 else:
196 with np.errstate(all="ignore"):
--> 197 res_values = na_arithmetic_op(lvalues, rvalues, op, str_rep)
res_values = undefined
global na_arithmetic_op = <function na_arithmetic_op at 0x7f9e94b2e950>
lvalues = array([0., 0., 0., ..., 0., 0., 0.])
rvalues = <NA>
op = <function rmul at 0x7f9e94b29b00>
str_rep = '*'
198
199 return res_values
/opt/conda/lib/python3.7/site-packages/pandas/core/ops/array_ops.py in na_arithmetic_op(left=array([0., 0., 0., ..., 0., 0., 0.]), right=<NA>, op=<function rmul>, str_rep='*')
147
148 try:
--> 149 result = expressions.evaluate(op, str_rep, left, right)
result = undefined
expressions.evaluate = <function evaluate at 0x7f9e938b8320>
op = <function rmul at 0x7f9e94b29b00>
str_rep = '*'
left = array([0., 0., 0., ..., 0., 0., 0.])
right = <NA>
150 except TypeError:
151 result = masked_arith_op(left, right, op)
/opt/conda/lib/python3.7/site-packages/pandas/core/computation/expressions.py in evaluate(op=<function rmul>, op_str='*', a=array([0., 0., 0., ..., 0., 0., 0.]), b=<NA>, use_numexpr=True)
206 use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b)
207 if use_numexpr:
--> 208 return _evaluate(op, op_str, a, b)
global _evaluate = <function _evaluate_numexpr at 0x7f9e938a1710>
op = <function rmul at 0x7f9e94b29b00>
op_str = '*'
a = array([0., 0., 0., ..., 0., 0., 0.])
b = <NA>
209 return _evaluate_standard(op, op_str, a, b)
210
/opt/conda/lib/python3.7/site-packages/pandas/core/computation/expressions.py in _evaluate_numexpr(op=<function rmul>, op_str='*', a=<NA>, b=array([0., 0., 0., ..., 0., 0., 0.]))
112 f"a_value {op_str} b_value",
113 local_dict={"a_value": a_value, "b_value": b_value},
--> 114 casting="safe",
global casting = undefined
115 )
116
/opt/conda/lib/python3.7/site-packages/numexpr/necompiler.py in evaluate(ex='a_value * b_value', local_dict={'a_value': <NA>, 'b_value': array([0., 0., 0., ..., 0., 0., 0.])}, global_dict=None, out=None, order='K', casting='safe', **kwargs={})
820 # Create a signature
821 signature = [(name, getType(arg)) for (name, arg) in
--> 822 zip(names, arguments)]
global zip = undefined
names = ['a_value', 'b_value']
arguments = [array(<NA>, dtype=object), array([0., 0., 0., ..., 0., 0., 0.])]
823
824 # Look up numexpr if possible.
/opt/conda/lib/python3.7/site-packages/numexpr/necompiler.py in <listcomp>(.0=<zip object>)
819
820 # Create a signature
--> 821 signature = [(name, getType(arg)) for (name, arg) in
global signature = undefined
name = 'a_value'
global getType = <function getType at 0x7f9e938b2ef0>
arg = array(<NA>, dtype=object)
global zip = undefined
global names = undefined
global arguments = undefined
822 zip(names, arguments)]
823
/opt/conda/lib/python3.7/site-packages/numexpr/necompiler.py in getType(a=array(<NA>, dtype=object))
701 if kind == 'S':
702 return bytes
--> 703 raise ValueError("unknown type %s" % a.dtype.name)
global ValueError = undefined
a.dtype.name = 'object'
704
705
ValueError: unknown type object
Problem description
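I do not expect to see different behavior depending on whether or not the length of the data is greater than 10,000. The traceback shows that the failure occurs in the numexpr code path (`_evaluate_numexpr`): `<NA>` is passed to numexpr as an object-dtype array, and numexpr's `getType` raises `ValueError` for that dtype.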
Expected Output
0 <NA>
1 <NA>
2 <NA>
3 <NA>
4 <NA>
...
9995 <NA>
9996 <NA>
9997 <NA>
9998 <NA>
9999 <NA>
10000 <NA>
Length: 10001, dtype: object
Output of pd.show_versions()
In [2]: pd.show_versions()
INSTALLED VERSIONS
commit : None
python : 3.7.6.final.0
python-bits : 64
OS : Linux
OS-release : 4.15.0-1065-aws
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : de_DE.UTF-8
LANG : de_DE.UTF-8
LOCALE : de_DE.UTF-8
pandas : 1.0.3
numpy : 1.18.1
pytz : 2019.3
dateutil : 2.8.1
pip : 20.0.2
setuptools : 46.0.0.post20200311
Cython : 0.29.15
pytest : 5.4.1
hypothesis : None
sphinx : 3.0.1
blosc : None
feather : None
xlsxwriter : None
lxml.etree : 4.5.0
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : 2.11.1
IPython : 7.13.0
pandas_datareader: None
bs4 : 4.8.2
bottleneck : None
fastparquet : None
gcsfs : None
lxml.etree : 4.5.0
matplotlib : 3.1.3
numexpr : 2.7.1
odfpy : None
openpyxl : 3.0.3
pandas_gbq : None
pyarrow : None
pytables : None
pytest : 5.4.1
pyxlsb : None
s3fs : None
scipy : 1.4.1
sqlalchemy : 1.3.15
tables : None
tabulate : None
xarray : 0.15.1
xlrd : 1.2.0
xlwt : None
xlsxwriter : None
numba : 0.48.0