Description
Code Sample, a copy-pastable example if possible
In [2]: df = pd.DataFrame(1, index=list(range(10))*10, columns=[0]).reset_index()
In [3]: groups = []
In [4]: def store(group):
   ...:     groups.append(group)
...:
In [5]: df.groupby('index').apply(store)
Out[5]:
Empty DataFrame
Columns: []
Index: []
In [6]: groups[-1]
Out[6]: ---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/home/nobackup/repo/ipython/IPython/core/formatters.py in __call__(self, obj)
668 type_pprinters=self.type_printers,
669 deferred_pprinters=self.deferred_printers)
--> 670 printer.pretty(obj)
671 printer.flush()
672 return stream.getvalue()
/home/nobackup/repo/ipython/IPython/lib/pretty.py in pretty(self, obj)
381 if callable(meth):
382 return meth(obj, self, cycle)
--> 383 return _default_pprint(obj, self, cycle)
384 finally:
385 self.end_group()
/home/nobackup/repo/ipython/IPython/lib/pretty.py in _default_pprint(obj, p, cycle)
501 if _safe_getattr(klass, '__repr__', None) not in _baseclass_reprs:
502 # A user-provided repr. Find newlines and replace them with p.break_()
--> 503 _repr_pprint(obj, p, cycle)
504 return
505 p.begin_group(1, '<')
/home/nobackup/repo/ipython/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
692 """A pprint that just redirects to the normal repr function."""
693 # Find newlines and replace them with p.break_()
--> 694 output = repr(obj)
695 for idx,output_line in enumerate(output.splitlines()):
696 if idx:
/home/nobackup/repo/pandas/pandas/core/base.py in __repr__(self)
78 Yields Bytestring in Py2, Unicode String in py3.
79 """
---> 80 return str(self)
81
82
/home/nobackup/repo/pandas/pandas/core/base.py in __str__(self)
57
58 if compat.PY3:
---> 59 return self.__unicode__()
60 return self.__bytes__()
61
/home/nobackup/repo/pandas/pandas/core/frame.py in __unicode__(self)
627 width = None
628 self.to_string(buf=buf, max_rows=max_rows, max_cols=max_cols,
--> 629 line_width=width, show_dimensions=show_dimensions)
630
631 return buf.getvalue()
/home/nobackup/repo/pandas/pandas/core/frame.py in to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, line_width, max_rows, max_cols, show_dimensions)
1646 max_cols=max_cols,
1647 show_dimensions=show_dimensions)
-> 1648 formatter.to_string()
1649
1650 if buf is None:
/home/nobackup/repo/pandas/pandas/io/formats/format.py in to_string(self)
590 else:
591
--> 592 strcols = self._to_str_columns()
593 if self.line_width is None: # no need to wrap around just print
594 # the whole frame
/home/nobackup/repo/pandas/pandas/io/formats/format.py in _to_str_columns(self)
530 adj=self.adj)
531
--> 532 max_len = max(np.max([self.adj.len(x) for x in fmt_values]),
533 header_colwidth)
534 cheader = self.adj.justify(cheader, max_len, mode=self.justify)
/home/pietro/.local/lib/python3.5/site-packages/numpy/core/fromnumeric.py in amax(a, axis, out, keepdims)
2250
2251 return _methods._amax(a, axis=axis,
-> 2252 out=out, **kwargs)
2253
2254
/home/pietro/.local/lib/python3.5/site-packages/numpy/core/_methods.py in _amax(a, axis, out, keepdims)
24 # small reductions
25 def _amax(a, axis=None, out=None, keepdims=False):
---> 26 return umr_maximum(a, axis, None, out, keepdims)
27
28 def _amin(a, axis=None, out=None, keepdims=False):
ValueError: zero-size array to reduction operation maximum which has no identity
Problem description
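I understand that replacing groups.append(group) with groups.append(group.copy())
works around this specific case, that freeing the memory of pandas objects is
difficult because of cyclic references, and that this is an atypical function to
pass to apply(). Still, it looks like some of the dereferencing code could be made
more robust: a group stored from inside apply() should remain a valid, printable
DataFrame afterwards, rather than raising a ValueError from its repr as shown above.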
Expected Output
In [7]: groups[0]
Out[7]:
index 0
0 0 1
10 0 1
20 0 1
30 0 1
40 0 1
50 0 1
60 0 1
70 0 1
80 0 1
90 0 1
Output of pd.show_versions()
INSTALLED VERSIONS
commit: ad7d051
python: 3.5.3.final.0
python-bits: 64
OS: Linux
OS-release: 4.9.0-3-amd64
machine: x86_64
processor:
byteorder: little
LC_ALL: None
LANG: it_IT.UTF-8
LOCALE: it_IT.UTF-8
pandas: 0.21.0.dev+546.gad7d051bd
pytest: 3.0.6
pip: 9.0.1
setuptools: None
Cython: 0.25.2
numpy: 1.12.1
scipy: 0.19.0
pyarrow: None
xarray: None
IPython: 5.1.0.dev
sphinx: 1.5.6
patsy: 0.4.1
dateutil: 2.6.0
pytz: 2017.2
blosc: None
bottleneck: 1.2.1
tables: 3.3.0
numexpr: 2.6.1
feather: 0.3.1
matplotlib: 2.0.2
openpyxl: None
xlrd: 1.0.0
xlwt: 1.1.2
xlsxwriter: 0.9.6
lxml: None
bs4: 4.5.3
html5lib: 0.999999999
sqlalchemy: 1.0.15
pymysql: None
psycopg2: None
jinja2: 2.9.6
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: 0.2.1