Description
Pandas version checks
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- I have confirmed this bug exists on the master branch of pandas.
Reproducible Example
import pandas as pd
df = pd.DataFrame([[1, 1, 1]], columns=["a", "b", "b"])
df.groupby("a", as_index=False).value_counts()
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_16680/431465574.py in <module>
----> 1 df.groupby("a", as_index=False).value_counts()
c:\users\john\onedrive\documents\github\pandas_johnzangwill\pandas\core\groupby\generic.py in value_counts(self, subset, normalize, sort, ascending, dropna)
1756 if not self.as_index:
1757 # Convert to frame
-> 1758 result = result.reset_index(name="proportion" if normalize else "count")
1759 return result.__finalize__(self.obj, method="value_counts")
1760
c:\users\john\onedrive\documents\github\pandas_johnzangwill\pandas\util\_decorators.py in wrapper(*args, **kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
312
313 return wrapper
c:\users\john\onedrive\documents\github\pandas_johnzangwill\pandas\core\series.py in reset_index(self, level, drop, name, inplace)
1489
1490 df = self.to_frame(name)
-> 1491 return df.reset_index(level=level, drop=drop)
1492
1493 # ----------------------------------------------------------------------
c:\users\john\onedrive\documents\github\pandas_johnzangwill\pandas\util\_decorators.py in wrapper(*args, **kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
312
313 return wrapper
c:\users\john\onedrive\documents\github\pandas_johnzangwill\pandas\core\frame.py in reset_index(self, level, drop, inplace, col_level, col_fill)
5832 )
5833
-> 5834 new_obj.insert(0, name, level_values)
5835
5836 new_obj.index = new_index
c:\users\john\onedrive\documents\github\pandas_johnzangwill\pandas\core\frame.py in insert(self, loc, column, value, allow_duplicates)
4433 if not allow_duplicates and column in self.columns:
4434 # Should this be a different kind of error??
-> 4435 raise ValueError(f"cannot insert {column}, already exists")
4436 if not isinstance(loc, int):
4437 raise TypeError("loc must be int")
ValueError: cannot insert b, already exists
Issue Description
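When a `DataFrame` is grouped with `as_index=False`, `value_counts()` returns a similar `DataFrame` with some of the same columns and an additional result column (`count`, or `proportion` when `normalize=True`).
This should still work even if the original `DataFrame` has duplicate column labels, but it currently raises `ValueError: cannot insert b, already exists`.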
Expected Behavior
df.groupby("a", as_index=False).value_counts()
   a  b  b  count
0  1  1  1      1
Installed Versions
INSTALLED VERSIONS
commit : 2ab1d1f
python : 3.8.12.final.0
python-bits : 64
OS : Windows
OS-release : 10
Version : 10.0.19043
machine : AMD64
processor : Intel64 Family 6 Model 158 Stepping 9, GenuineIntel
byteorder : little
LC_ALL : None
LANG : None
LOCALE : English_United Kingdom.1252
pandas : 1.4.0.dev0+1433.g2ab1d1f8d8
numpy : 1.21.4
pytz : 2021.3
dateutil : 2.8.2
pip : 21.3.1
setuptools : 59.4.0
Cython : 0.29.25
pytest : 6.2.5
hypothesis : 6.31.4
sphinx : 4.3.1
blosc : None
feather : None
xlsxwriter : 3.0.2
lxml.etree : 4.6.4
html5lib : 1.1
pymysql : None
psycopg2 : None
jinja2 : 3.0.3
IPython : 7.30.1
pandas_datareader: None
bs4 : 4.10.0
bottleneck : 1.3.2
fsspec : 2021.11.0
fastparquet : 0.7.2
gcsfs : 2021.11.0
matplotlib : 3.5.0
numexpr : 2.8.0
odfpy : None
openpyxl : 3.0.9
pandas_gbq : None
pyarrow : 6.0.1
pyxlsb : None
s3fs : 2021.11.0
scipy : 1.7.3
sqlalchemy : 1.4.28
tables : 3.6.1
tabulate : 0.8.9
xarray : 0.18.2
xlrd : 2.0.1
xlwt : 1.3.0
numba : 0.53.1