Skip to content

ENH: Added multicolumn/multirow support for latex #14184

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 3, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
295 changes: 150 additions & 145 deletions doc/source/options.rst

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ Other enhancements
- ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. See the :ref:`Timedelta docs <timedeltas.isoformat>` (:issue:`15136`)
- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`)
- ``.select_dtypes()`` now allows the string 'datetimetz' to generically select datetimes with tz (:issue:`14910`)
- The ``.to_latex()`` method will now accept ``multicolumn``, ``multicolumn_format`` and ``multirow`` arguments to use the accompanying LaTeX enhancements (:issue:`14184`)
- ``pd.merge_asof()`` gained the option ``direction='backward'|'forward'|'nearest'`` (:issue:`14887`)
- ``Series/DataFrame.asfreq()`` have gained a ``fill_value`` parameter, to fill missing values (:issue:`3715`).
- ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`).
Expand Down
31 changes: 29 additions & 2 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,14 +239,35 @@
: bool
This specifies if the to_latex method of a Dataframe uses escapes special
characters.
method. Valid values: False,True
Valid values: False,True
"""

pc_latex_longtable = """
:bool
This specifies if the to_latex method of a Dataframe uses the longtable
format.
method. Valid values: False,True
Valid values: False,True
"""

# Help text for the boolean option controlling whether to_latex renders
# MultiIndex columns with multicolumns.
pc_latex_multicolumn = """
: bool
This specifies if the to_latex method of a Dataframe uses multicolumns
to pretty-print MultiIndex columns.
Valid values: False,True
"""

# Help text describing the alignment specifier used for multicolumn headers.
# NOTE(review): presumably meant to be the description passed when the
# `latex.multicolumn_format` option is registered -- confirm at the
# registration site.
pc_latex_multicolumn_format = """
: string
This specifies the format for multicolumn headers.
Can be surrounded with '|'.
Valid values: 'l', 'c', 'r', 'p{<width>}'
"""

# Help text for the boolean option controlling whether to_latex renders
# MultiIndex rows with multirows.
pc_latex_multirow = """
: bool
This specifies if the to_latex method of a Dataframe uses multirows
to pretty-print MultiIndex rows.
Valid values: False,True
"""

style_backup = dict()
Expand Down Expand Up @@ -339,6 +360,12 @@ def mpl_style_cb(key):
validator=is_bool)
cf.register_option('latex.longtable', False, pc_latex_longtable,
validator=is_bool)
# Register the LaTeX MultiIndex rendering options. Each option must be paired
# with its own description string: `latex.multicolumn_format` is documented
# by `pc_latex_multicolumn_format`, not by the boolean option's help text.
cf.register_option('latex.multicolumn', True, pc_latex_multicolumn,
                   validator=is_bool)
cf.register_option('latex.multicolumn_format', 'l',
                   pc_latex_multicolumn_format, validator=is_text)
cf.register_option('latex.multirow', False, pc_latex_multirow,
                   validator=is_bool)

cf.deprecate_option('display.line_width',
msg=pc_line_width_deprecation_warning,
Expand Down
46 changes: 38 additions & 8 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1614,10 +1614,11 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
index=True, na_rep='NaN', formatters=None, float_format=None,
sparsify=None, index_names=True, bold_rows=True,
column_format=None, longtable=None, escape=None,
encoding=None, decimal='.'):
"""
encoding=None, decimal='.', multicolumn=None,
multicolumn_format=None, multirow=None):
r"""
Render a DataFrame to a tabular environment table. You can splice
this into a LaTeX document. Requires \\usepackage{booktabs}.
this into a LaTeX document. Requires \usepackage{booktabs}.

`to_latex`-specific options:

Expand All @@ -1628,27 +1629,54 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
<https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g 'rcl' for 3
columns
longtable : boolean, default will be read from the pandas config module
default: False
Default: False.
Use a longtable environment instead of tabular. Requires adding
a \\usepackage{longtable} to your LaTeX preamble.
a \usepackage{longtable} to your LaTeX preamble.
escape : boolean, default will be read from the pandas config module
default: True
Default: True.
When set to False prevents from escaping latex special
characters in column names.
encoding : str, default None
A string representing the encoding to use in the output file,
defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
decimal : string, default '.'
Character recognized as decimal separator, e.g. ',' in Europe
Character recognized as decimal separator, e.g. ',' in Europe.

.. versionadded:: 0.18.0

multicolumn : boolean, default True
Use \multicolumn to enhance MultiIndex columns.
The default will be read from the config module.

.. versionadded:: 0.20.0

multicolumn_format : str, default 'l'
The alignment for multicolumns, similar to `column_format`
The default will be read from the config module.

.. versionadded:: 0.20.0

multirow : boolean, default False
Use \multirow to enhance MultiIndex rows.
Requires adding a \usepackage{multirow} to your LaTeX preamble.
Will print centered labels (instead of top-aligned)
across the contained rows, separating groups via clines.
The default will be read from the pandas config module.

.. versionadded:: 0.20.0

"""
# Get defaults from the pandas config
if longtable is None:
longtable = get_option("display.latex.longtable")
if escape is None:
escape = get_option("display.latex.escape")
if multicolumn is None:
multicolumn = get_option("display.latex.multicolumn")
if multicolumn_format is None:
multicolumn_format = get_option("display.latex.multicolumn_format")
if multirow is None:
multirow = get_option("display.latex.multirow")

formatter = fmt.DataFrameFormatter(self, buf=buf, columns=columns,
col_space=col_space, na_rep=na_rep,
Expand All @@ -1660,7 +1688,9 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
index_names=index_names,
escape=escape, decimal=decimal)
formatter.to_latex(column_format=column_format, longtable=longtable,
encoding=encoding)
encoding=encoding, multicolumn=multicolumn,
multicolumn_format=multicolumn_format,
multirow=multirow)

if buf is None:
return formatter.buf.getvalue()
Expand Down
115 changes: 109 additions & 6 deletions pandas/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,13 +650,17 @@ def _join_multiline(self, *strcols):
st = ed
return '\n\n'.join(str_lst)

def to_latex(self, column_format=None, longtable=False, encoding=None):
def to_latex(self, column_format=None, longtable=False, encoding=None,
multicolumn=False, multicolumn_format=None, multirow=False):
"""
Render a DataFrame to a LaTeX tabular/longtable environment output.
"""

latex_renderer = LatexFormatter(self, column_format=column_format,
longtable=longtable)
longtable=longtable,
multicolumn=multicolumn,
multicolumn_format=multicolumn_format,
multirow=multirow)

if encoding is None:
encoding = 'ascii' if compat.PY2 else 'utf-8'
Expand Down Expand Up @@ -824,11 +828,15 @@ class LatexFormatter(TableFormatter):
HTMLFormatter
"""

def __init__(self, formatter, column_format=None, longtable=False):
def __init__(self, formatter, column_format=None, longtable=False,
multicolumn=False, multicolumn_format=None, multirow=False):
self.fmt = formatter
self.frame = self.fmt.frame
self.column_format = column_format
self.longtable = longtable
self.multicolumn = multicolumn
self.multicolumn_format = multicolumn_format
self.multirow = multirow

def write_result(self, buf):
"""
Expand All @@ -850,14 +858,21 @@ def get_col_type(dtype):
else:
return 'l'

# reestablish the MultiIndex that has been joined by _to_str_column
if self.fmt.index and isinstance(self.frame.index, MultiIndex):
clevels = self.frame.columns.nlevels
strcols.pop(0)
name = any(self.frame.index.names)
cname = any(self.frame.columns.names)
lastcol = self.frame.index.nlevels - 1
for i, lev in enumerate(self.frame.index.levels):
lev2 = lev.format()
blank = ' ' * len(lev2[0])
lev3 = [blank] * clevels
# display column names in last index-column
if cname and i == lastcol:
lev3 = [x if x else '{}' for x in self.frame.columns.names]
else:
lev3 = [blank] * clevels
if name:
lev3.append(lev.name)
for level_idx, group in itertools.groupby(
Expand Down Expand Up @@ -885,10 +900,15 @@ def get_col_type(dtype):
buf.write('\\begin{longtable}{%s}\n' % column_format)
buf.write('\\toprule\n')

nlevels = self.frame.columns.nlevels
ilevels = self.frame.index.nlevels
clevels = self.frame.columns.nlevels
nlevels = clevels
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason to have both nlevels and clevels?

Copy link
Author

@sgsaenger sgsaenger Dec 20, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No
Kinda.
I used clevels to avoid fiddling with the handling of the index-names line.
In the compiled output there's no difference if I just use nlevels, but the resulting code differs, and I'd rather not change the behaviour in parts that don't need to be modified.

if any(self.frame.index.names):
nlevels += 1
for i, row in enumerate(zip(*strcols)):
strrows = list(zip(*strcols))
self.clinebuf = []

for i, row in enumerate(strrows):
if i == nlevels and self.fmt.header:
buf.write('\\midrule\n') # End of header
if self.longtable:
Expand All @@ -910,15 +930,98 @@ def get_col_type(dtype):
if x else '{}') for x in row]
else:
crow = [x if x else '{}' for x in row]
if i < clevels and self.fmt.header and self.multicolumn:
# sum up columns to multicolumns
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you put the content contained in this if statement in a separate method on the class? Eg call it something like _format_multicolumn(..) ? Then you can give it a docstring with some explanation what exactly happens. And that will make the big for loop here a bit more comprehensible.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are class-methods preferred over nested-functions?

crow = self._format_multicolumn(crow, ilevels)
if (i >= nlevels and self.fmt.index and self.multirow and
ilevels > 1):
# sum up rows to multirows
crow = self._format_multirow(crow, ilevels, i, strrows)
buf.write(' & '.join(crow))
buf.write(' \\\\\n')
if self.multirow and i < len(strrows) - 1:
self._print_cline(buf, i, len(strcols))

if not self.longtable:
buf.write('\\bottomrule\n')
buf.write('\\end{tabular}\n')
else:
buf.write('\\end{longtable}\n')

def _format_multicolumn(self, row, ilevels):
    r"""
    Combine columns belonging to a group to a single multicolumn entry
    according to self.multicolumn_format

    e.g.:
    a &  &  & b & c &
    will become
    \multicolumn{3}{l}{a} & b & \multicolumn{2}{l}{c}

    Parameters
    ----------
    row : sequence of str
        Cells of one header row; the first `ilevels` cells are the index
        columns and are copied through unchanged.
    ilevels : int
        Number of leading index cells in `row`.

    Returns
    -------
    list of str
        The index cells followed by one entry per column group. A label
        followed by blank cells becomes a single ``\multicolumn`` entry
        spanning the label and its blanks; a label without trailing blanks
        is kept verbatim (padding included).
    """
    # Fall back to the default alignment ('l') when no format was supplied;
    # formatting ``None`` with ``{:s}`` would otherwise raise a TypeError.
    col_format = self.multicolumn_format
    if col_format is None:
        col_format = 'l'

    def render(text, span):
        # write multicolumn if needed
        if span > 1:
            return ('\\multicolumn{{{0:d}}}{{{1:s}}}{{{2:s}}}'
                    .format(span, col_format, text.strip()))
        # don't modify where not needed
        return text

    row2 = list(row[:ilevels])
    ncol = 1
    coltext = ''
    for c in row[ilevels:]:
        if c.strip():
            # a new label starts: flush the previous group (if any)
            if coltext:
                row2.append(render(coltext, ncol))
            coltext = c
            ncol = 1
        else:
            # blank cell: it belongs to the group of the previous label
            ncol += 1
    # write last column name
    if coltext:
        row2.append(render(coltext, ncol))
    return row2

def _format_multirow(self, row, ilevels, i, rows):
    r"""
    Check following rows, whether row should be a multirow

    e.g.:         becomes:
    a & 0 &       \multirow{2}{*}{a} & 0 &
      & 1 &         & 1 &
    b & 0 &       \cline{1-2}
                  b & 0 &

    Parameters
    ----------
    row : list of str
        Cells of the current row; modified in place and returned.
    ilevels : int
        Number of leading index cells to inspect.
    i : int
        Position of `row` within `rows`.
    rows : sequence of sequences of str
        All rows of the table; the rows after position `i` are scanned
        for blank cells in the same index column.

    Returns
    -------
    list of str
        `row`, with every index label that is followed by blank cells
        replaced by a ``\multirow`` entry spanning those rows.

    Notes
    -----
    For every multirow created, ``[last_row_position, column_number]``
    (column number is 1-based) is appended to ``self.clinebuf`` so that a
    ``\cline`` can be emitted once that block is finished.
    """
    for j in range(ilevels):
        if row[j].strip():
            # count the label's own row plus the blank cells below it
            nrow = 1
            for r in rows[i + 1:]:
                if not r[j].strip():
                    nrow += 1
                else:
                    break
            if nrow > 1:
                # overwrite non-multirow entry
                row[j] = '\\multirow{{{0:d}}}{{*}}{{{1:s}}}'.format(
                    nrow, row[j].strip())
                # save when to end the current block with \cline
                self.clinebuf.append([i + nrow - 1, j + 1])
    return row

def _print_cline(self, buf, i, icol):
    r"""
    Print clines after multirow-blocks are finished

    Parameters
    ----------
    buf : writable object
        Receives one ``\cline{<start>-<icol>}`` line per finished block.
    i : int
        Position of the row that has just been written.
    icol : int
        Last column number of the cline range.

    Entries of ``self.clinebuf`` are ``[row_position, start_column]``
    pairs; those whose row position equals `i` are written and then
    dropped from the buffer.
    """
    for cl in self.clinebuf:
        if cl[0] == i:
            # escaped backslash: '\c' is not a valid escape sequence
            buf.write('\\cline{{{0:d}-{1:d}}}\n'.format(cl[1], icol))
    # remove entries that have been written to buffer
    self.clinebuf = [x for x in self.clinebuf if x[0] != i]


class HTMLFormatter(TableFormatter):

Expand Down
Loading