-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
DOC: Doc build for a single doc made much faster, and clean up #24428
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
2571572
83c3b45
a5e7ad2
0fda07e
e851cae
fc9c359
8e1e9e4
ade46e6
648da45
c489aa6
2be1d61
55e47a2
4406988
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,11 +15,9 @@ | |
import sys | ||
import os | ||
import shutil | ||
# import subprocess | ||
import subprocess | ||
import argparse | ||
from contextlib import contextmanager | ||
import webbrowser | ||
import jinja2 | ||
|
||
|
||
DOC_PATH = os.path.dirname(os.path.abspath(__file__)) | ||
|
@@ -28,179 +26,67 @@ | |
BUILD_DIRS = ['doctrees', 'html', 'latex', 'plots', '_static', '_templates'] | ||
|
||
|
||
@contextmanager | ||
def _maybe_exclude_notebooks(): | ||
"""Skip building the notebooks if pandoc is not installed. | ||
|
||
This assumes that nbsphinx is installed. | ||
|
||
Skip notebook conversion if: | ||
1. nbconvert isn't installed, or | ||
2. nbconvert is installed, but pandoc isn't | ||
""" | ||
# TODO move to exclude_pattern | ||
base = os.path.dirname(__file__) | ||
notebooks = [os.path.join(base, 'source', nb) | ||
for nb in ['style.ipynb']] | ||
contents = {} | ||
|
||
def _remove_notebooks(): | ||
for nb in notebooks: | ||
with open(nb, 'rt') as f: | ||
contents[nb] = f.read() | ||
os.remove(nb) | ||
|
||
try: | ||
import nbconvert | ||
except ImportError: | ||
sys.stderr.write('Warning: nbconvert not installed. ' | ||
'Skipping notebooks.\n') | ||
_remove_notebooks() | ||
else: | ||
try: | ||
nbconvert.utils.pandoc.get_pandoc_version() | ||
except nbconvert.utils.pandoc.PandocMissing: | ||
sys.stderr.write('Warning: Pandoc is not installed. ' | ||
'Skipping notebooks.\n') | ||
_remove_notebooks() | ||
|
||
yield | ||
|
||
for nb, content in contents.items(): | ||
with open(nb, 'wt') as f: | ||
f.write(content) | ||
|
||
|
||
class DocBuilder: | ||
"""Class to wrap the different commands of this script. | ||
""" | ||
Class to wrap the different commands of this script. | ||
|
||
All public methods of this class can be called as parameters of the | ||
script. | ||
""" | ||
def __init__(self, num_jobs=1, include_api=True, single_doc=None, | ||
verbosity=0): | ||
verbosity=0, warnings_are_errors=False): | ||
self.num_jobs = num_jobs | ||
self.include_api = include_api | ||
self.verbosity = verbosity | ||
self.single_doc = None | ||
self.single_doc_type = None | ||
if single_doc is not None: | ||
self._process_single_doc(single_doc) | ||
self.exclude_patterns = self._exclude_patterns | ||
|
||
self._generate_index() | ||
if self.single_doc_type == 'docstring': | ||
self._run_os('sphinx-autogen', '-o', | ||
'source/generated_single', 'source/index.rst') | ||
|
||
@property | ||
def _exclude_patterns(self): | ||
"""Docs source files that will be excluded from building.""" | ||
# TODO move maybe_exclude_notebooks here | ||
if self.single_doc is not None: | ||
rst_files = [f for f in os.listdir(SOURCE_PATH) | ||
if ((f.endswith('.rst') or f.endswith('.ipynb')) | ||
and (f != 'index.rst') | ||
and (f != '{0}.rst'.format(self.single_doc)))] | ||
if self.single_doc_type != 'api': | ||
rst_files += ['generated/*.rst'] | ||
elif not self.include_api: | ||
rst_files = ['api.rst', 'generated/*.rst'] | ||
else: | ||
rst_files = ['generated_single/*.rst'] | ||
|
||
exclude_patterns = ','.join( | ||
'{!r}'.format(i) for i in ['**.ipynb_checkpoints'] + rst_files) | ||
|
||
return exclude_patterns | ||
self.warnings_are_errors = warnings_are_errors | ||
|
||
if single_doc: | ||
single_doc = self._process_single_doc(single_doc) | ||
include_api = False | ||
os.environ['SPHINX_PATTERN'] = single_doc | ||
elif not include_api: | ||
os.environ['SPHINX_PATTERN'] = '-api' | ||
|
||
self.single_doc_html = None | ||
if single_doc and single_doc.endswith('.rst'): | ||
self.single_doc_html = os.path.splitext(single_doc)[0] + '.html' | ||
elif single_doc: | ||
self.single_doc_html = 'generated/pandas.{}.html'.format( | ||
single_doc) | ||
|
||
def _process_single_doc(self, single_doc): | ||
"""Extract self.single_doc (base name) and self.single_doc_type from | ||
passed single_doc kwarg. | ||
|
||
""" | ||
self.include_api = False | ||
|
||
if single_doc == 'api.rst' or single_doc == 'api': | ||
self.single_doc_type = 'api' | ||
self.single_doc = 'api' | ||
elif os.path.exists(os.path.join(SOURCE_PATH, single_doc)): | ||
self.single_doc_type = 'rst' | ||
Make sure the provided value for --single is a path to an existing | ||
.rst/.ipynb file, or a pandas object that can be imported. | ||
|
||
if 'whatsnew' in single_doc: | ||
basename = single_doc | ||
For example, categorial.rst or pandas.DataFrame.head. For the latter, | ||
return the corresponding file path | ||
(e.g. generated/pandas.DataFrame.head.rst). | ||
""" | ||
base_name, extension = os.path.splitext(single_doc) | ||
if extension in ('.rst', '.ipynb'): | ||
if os.path.exists(os.path.join(SOURCE_PATH, single_doc)): | ||
return single_doc | ||
else: | ||
basename = os.path.basename(single_doc) | ||
self.single_doc = os.path.splitext(basename)[0] | ||
elif os.path.exists( | ||
os.path.join(SOURCE_PATH, '{}.rst'.format(single_doc))): | ||
self.single_doc_type = 'rst' | ||
self.single_doc = single_doc | ||
elif single_doc is not None: | ||
raise FileNotFoundError('File {} not found'.format(single_doc)) | ||
|
||
elif single_doc.startswith('pandas.'): | ||
try: | ||
obj = pandas # noqa: F821 | ||
for name in single_doc.split('.'): | ||
obj = getattr(obj, name) | ||
except AttributeError: | ||
raise ValueError('Single document not understood, it should ' | ||
'be a file in doc/source/*.rst (e.g. ' | ||
'"contributing.rst" or a pandas function or ' | ||
'method (e.g. "pandas.DataFrame.head")') | ||
raise ImportError('Could not import {}'.format(single_doc)) | ||
else: | ||
self.single_doc_type = 'docstring' | ||
if single_doc.startswith('pandas.'): | ||
self.single_doc = single_doc[len('pandas.'):] | ||
else: | ||
self.single_doc = single_doc | ||
|
||
def _copy_generated_docstring(self): | ||
"""Copy existing generated (from api.rst) docstring page because | ||
this is more correct in certain cases (where a custom autodoc | ||
template is used). | ||
|
||
""" | ||
fname = os.path.join(SOURCE_PATH, 'generated', | ||
'pandas.{}.rst'.format(self.single_doc)) | ||
temp_dir = os.path.join(SOURCE_PATH, 'generated_single') | ||
|
||
try: | ||
os.makedirs(temp_dir) | ||
except OSError: | ||
pass | ||
|
||
if os.path.exists(fname): | ||
try: | ||
# copying to make sure sphinx always thinks it is new | ||
# and needs to be re-generated (to pick source code changes) | ||
shutil.copy(fname, temp_dir) | ||
except: # noqa | ||
pass | ||
|
||
def _generate_index(self): | ||
"""Create index.rst file with the specified sections.""" | ||
if self.single_doc_type == 'docstring': | ||
self._copy_generated_docstring() | ||
|
||
with open(os.path.join(SOURCE_PATH, 'index.rst.template')) as f: | ||
t = jinja2.Template(f.read()) | ||
|
||
with open(os.path.join(SOURCE_PATH, 'index.rst'), 'w') as f: | ||
f.write(t.render(include_api=self.include_api, | ||
single_doc=self.single_doc, | ||
single_doc_type=self.single_doc_type)) | ||
|
||
@staticmethod | ||
def _create_build_structure(): | ||
"""Create directories required to build documentation.""" | ||
for dirname in BUILD_DIRS: | ||
try: | ||
os.makedirs(os.path.join(BUILD_PATH, dirname)) | ||
except OSError: | ||
pass | ||
return single_doc[len('pandas.'):] | ||
else: | ||
raise ValueError('--single value should be a valid path to a ' | ||
'.rst or .ipynb file, or a valid pandas object ' | ||
'(e.g. categorical.rst or pandas.DataFrame.head)') | ||
|
||
@staticmethod | ||
def _run_os(*args): | ||
"""Execute a command as a OS terminal. | ||
""" | ||
Execute a command as a OS terminal. | ||
|
||
Parameters | ||
---------- | ||
|
@@ -211,16 +97,11 @@ def _run_os(*args): | |
-------- | ||
>>> DocBuilder()._run_os('python', '--version') | ||
""" | ||
# TODO check_call should be more safe, but it fails with | ||
# exclude patterns, needs investigation | ||
# subprocess.check_call(args, stderr=subprocess.STDOUT) | ||
exit_status = os.system(' '.join(args)) | ||
if exit_status: | ||
msg = 'Command "{}" finished with exit code {}' | ||
raise RuntimeError(msg.format(' '.join(args), exit_status)) | ||
subprocess.check_call(args, stdout=sys.stdout, stderr=sys.stderr) | ||
|
||
def _sphinx_build(self, kind): | ||
"""Call sphinx to build documentation. | ||
""" | ||
Call sphinx to build documentation. | ||
|
||
Attribute `num_jobs` from the class is used. | ||
|
||
|
@@ -236,43 +117,44 @@ def _sphinx_build(self, kind): | |
raise ValueError('kind must be html or latex, ' | ||
'not {}'.format(kind)) | ||
|
||
self._run_os('sphinx-build', | ||
'-j{}'.format(self.num_jobs), | ||
'-b{}'.format(kind), | ||
'-{}'.format( | ||
'v' * self.verbosity) if self.verbosity else '', | ||
'-d"{}"'.format(os.path.join(BUILD_PATH, 'doctrees')), | ||
'-Dexclude_patterns={}'.format(self.exclude_patterns), | ||
'"{}"'.format(SOURCE_PATH), | ||
'"{}"'.format(os.path.join(BUILD_PATH, kind))) | ||
|
||
def _open_browser(self): | ||
base_url = os.path.join('file://', DOC_PATH, 'build', 'html') | ||
if self.single_doc_type == 'docstring': | ||
url = os.path.join( | ||
base_url, | ||
'generated_single', 'pandas.{}.html'.format(self.single_doc)) | ||
else: | ||
url = os.path.join(base_url, '{}.html'.format(self.single_doc)) | ||
self.clean() | ||
|
||
cmd = ['sphinx-build', '-b', kind] | ||
if self.num_jobs: | ||
cmd += ['-j', str(self.num_jobs)] | ||
if self.warnings_are_errors: | ||
cmd.append('-W') | ||
if self.verbosity: | ||
cmd.append('-{}'.format('v' * self.verbosity)) | ||
cmd += ['-d', os.path.join(BUILD_PATH, 'doctrees'), | ||
SOURCE_PATH, os.path.join(BUILD_PATH, kind)] | ||
cmd = ['sphinx-build', SOURCE_PATH, os.path.join(BUILD_PATH, kind)] | ||
self._run_os(*cmd) | ||
|
||
def _open_browser(self, single_doc_html): | ||
""" | ||
Open a browser tab showing single | ||
""" | ||
url = os.path.join('file://', DOC_PATH, 'build', 'html', | ||
single_doc_html) | ||
webbrowser.open(url, new=2) | ||
|
||
def html(self): | ||
"""Build HTML documentation.""" | ||
self._create_build_structure() | ||
with _maybe_exclude_notebooks(): | ||
self._sphinx_build('html') | ||
zip_fname = os.path.join(BUILD_PATH, 'html', 'pandas.zip') | ||
if os.path.exists(zip_fname): | ||
os.remove(zip_fname) | ||
|
||
if self.single_doc is not None: | ||
self._open_browser() | ||
shutil.rmtree(os.path.join(SOURCE_PATH, 'generated_single'), | ||
ignore_errors=True) | ||
""" | ||
Build HTML documentation. | ||
""" | ||
self._sphinx_build('html') | ||
zip_fname = os.path.join(BUILD_PATH, 'html', 'pandas.zip') | ||
if os.path.exists(zip_fname): | ||
os.remove(zip_fname) | ||
|
||
if self.single_doc_html is not None: | ||
self._open_browser(self.single_doc_html) | ||
|
||
def latex(self, force=False): | ||
"""Build PDF documentation.""" | ||
self._create_build_structure() | ||
""" | ||
Build PDF documentation. | ||
""" | ||
if sys.platform == 'win32': | ||
sys.stderr.write('latex build has not been tested on windows\n') | ||
else: | ||
|
@@ -289,18 +171,24 @@ def latex(self, force=False): | |
self._run_os('make') | ||
|
||
def latex_forced(self): | ||
"""Build PDF documentation with retries to find missing references.""" | ||
""" | ||
Build PDF documentation with retries to find missing references. | ||
""" | ||
self.latex(force=True) | ||
|
||
@staticmethod | ||
def clean(): | ||
"""Clean documentation generated files.""" | ||
""" | ||
Clean documentation generated files. | ||
""" | ||
shutil.rmtree(BUILD_PATH, ignore_errors=True) | ||
shutil.rmtree(os.path.join(SOURCE_PATH, 'generated'), | ||
ignore_errors=True) | ||
|
||
def zip_html(self): | ||
"""Compress HTML documentation into a zip file.""" | ||
""" | ||
Compress HTML documentation into a zip file. | ||
""" | ||
zip_fname = os.path.join(BUILD_PATH, 'html', 'pandas.zip') | ||
if os.path.exists(zip_fname): | ||
os.remove(zip_fname) | ||
|
@@ -326,7 +214,7 @@ def main(): | |
help='command to run: {}'.format(', '.join(cmds))) | ||
argparser.add_argument('--num-jobs', | ||
type=int, | ||
default=1, | ||
default=0, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the DocBuilder init, the default is 1. Make them both the same? What does a value of 0 do? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point. with It's a bit weird, because the I'm happy setting our default to I address your other comment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm I think multiple cores speeds things up for me, at least for a full doc build. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I checked it with |
||
help='number of jobs used by sphinx-build') | ||
argparser.add_argument('--no-api', | ||
default=False, | ||
|
@@ -345,6 +233,9 @@ def main(): | |
argparser.add_argument('-v', action='count', dest='verbosity', default=0, | ||
help=('increase verbosity (can be repeated), ' | ||
'passed to the sphinx build command')) | ||
argparser.add_argument('--warnings-are-errors', '-W', | ||
action='store_true', | ||
help='fail if warnings are raised') | ||
args = argparser.parse_args() | ||
|
||
if args.command not in cmds: | ||
|
@@ -364,7 +255,7 @@ def main(): | |
os.environ['MPLBACKEND'] = 'module://matplotlib.backends.backend_agg' | ||
|
||
builder = DocBuilder(args.num_jobs, not args.no_api, args.single, | ||
args.verbosity) | ||
args.verbosity, args.warnings_are_errors) | ||
getattr(builder, args.command)() | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you print out
single_doc
here, so the user sees what they passed?