From 6546f0638f30d3a77375532c4cfc7e3d9fb91faf Mon Sep 17 00:00:00 2001
From: Andreas Mueller <amueller@nyu.edu>
Date: Wed, 7 Sep 2016 12:05:06 -0400
Subject: [PATCH] add (possibly slightly off) source lines to mangled
 docstrings.

more aggressive change to match up new lines with old lines.

adjust expectations in docstring

strip() instead of strip(" ")

added really long explanation and doctest

make lines actually 80 chars long and don't guess

typo
---
 numpydoc/numpydoc.py | 64 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 61 insertions(+), 3 deletions(-)

diff --git a/numpydoc/numpydoc.py b/numpydoc/numpydoc.py
index 3398c678..d59c5f0b 100644
--- a/numpydoc/numpydoc.py
+++ b/numpydoc/numpydoc.py
@@ -29,7 +29,6 @@
     raise RuntimeError("Sphinx 1.0.1 or newer is required")
 
 from .docscrape_sphinx import get_doc_object, SphinxDocString
-from sphinx.util.compat import Directive
 
 if sys.version_info[0] >= 3:
     sixu = lambda s: s
@@ -139,7 +138,7 @@ def setup(app, get_doc_object_=get_doc_object):
     # Extra mangling domains
     app.add_domain(NumpyPythonDomain)
     app.add_domain(NumpyCDomain)
-    
+
     metadata = {'parallel_read_safe': True}
     return metadata
 
@@ -190,6 +189,62 @@ class NumpyCDomain(ManglingDomainBase, CDomain):
     }
 
 
+def match_items(lines, content_old):
+    """Create items for mangled lines.
+
+    This function tries to match the lines in ``lines`` with the items (source
+    file references and line numbers) in ``content_old``. The
+    ``mangle_docstrings`` function changes the actual docstrings, but doesn't
+    keep track of where each line came from. The mangling does many operations
+    on the original lines, which are hard to track afterwards.
+
+    Many of the line changes come from deleting or inserting blank lines. This
+    function tries to match lines by ignoring blank lines. All other changes
+    (such as inserting figures or changes in the references) are completely
+    ignored, so the generated line numbers will be off if ``mangle_docstrings``
+    does anything non-trivial.
+
+    This is a best-effort function and the real fix would be to make
+    ``mangle_docstrings`` actually keep track of the ``items`` together with
+    the ``lines``.
+
+    Examples
+    --------
+    >>> lines = ['', 'A', '', 'B', '   ', '', 'C', 'D']
+    >>> lines_old = ['a', '', '', 'b', '', 'c']
+    >>> items_old = [('file1.py', 0), ('file1.py', 1), ('file1.py', 2),
+    ...              ('file2.py', 0), ('file2.py', 1), ('file2.py', 2)]
+    >>> content_old = ViewList(lines_old, items=items_old)
+    >>> match_items(lines, content_old) # doctest: +NORMALIZE_WHITESPACE
+    [('file1.py', 0), ('file1.py', 0), ('file2.py', 0), ('file2.py', 0),
+     ('file2.py', 2), ('file2.py', 2), ('file2.py', 2), ('file2.py', 2)]
+    >>> # first 2 ``lines`` are matched to 'a', second 2 to 'b', rest to 'c'
+    >>> # actual content is completely ignored.
+
+    Notes
+    -----
+    The algorithm tries to match any line in ``lines`` with one in
+    ``lines_old``.  It skips over all empty lines in ``lines_old`` and assigns
+    this line number to all lines in ``lines``, unless a non-empty line is
+    found in ``lines`` in which case it goes to the next line in ``lines_old``.
+
+    """
+    items_new = []
+    lines_old = content_old.data
+    items_old = content_old.items
+    j = 0
+    for i, line in enumerate(lines):
+        # go to next non-empty line in old:
+        # ``not s.strip()`` is true when s is empty or all whitespace
+        while j < len(lines_old) - 1 and not lines_old[j].strip():
+            j += 1
+        items_new.append(items_old[j])
+        if line.strip() and j < len(lines_old) - 1:
+            j += 1
+    assert(len(items_new) == len(lines))
+    return items_new
+
+
 def wrap_mangling_directive(base_directive, objtype):
     class directive(base_directive):
         def run(self):
@@ -205,7 +260,10 @@ def run(self):
 
             lines = list(self.content)
             mangle_docstrings(env.app, objtype, name, None, None, lines)
-            self.content = ViewList(lines, self.content.parent)
+            if self.content:
+                items = match_items(lines, self.content)
+                self.content = ViewList(lines, items=items,
+                                        parent=self.content.parent)
 
             return base_directive.run(self)