Skip to content

Assert fail in document.py when typing while large history loading #1158

Closed
@bobhy

Description

@bobhy

Seems to be multitasking hazard between input on the main event loop and ThreadedHistory invoking callback from another thread. First reported in xonsh/xonsh#3586.

Repro (using sample program below):

  1. run test program, get the 'Say something' prompt
  2. wait 3 - 5 seconds.
  3. type anything and press ENTER.

Observe stack trace like the following:

nhandled exception in event loop:
  File "/usr/lib/python3.8/asyncio/events.py", line 81, in _run
    self._context.run(self._callback, *self._args)
  File "/mnt/myd/dev/src/python-prompt-toolkit/prompt_toolkit/eventloop/utils.py", line 72, in schedule
    func()
  File "/mnt/myd/dev/src/python-prompt-toolkit/prompt_toolkit/application/application.py", line 456, in redraw
    self._redraw()
  File "/mnt/myd/dev/src/python-prompt-toolkit/prompt_toolkit/application/application.py", line 523, in _redraw
    self.context.run(run_in_context)
  File "/mnt/myd/dev/src/python-prompt-toolkit/prompt_toolkit/application/application.py", line 509, in run_in_context
    self.renderer.render(self, self.layout)
  File "/mnt/myd/dev/src/python-prompt-toolkit/prompt_toolkit/renderer.py", line 654, in render
    layout.container.write_to_screen(
  File "/mnt/myd/dev/src/python-prompt-toolkit/prompt_toolkit/layout/containers.py", line 381, in write_to_screen
    sizes = self._divide_heights(write_position)
  File "/mnt/myd/dev/src/python-prompt-toolkit/prompt_toolkit/layout/containers.py", line 435, in _divide_heights
    dimensions = [c.preferred_height(width, height) for c in self._all_children]
  File "/mnt/myd/dev/src/python-prompt-toolkit/prompt_toolkit/layout/containers.py", line 435, in <listcomp>
    dimensions = [c.preferred_height(width, height) for c in self._all_children]
  File "/mnt/myd/dev/src/python-prompt-toolkit/prompt_toolkit/layout/containers.py", line 792, in preferred_height
    return self.content.preferred_height(width, max_available_height)
  File "/mnt/myd/dev/src/python-prompt-toolkit/prompt_toolkit/layout/containers.py", line 324, in preferred_height
    dimensions = [
  File "/mnt/myd/dev/src/python-prompt-toolkit/prompt_toolkit/layout/containers.py", line 325, in <listcomp>
    c.preferred_height(width, max_available_height) for c in self._all_children
  File "/mnt/myd/dev/src/python-prompt-toolkit/prompt_toolkit/layout/containers.py", line 2595, in preferred_height
    return self.content.preferred_height(width, max_available_height)
  File "/mnt/myd/dev/src/python-prompt-toolkit/prompt_toolkit/layout/containers.py", line 1620, in preferred_height
    return self._merge_dimensions(
  File "/mnt/myd/dev/src/python-prompt-toolkit/prompt_toolkit/layout/containers.py", line 1648, in _merge_dimensions
    preferred = get_preferred()
  File "/mnt/myd/dev/src/python-prompt-toolkit/prompt_toolkit/layout/containers.py", line 1613, in preferred_content_height
    return self.content.preferred_height(
  File "/mnt/myd/dev/src/python-prompt-toolkit/prompt_toolkit/layout/controls.py", line 639, in preferred_height
    content = self.create_content(width, height=1)  # Pass a dummy '1' as height.
  File "/mnt/myd/dev/src/python-prompt-toolkit/prompt_toolkit/layout/controls.py", line 761, in create_content
    document = buffer.document
  File "/mnt/myd/dev/src/python-prompt-toolkit/prompt_toolkit/buffer.py", line 527, in document
    return self._document_cache[
  File "/mnt/myd/dev/src/python-prompt-toolkit/prompt_toolkit/cache.py", line 98, in __missing__
    result = self.get_value(*key)
  File "/mnt/myd/dev/src/python-prompt-toolkit/prompt_toolkit/document.py", line 98, in __init__
    assert cursor_position is None or cursor_position <= len(text), AssertionError(

Exception cursor_position=2, len_text=1
Press ENTER to continue....

Sample program (bug_thread_history.py, based on examples/prompts/history/slow-history.py): This has been modified to load 100000 lines into initial buffer with no delay. This is arguably unrealistic, but it's a good test, and a model for what we saw in Xonsh.

#!/usr/bin/env python
"""
Demonstrate bug in threaded history, where asynchronous loading can corrupt Buffer context.

Seems to happen with very large history being loaded and causing slowdowns.

"""
import time

from prompt_toolkit import PromptSession
from prompt_toolkit.history import History, ThreadedHistory

import re


class MegaHistory(History):
    """
    Example class that loads lots of history

    Sample designed to exercise existing multitasking hazards, don't add any more.
    """

    def __init__(self, init_request:int = 1000, *args, **kwargs):
        super(MegaHistory, self).__init__(*args, **kwargs)
        self.requested_count = 0        # only modified by main (requesting) thread
        self.requested_commands = 0     # only modified by main (requesting) thread
        self.actual_count = 0           # only modified by background thread

    def load_history_strings(self):
        while True:
            while self.requested_count <= self.actual_count:
                time.sleep(0.001)   # don't busy loop

            print(f'... starting to load {self.requested_count - self.actual_count:15,d} more items.')
            while self.requested_count > self.actual_count:
                yield f"History item {self.actual_count:15,d}, command number {self.requested_commands}"
                self.actual_count += 1
            print('...done.')            

    def store_string(self, string):
        pass  # Don't store strings.

    # called by main thread, watch out for multitasking hazards.
    def add_request(self, requested:int = 0):
        self.requested_count += requested
        self.requested_commands += 1

    def show_request(self):
        print(f'Have loaded {self.actual_count:15,d} of {self.requested_count:15,d} in {self.requested_commands} commands.')


HIST_CMD = re.compile(r'^hist (load (\d+)|show)$', re.IGNORECASE)


def main():
    print(
        "Asynchronous loading of history. Designed to exercise multitasking hazard in Buffer.\n"
        "When started, tries to load 100,000 lines into history with no delay.\n"
        "Expect to trigger assertion in document.py line 98, though others may fire.\n"
        "\n"
        "Can request more lines by `hist load nnnnn`, and can see progress by `hist show`.\n"
        "\n"
    )
    mh = MegaHistory()
    our_history = ThreadedHistory(mh)

    # The history needs to be passed to the `PromptSession`. It can't be passed
    # to the `prompt` call because only one history can be used during a
    # session.
    session = PromptSession(history=our_history)

    mh.add_request(99999)

    while True:
        text = session.prompt("Say something: ")
        if text.startswith('hist'):
            m = HIST_CMD.match(text)
            if not m:
                print('eh?')
            else:
                if m[1] == 'show':
                    mh.show_request()
                elif m[1].startswith('load'):
                    mh.add_request(int(m[2]))
                else:
                    print('eh? hist load nnnnnn\nor hist show')
            pass
        else:
            print("You said: %s" % text)


if __name__ == "__main__":
    main()

I know there are disclaimers all over history.py about futility of sharing an event loop between history and main prompt in the general case, but I don't think it's unreasonable to require ThreadedHistory to be instantiated on the same thread that will eventually instantiate Application.

I have a fork of ptk doing just this which seems to work fine with Xonsh (and with this test program), will submit a PR for discussion.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions