Closed
Description
Something updated and left spam behind; only on the 3.8 build (it's possible this is the only one we are testing).
2021-03-03T09:34:50.9393763Z ================================== FAILURES ===================================
2021-03-03T09:34:50.9394902Z ___________________________ test_bs4_version_fails ____________________________
2021-03-03T09:34:50.9396205Z [gw3] win32 -- Python 3.8.8 C:\Miniconda\envs\pandas-dev\python.exe
2021-03-03T09:34:50.9397221Z
2021-03-03T09:34:50.9398044Z monkeypatch = <_pytest.monkeypatch.MonkeyPatch object at 0x000002A71EF656D0>
2021-03-03T09:34:50.9399101Z datapath = <function datapath.<locals>.deco at 0x000002A71EEA7040>
2021-03-03T09:34:50.9400267Z
2021-03-03T09:34:50.9401214Z @td.skip_if_no("bs4")
2021-03-03T09:34:50.9402383Z def test_bs4_version_fails(monkeypatch, datapath):
2021-03-03T09:34:50.9403404Z import bs4
2021-03-03T09:34:50.9404409Z
2021-03-03T09:34:50.9405477Z monkeypatch.setattr(bs4, "__version__", "4.2")
2021-03-03T09:34:50.9406860Z with pytest.raises(ImportError, match="Pandas requires version"):
2021-03-03T09:34:50.9408079Z > read_html(datapath("io", "data", "html", "spam.html"), flavor="bs4")
2021-03-03T09:34:50.9408759Z
2021-03-03T09:34:50.9409527Z pandas\tests\io\test_html.py:77:
2021-03-03T09:34:50.9410691Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2021-03-03T09:34:50.9411453Z
2021-03-03T09:34:50.9412292Z args = ('D:\\a\\1\\s\\pandas\\tests\\io\\data\\html\\spam.html',)
2021-03-03T09:34:50.9413367Z kwargs = {'flavor': 'bs4'}, arguments = " except for the argument 'io'"
2021-03-03T09:34:50.9414395Z num_allow_args = 1
2021-03-03T09:34:50.9415166Z
2021-03-03T09:34:50.9416110Z @wraps(func)
2021-03-03T09:34:50.9417000Z def wrapper(*args, **kwargs):
2021-03-03T09:34:50.9418076Z arguments = _format_argument_list(allow_args)
2021-03-03T09:34:50.9420427Z if isinstance(allow_args, (list, tuple)):
2021-03-03T09:34:50.9422235Z num_allow_args = len(allow_args)
2021-03-03T09:34:50.9423173Z else:
2021-03-03T09:34:50.9424186Z num_allow_args = allow_args
2021-03-03T09:34:50.9425230Z if len(args) > num_allow_args:
2021-03-03T09:34:50.9426200Z msg = (
2021-03-03T09:34:50.9427274Z f"Starting with Pandas version {version} all arguments of "
2021-03-03T09:34:50.9428386Z f"{func.__name__}{arguments} will be keyword-only"
2021-03-03T09:34:50.9429355Z )
2021-03-03T09:34:50.9430499Z warnings.warn(msg, FutureWarning, stacklevel=stacklevel)
2021-03-03T09:34:50.9431542Z > return func(*args, **kwargs)
2021-03-03T09:34:50.9432167Z
2021-03-03T09:34:50.9432866Z pandas\util\_decorators.py:310:
2021-03-03T09:34:50.9434201Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2021-03-03T09:34:50.9435043Z
2021-03-03T09:34:50.9436688Z io = 'D:\\a\\1\\s\\pandas\\tests\\io\\data\\html\\spam.html', match = '.+'
2021-03-03T09:34:50.9437750Z flavor = 'bs4', header = None, index_col = None, skiprows = None, attrs = None
2021-03-03T09:34:50.9439069Z parse_dates = False, thousands = ',', encoding = None, decimal = '.'
2021-03-03T09:34:50.9440479Z converters = None, na_values = None, keep_default_na = True
2021-03-03T09:34:50.9441401Z displayed_only = True
2021-03-03T09:34:50.9442035Z
2021-03-03T09:34:50.9443107Z @deprecate_nonkeyword_arguments(version="2.0")
2021-03-03T09:34:50.9443927Z def read_html(
2021-03-03T09:34:50.9444910Z io: FilePathOrBuffer,
2021-03-03T09:34:50.9446104Z match: Union[str, Pattern] = ".+",
2021-03-03T09:34:50.9447121Z flavor: Optional[str] = None,
2021-03-03T09:34:50.9448233Z header: Optional[Union[int, Sequence[int]]] = None,
2021-03-03T09:34:50.9449370Z index_col: Optional[Union[int, Sequence[int]]] = None,
2021-03-03T09:34:50.9450592Z skiprows: Optional[Union[int, Sequence[int], slice]] = None,
2021-03-03T09:34:50.9453243Z attrs: Optional[Dict[str, str]] = None,
2021-03-03T09:34:50.9454443Z parse_dates: bool = False,
2021-03-03T09:34:50.9455453Z thousands: Optional[str] = ",",
2021-03-03T09:34:50.9456466Z encoding: Optional[str] = None,
2021-03-03T09:34:50.9457526Z decimal: str = ".",
2021-03-03T09:34:50.9458519Z converters: Optional[Dict] = None,
2021-03-03T09:34:50.9459455Z na_values=None,
2021-03-03T09:34:50.9460540Z keep_default_na: bool = True,
2021-03-03T09:34:50.9461533Z displayed_only: bool = True,
2021-03-03T09:34:50.9462491Z ) -> List[DataFrame]:
2021-03-03T09:34:50.9463397Z r"""
2021-03-03T09:34:50.9464413Z Read HTML tables into a ``list`` of ``DataFrame`` objects.
2021-03-03T09:34:50.9465322Z
2021-03-03T09:34:50.9466210Z Parameters
2021-03-03T09:34:50.9467115Z ----------
2021-03-03T09:34:50.9468087Z io : str, path object or file-like object
2021-03-03T09:34:50.9469308Z A URL, a file-like object, or a raw string containing HTML. Note that
2021-03-03T09:34:50.9470751Z lxml only accepts the http, ftp and file url protocols. If you have a
2021-03-03T09:34:50.9471778Z URL that starts with ``'https'`` you might try removing the ``'s'``.
2021-03-03T09:34:50.9473219Z
2021-03-03T09:34:50.9474209Z match : str or compiled regular expression, optional
2021-03-03T09:34:50.9475628Z The set of tables containing text matching this regex or string will be
2021-03-03T09:34:50.9476778Z returned. Unless the HTML is extremely simple you will probably need to
2021-03-03T09:34:50.9477931Z pass a non-empty string here. Defaults to '.+' (match any non-empty
2021-03-03T09:34:50.9479073Z string). The default value will return all tables contained on a page.
2021-03-03T09:34:50.9480641Z This value is converted to a regular expression so that there is
2021-03-03T09:34:50.9482369Z consistent behavior between Beautiful Soup and lxml.
2021-03-03T09:34:50.9483359Z
2021-03-03T09:34:50.9484366Z flavor : str, optional
2021-03-03T09:34:50.9486070Z The parsing engine to use. 'bs4' and 'html5lib' are synonymous with
2021-03-03T09:34:50.9487175Z each other, they are both there for backwards compatibility. The
2021-03-03T09:34:50.9488508Z default of ``None`` tries to use ``lxml`` to parse and if that fails it
2021-03-03T09:34:50.9489964Z falls back on ``bs4`` + ``html5lib``.
2021-03-03T09:34:50.9490925Z
2021-03-03T09:34:50.9491964Z header : int or list-like, optional
2021-03-03T09:34:50.9493314Z The row (or list of rows for a :class:`~pandas.MultiIndex`) to use to
2021-03-03T09:34:50.9496285Z make the columns headers.
2021-03-03T09:34:50.9497205Z
2021-03-03T09:34:50.9498236Z index_col : int or list-like, optional
2021-03-03T09:34:50.9499348Z The column (or list of columns) to use to create the index.
2021-03-03T09:34:50.9500559Z
2021-03-03T09:34:50.9501458Z skiprows : int, list-like or slice, optional
2021-03-03T09:34:50.9502617Z Number of rows to skip after parsing the column integer. 0-based. If a
2021-03-03T09:34:50.9503782Z sequence of integers or a slice is given, will skip the rows indexed by
2021-03-03T09:34:50.9504958Z that sequence. Note that a single element sequence means 'skip the nth
2021-03-03T09:34:50.9506054Z row' whereas an integer means 'skip n rows'.
2021-03-03T09:34:50.9507075Z
2021-03-03T09:34:50.9508036Z attrs : dict, optional
2021-03-03T09:34:50.9509131Z This is a dictionary of attributes that you can pass to use to identify
2021-03-03T09:34:50.9510472Z the table in the HTML. These are not checked for validity before being
2021-03-03T09:34:50.9511547Z passed to lxml or Beautiful Soup. However, these attributes must be
2021-03-03T09:34:50.9512772Z valid HTML table attributes to work correctly. For example, ::
2021-03-03T09:34:50.9514058Z
2021-03-03T09:34:50.9515226Z attrs = {'id': 'table'}
2021-03-03T09:34:50.9516136Z
2021-03-03T09:34:50.9517184Z is a valid attribute dictionary because the 'id' HTML tag attribute is
2021-03-03T09:34:50.9518485Z a valid HTML attribute for *any* HTML tag as per `this document
2021-03-03T09:34:50.9519874Z <https://html.spec.whatwg.org/multipage/dom.html#global-attributes>`__. ::
2021-03-03T09:34:50.9520961Z
2021-03-03T09:34:50.9521954Z attrs = {'asdf': 'table'}
2021-03-03T09:34:50.9524613Z
2021-03-03T09:34:50.9527108Z is *not* a valid attribute dictionary because 'asdf' is not a valid
2021-03-03T09:34:50.9528585Z HTML attribute even if it is a valid XML attribute. Valid HTML 4.01
2021-03-03T09:34:50.9529710Z table attributes can be found `here
2021-03-03T09:34:50.9531196Z <http://www.w3.org/TR/REC-html40/struct/tables.html#h-11.2>`__. A
2021-03-03T09:34:50.9532780Z working draft of the HTML 5 spec can be found `here
2021-03-03T09:34:50.9534290Z <https://html.spec.whatwg.org/multipage/tables.html>`__. It contains the
2021-03-03T09:34:50.9535412Z latest information on table attributes for the modern web.
2021-03-03T09:34:50.9536751Z
2021-03-03T09:34:50.9537819Z parse_dates : bool, optional
2021-03-03T09:34:50.9539089Z See :func:`~read_csv` for more details.
2021-03-03T09:34:50.9540539Z
2021-03-03T09:34:50.9541838Z thousands : str, optional
2021-03-03T09:34:50.9543300Z Separator to use to parse thousands. Defaults to ``','``.
2021-03-03T09:34:50.9544304Z
2021-03-03T09:34:50.9545341Z encoding : str, optional
2021-03-03T09:34:50.9546733Z The encoding used to decode the web page. Defaults to ``None``.``None``
2021-03-03T09:34:50.9548178Z preserves the previous encoding behavior, which depends on the
2021-03-03T09:34:50.9549386Z underlying parser library (e.g., the parser library will try to use
2021-03-03T09:34:50.9550583Z the encoding provided by the document).
2021-03-03T09:34:50.9551571Z
2021-03-03T09:34:50.9552794Z decimal : str, default '.'
2021-03-03T09:34:50.9553961Z Character to recognize as decimal point (e.g. use ',' for European
2021-03-03T09:34:50.9555156Z data).
2021-03-03T09:34:50.9556075Z
2021-03-03T09:34:50.9557126Z converters : dict, default None
2021-03-03T09:34:50.9558260Z Dict of functions for converting values in certain columns. Keys can
2021-03-03T09:34:50.9559448Z either be integers or column labels, values are functions that take one
2021-03-03T09:34:50.9561145Z input argument, the cell (not column) content, and return the
2021-03-03T09:34:50.9562185Z transformed content.
2021-03-03T09:34:50.9563144Z
2021-03-03T09:34:50.9564371Z na_values : iterable, default None
2021-03-03T09:34:50.9565328Z Custom NA values.
2021-03-03T09:34:50.9566234Z
2021-03-03T09:34:50.9567466Z keep_default_na : bool, default True
2021-03-03T09:34:50.9568610Z If na_values are specified and keep_default_na is False the default NaN
2021-03-03T09:34:50.9569944Z values are overridden, otherwise they're appended to.
2021-03-03T09:34:50.9571026Z
2021-03-03T09:34:50.9572069Z displayed_only : bool, default True
2021-03-03T09:34:50.9573777Z Whether elements with "display: none" should be parsed.
2021-03-03T09:34:50.9574758Z
2021-03-03T09:34:50.9575740Z Returns
2021-03-03T09:34:50.9576684Z -------
2021-03-03T09:34:50.9577583Z dfs
2021-03-03T09:34:50.9578572Z A list of DataFrames.
2021-03-03T09:34:50.9579501Z
2021-03-03T09:34:50.9580619Z See Also
2021-03-03T09:34:50.9581611Z --------
2021-03-03T09:34:50.9584490Z read_csv : Read a comma-separated values (csv) file into DataFrame.
2021-03-03T09:34:50.9585488Z
2021-03-03T09:34:50.9586627Z Notes
2021-03-03T09:34:50.9587704Z -----
2021-03-03T09:34:50.9589093Z Before using this function you should read the :ref:`gotchas about the
2021-03-03T09:34:50.9590662Z HTML parsing libraries <io.html.gotchas>`.
2021-03-03T09:34:50.9591679Z
2021-03-03T09:34:50.9593980Z Expect to do some cleanup after you call this function. For example, you
2021-03-03T09:34:50.9595393Z might need to manually assign column names if the column names are
2021-03-03T09:34:50.9596531Z converted to NaN when you pass the `header=0` argument. We try to assume as
2021-03-03T09:34:50.9597875Z little as possible about the structure of the table and push the
2021-03-03T09:34:50.9598988Z idiosyncrasies of the HTML contained in the table to the user.
2021-03-03T09:34:50.9600154Z
2021-03-03T09:34:50.9601555Z This function searches for ``<table>`` elements and only for ``<tr>``
2021-03-03T09:34:50.9602693Z and ``<th>`` rows and ``<td>`` elements within each ``<tr>`` or ``<th>``
2021-03-03T09:34:50.9603978Z element in the table. ``<td>`` stands for "table data". This function
2021-03-03T09:34:50.9605142Z attempts to properly handle ``colspan`` and ``rowspan`` attributes.
2021-03-03T09:34:50.9606361Z If the function has a ``<thead>`` argument, it is used to construct
2021-03-03T09:34:50.9607735Z the header, otherwise the function attempts to find the header within
2021-03-03T09:34:50.9611377Z the body (by putting rows with only ``<th>`` elements into the header).
2021-03-03T09:34:50.9612659Z
2021-03-03T09:34:50.9613857Z Similar to :func:`~read_csv` the `header` argument is applied
2021-03-03T09:34:50.9615108Z **after** `skiprows` is applied.
2021-03-03T09:34:50.9616445Z
2021-03-03T09:34:50.9617649Z This function will *always* return a list of :class:`DataFrame` *or*
2021-03-03T09:34:50.9618836Z it will fail, e.g., it will *not* return an empty list.
2021-03-03T09:34:50.9620175Z
2021-03-03T09:34:50.9621156Z Examples
2021-03-03T09:34:50.9622483Z --------
2021-03-03T09:34:50.9623759Z See the :ref:`read_html documentation in the IO section of the docs
2021-03-03T09:34:50.9625741Z <io.read_html>` for some examples of reading in HTML tables.
2021-03-03T09:34:50.9626654Z """
2021-03-03T09:34:50.9629961Z _importers()
2021-03-03T09:34:50.9630986Z
2021-03-03T09:34:50.9632375Z # Type check here. We don't want to parse only to fail because of an
2021-03-03T09:34:50.9636697Z # invalid value of an integer skiprows.
2021-03-03T09:34:50.9645805Z if isinstance(skiprows, numbers.Integral) and skiprows < 0:
2021-03-03T09:34:50.9647398Z raise ValueError(
2021-03-03T09:34:50.9648490Z "cannot skip rows starting from the end of the "
2021-03-03T09:34:50.9649597Z "data (you passed a negative value)"
2021-03-03T09:34:50.9650845Z )
2021-03-03T09:34:50.9651870Z validate_header_arg(header)
2021-03-03T09:34:50.9655846Z
2021-03-03T09:34:50.9657475Z io = stringify_path(io)
2021-03-03T09:34:50.9658425Z
2021-03-03T09:34:50.9659385Z > return _parse(
2021-03-03T09:34:50.9660473Z flavor=flavor,
2021-03-03T09:34:50.9661514Z io=io,
2021-03-03T09:34:50.9662746Z match=match,
2021-03-03T09:34:50.9665347Z header=header,
2021-03-03T09:34:50.9666332Z index_col=index_col,
2021-03-03T09:34:50.9667304Z skiprows=skiprows,
2021-03-03T09:34:50.9668269Z parse_dates=parse_dates,
2021-03-03T09:34:50.9669246Z thousands=thousands,
2021-03-03T09:34:50.9670791Z attrs=attrs,
2021-03-03T09:34:50.9671907Z encoding=encoding,
2021-03-03T09:34:50.9673510Z decimal=decimal,
2021-03-03T09:34:50.9674463Z converters=converters,
2021-03-03T09:34:50.9675720Z na_values=na_values,
2021-03-03T09:34:50.9676696Z keep_default_na=keep_default_na,
2021-03-03T09:34:50.9677853Z displayed_only=displayed_only,
2021-03-03T09:34:50.9678795Z )
2021-03-03T09:34:50.9679392Z
2021-03-03T09:34:50.9680319Z pandas\io\html.py:1101:
2021-03-03T09:34:50.9681587Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2021-03-03T09:34:50.9682233Z
2021-03-03T09:34:50.9683012Z flavor = ('bs4',), io = 'D:\\a\\1\\s\\pandas\\tests\\io\\data\\html\\spam.html'
2021-03-03T09:34:50.9684431Z match = '.+', attrs = None, encoding = None, displayed_only = True
2021-03-03T09:34:50.9685691Z kwargs = {'converters': None, 'decimal': '.', 'header': None, 'index_col': None, ...}
2021-03-03T09:34:50.9686805Z compiled_match = re.compile('.+'), retained = None, flav = 'bs4'
2021-03-03T09:34:50.9687627Z
2021-03-03T09:34:50.9688479Z def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):
2021-03-03T09:34:50.9689643Z flavor = _validate_flavor(flavor)
2021-03-03T09:34:50.9691093Z compiled_match = re.compile(match) # you can pass a compiled regex here
2021-03-03T09:34:50.9692126Z
2021-03-03T09:34:50.9693385Z retained = None
2021-03-03T09:34:50.9694360Z for flav in flavor:
2021-03-03T09:34:50.9697075Z > parser = _parser_dispatch(flav)
2021-03-03T09:34:50.9697778Z
2021-03-03T09:34:50.9698570Z pandas\io\html.py:905:
2021-03-03T09:34:50.9699858Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2021-03-03T09:34:50.9700874Z
2021-03-03T09:34:50.9701578Z flavor = 'bs4'
2021-03-03T09:34:50.9702089Z
2021-03-03T09:34:50.9703080Z def _parser_dispatch(flavor):
2021-03-03T09:34:50.9703985Z """
2021-03-03T09:34:50.9705294Z Choose the parser based on the input flavor.
2021-03-03T09:34:50.9706453Z
2021-03-03T09:34:50.9707647Z Parameters
2021-03-03T09:34:50.9708798Z ----------
2021-03-03T09:34:50.9710072Z flavor : str
2021-03-03T09:34:50.9711281Z The type of parser to use. This must be a valid backend.
2021-03-03T09:34:50.9712730Z
2021-03-03T09:34:50.9713616Z Returns
2021-03-03T09:34:50.9714697Z -------
2021-03-03T09:34:50.9715908Z cls : _HtmlFrameParser subclass
2021-03-03T09:34:50.9717015Z The parser class based on the requested input flavor.
2021-03-03T09:34:50.9717942Z
2021-03-03T09:34:50.9718843Z Raises
2021-03-03T09:34:50.9719952Z ------
2021-03-03T09:34:50.9721500Z ValueError
2021-03-03T09:34:50.9722526Z * If `flavor` is not a valid backend.
2021-03-03T09:34:50.9723578Z ImportError
2021-03-03T09:34:50.9725002Z * If you do not have the requested `flavor`
2021-03-03T09:34:50.9725921Z """
2021-03-03T09:34:50.9726921Z valid_parsers = list(_valid_parsers.keys())
2021-03-03T09:34:50.9728115Z if flavor not in valid_parsers:
2021-03-03T09:34:50.9729121Z raise ValueError(
2021-03-03T09:34:50.9730409Z f"{repr(flavor)} is not a valid flavor, valid flavors are {valid_parsers}"
2021-03-03T09:34:50.9731612Z )
2021-03-03T09:34:50.9732646Z
2021-03-03T09:34:50.9733997Z if flavor in ("bs4", "html5lib"):
2021-03-03T09:34:50.9735079Z if not _HAS_HTML5LIB:
2021-03-03T09:34:50.9736256Z > raise ImportError("html5lib not found, please install it")
2021-03-03T09:34:50.9739210Z E ImportError: html5lib not found, please install it
2021-03-03T09:34:50.9741569Z
2021-03-03T09:34:50.9743398Z pandas\io\html.py:854: ImportError
2021-03-03T09:34:50.9744168Z
2021-03-03T09:34:50.9745062Z During handling of the above exception, another exception occurred:
2021-03-03T09:34:50.9745611Z
2021-03-03T09:34:50.9746799Z monkeypatch = <_pytest.monkeypatch.MonkeyPatch object at 0x000002A71EF656D0>
2021-03-03T09:34:50.9748701Z datapath = <function datapath.<locals>.deco at 0x000002A71EEA7040>
2021-03-03T09:34:50.9749285Z
2021-03-03T09:34:50.9750140Z @td.skip_if_no("bs4")
2021-03-03T09:34:50.9752292Z def test_bs4_version_fails(monkeypatch, datapath):
2021-03-03T09:34:50.9753395Z import bs4
2021-03-03T09:34:50.9754430Z
2021-03-03T09:34:50.9755519Z monkeypatch.setattr(bs4, "__version__", "4.2")
2021-03-03T09:34:50.9757033Z with pytest.raises(ImportError, match="Pandas requires version"):
2021-03-03T09:34:50.9758425Z > read_html(datapath("io", "data", "html", "spam.html"), flavor="bs4")
2021-03-03T09:34:50.9759758Z E AssertionError: Regex pattern 'Pandas requires version' does not match 'html5lib not found, please install it'.
2021-03-03T09:34:50.9760716Z
2021-03-03T09:34:50.9762174Z pandas\tests\io\test_html.py:77: AssertionError
2021-03-03T09:34:50.9763450Z _____________________________ test_same_ordering ______________________________
2021-03-03T09:34:50.9764923Z [gw3] win32 -- Python 3.8.8 C:\Miniconda\envs\pandas-dev\python.exe
2021-03-03T09:34:50.9765551Z
2021-03-03T09:34:50.9766362Z datapath = <function datapath.<locals>.deco at 0x000002A71EEA7430>
2021-03-03T09:34:50.9766872Z
2021-03-03T09:34:50.9767640Z @td.skip_if_no("bs4")
2021-03-03T09:34:50.9768481Z @td.skip_if_no("lxml")
2021-03-03T09:34:50.9769582Z def test_same_ordering(datapath):
2021-03-03T09:34:50.9771581Z filename = datapath("io", "data", "html", "valid_markup.html")
2021-03-03T09:34:50.9772659Z dfs_lxml = read_html(filename, index_col=0, flavor=["lxml"])
2021-03-03T09:34:50.9773803Z > dfs_bs4 = read_html(filename, index_col=0, flavor=["bs4"])
2021-03-03T09:34:50.9774600Z
2021-03-03T09:34:50.9775542Z pandas\tests\io\test_html.py:94:
2021-03-03T09:34:50.9776496Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2021-03-03T09:34:50.9777952Z pandas\util\_decorators.py:310: in wrapper
2021-03-03T09:34:50.9778968Z return func(*args, **kwargs)
2021-03-03T09:34:50.9780021Z pandas\io\html.py:1101: in read_html
2021-03-03T09:34:50.9781455Z return _parse(
2021-03-03T09:34:50.9782553Z pandas\io\html.py:905: in _parse
2021-03-03T09:34:50.9783499Z parser = _parser_dispatch(flav)
2021-03-03T09:34:50.9785072Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2021-03-03T09:34:50.9785765Z
2021-03-03T09:34:50.9786505Z flavor = 'bs4'
2021-03-03T09:34:50.9786937Z
2021-03-03T09:34:50.9787755Z def _parser_dispatch(flavor):
2021-03-03T09:34:50.9788587Z """
2021-03-03T09:34:50.9789891Z Choose the parser based on the input flavor.
2021-03-03T09:34:50.9791126Z
2021-03-03T09:34:50.9792223Z Parameters
2021-03-03T09:34:50.9793762Z ----------
2021-03-03T09:34:50.9794765Z flavor : str
2021-03-03T09:34:50.9795933Z The type of parser to use. This must be a valid backend.
2021-03-03T09:34:50.9797081Z
2021-03-03T09:34:50.9798045Z Returns
2021-03-03T09:34:50.9799239Z -------
2021-03-03T09:34:50.9800943Z cls : _HtmlFrameParser subclass
2021-03-03T09:34:50.9801829Z The parser class based on the requested input flavor.
2021-03-03T09:34:50.9803173Z
2021-03-03T09:34:50.9804333Z Raises
2021-03-03T09:34:50.9805377Z ------
2021-03-03T09:34:50.9806666Z ValueError
2021-03-03T09:34:50.9807877Z * If `flavor` is not a valid backend.
2021-03-03T09:34:50.9808911Z ImportError
2021-03-03T09:34:50.9810250Z * If you do not have the requested `flavor`
2021-03-03T09:34:50.9811402Z """
2021-03-03T09:34:50.9812458Z valid_parsers = list(_valid_parsers.keys())
2021-03-03T09:34:50.9813575Z if flavor not in valid_parsers:
2021-03-03T09:34:50.9815089Z raise ValueError(
2021-03-03T09:34:50.9816319Z f"{repr(flavor)} is not a valid flavor, valid flavors are {valid_parsers}"
2021-03-03T09:34:50.9817541Z )
2021-03-03T09:34:50.9818602Z
2021-03-03T09:34:50.9819657Z if flavor in ("bs4", "html5lib"):
2021-03-03T09:34:50.9820905Z if not _HAS_HTML5LIB:
2021-03-03T09:34:50.9822084Z > raise ImportError("html5lib not found, please install it")
2021-03-03T09:34:50.9823452Z E ImportError: html5lib not found, please install it
2021-03-03T09:34:50.9824265Z
2021-03-03T09:34:50.9825003Z pandas\io\html.py:854: ImportError
2021-03-03T09:34:50.9825973Z ____________________ TestReadHtml.test_to_html_compat[bs4] ____________________
2021-03-03T09:34:50.9827714Z [gw3] win32 -- Python 3.8.8 C:\Miniconda\envs\pandas-dev\python.exe
2021-03-03T09:34:50.9828410Z
2021-03-03T09:34:50.9829267Z self = <pandas.tests.io.test_html.TestReadHtml object at 0x000002A71EDF1850>
2021-03-03T09:34:50.9830475Z
2021-03-03T09:34:50.9831536Z def test_to_html_compat(self):
2021-03-03T09:34:50.9832727Z df = (
2021-03-03T09:34:50.9833922Z tm.makeCustomDataframe(
2021-03-03T09:34:50.9834959Z 4,
2021-03-03T09:34:50.9835999Z 3,
2021-03-03T09:34:50.9837656Z data_gen_f=lambda *args: np.random.rand(),
2021-03-03T09:34:50.9838744Z c_idx_names=False,
2021-03-03T09:34:50.9850315Z r_idx_names=False,
2021-03-03T09:34:50.9851844Z )
2021-03-03T09:34:50.9852936Z .applymap("{:.3f}".format)
2021-03-03T09:34:50.9854049Z .astype(float)
2021-03-03T09:34:50.9855186Z )
2021-03-03T09:34:50.9856391Z out = df.to_html()
2021-03-03T09:34:50.9857792Z > res = self.read_html(out, attrs={"class": "dataframe"}, index_col=0)[0]
2021-03-03T09:34:50.9858510Z
2021-03-03T09:34:50.9859338Z pandas\tests\io\test_html.py:132:
2021-03-03T09:34:50.9860736Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2021-03-03T09:34:50.9861961Z pandas\util\_decorators.py:310: in wrapper
2021-03-03T09:34:50.9863212Z return func(*args, **kwargs)
2021-03-03T09:34:50.9864610Z pandas\io\html.py:1101: in read_html
2021-03-03T09:34:50.9865602Z return _parse(
2021-03-03T09:34:50.9866631Z pandas\io\html.py:905: in _parse
2021-03-03T09:34:50.9867680Z parser = _parser_dispatch(flav)
2021-03-03T09:34:50.9868829Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2021-03-03T09:34:50.9869511Z
2021-03-03T09:34:50.9870601Z flavor = 'bs4'
2021-03-03T09:34:50.9871200Z
2021-03-03T09:34:50.9871985Z def _parser_dispatch(flavor):
2021-03-03T09:34:50.9872961Z """
2021-03-03T09:34:50.9874403Z Choose the parser based on the input flavor.
2021-03-03T09:34:50.9876648Z
2021-03-03T09:34:50.9877662Z Parameters
2021-03-03T09:34:50.9878646Z ----------
2021-03-03T09:34:50.9879598Z flavor : str
2021-03-03T09:34:50.9881204Z The type of parser to use. This must be a valid backend.
2021-03-03T09:34:50.9882129Z
2021-03-03T09:34:50.9883450Z Returns
2021-03-03T09:34:50.9884408Z -------
2021-03-03T09:34:50.9885677Z cls : _HtmlFrameParser subclass
2021-03-03T09:34:50.9887041Z The parser class based on the requested input flavor.
2021-03-03T09:34:50.9888150Z
2021-03-03T09:34:50.9889131Z Raises
2021-03-03T09:34:50.9890445Z ------
2021-03-03T09:34:50.9891252Z ValueError
2021-03-03T09:34:50.9892685Z * If `flavor` is not a valid backend.
2021-03-03T09:34:50.9893689Z ImportError
2021-03-03T09:34:50.9895137Z * If you do not have the requested `flavor`
2021-03-03T09:34:50.9896140Z """
2021-03-03T09:34:50.9897186Z valid_parsers = list(_valid_parsers.keys())
2021-03-03T09:34:50.9898667Z if flavor not in valid_parsers:
2021-03-03T09:34:50.9899580Z raise ValueError(
2021-03-03T09:34:50.9901350Z f"{repr(flavor)} is not a valid flavor, valid flavors are {valid_parsers}"
2021-03-03T09:34:50.9903003Z )
2021-03-03T09:34:50.9904985Z
2021-03-03T09:34:50.9907240Z if flavor in ("bs4", "html5lib"):
2021-03-03T09:34:50.9908492Z if not _HAS_HTML5LIB:
2021-03-03T09:34:50.9909912Z > raise ImportError("html5lib not found, please install it")
2021-03-03T09:34:50.9911782Z E ImportError: html5lib not found, please install it
2021-03-03T09:34:50.9912325Z
2021-03-03T09:34:50.9913065Z pandas\io\html.py:854: ImportError
2021-03-03T09:34:50.9914414Z _______________________ TestReadHtml.test_spam_url[bs4] _______________________
2021-03-03T09:34:50.9915596Z [gw3] win32 -- Python 3.8.8 C:\Miniconda\envs\pandas-dev\python.exe
2021-03-03T09:34:50.9916260Z
2021-03-03T09:34:50.9917104Z self = <pandas.tests.io.test_html.TestReadHtml object at 0x000002A71F00E520>
2021-03-03T09:34:50.9917669Z
2021-03-03T09:34:50.9918797Z @tm.network
2021-03-03T09:34:50.9920021Z def test_spam_url(self):
2021-03-03T09:34:50.9920972Z url = (
2021-03-03T09:34:50.9922490Z "https://raw.githubusercontent.com/pandas-dev/pandas/master/"
2021-03-03T09:34:50.9923734Z "pandas/tests/io/data/html/spam.html"
2021-03-03T09:34:50.9925069Z )
2021-03-03T09:34:50.9926162Z > df1 = self.read_html(url, match=".*Water.*")
2021-03-03T09:34:50.9926769Z
2021-03-03T09:34:50.9927572Z pandas\tests\io\test_html.py:166:
2021-03-03T09:34:50.9928594Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2021-03-03T09:34:50.9929920Z pandas\util\_decorators.py:310: in wrapper
2021-03-03T09:34:50.9931095Z return func(*args, **kwargs)
2021-03-03T09:34:50.9932295Z pandas\io\html.py:1101: in read_html
2021-03-03T09:34:50.9933330Z return _parse(
2021-03-03T09:34:50.9934390Z pandas\io\html.py:905: in _parse
2021-03-03T09:34:50.9935584Z parser = _parser_dispatch(flav)
2021-03-03T09:34:50.9936804Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2021-03-03T09:34:50.9937707Z
2021-03-03T09:34:50.9938466Z flavor = 'bs4'
2021-03-03T09:34:50.9938912Z
2021-03-03T09:34:50.9939676Z def _parser_dispatch(flavor):
2021-03-03T09:34:50.9940660Z """
2021-03-03T09:34:50.9942408Z Choose the parser based on the input flavor.
2021-03-03T09:34:50.9943364Z
2021-03-03T09:34:50.9944419Z Parameters
2021-03-03T09:34:50.9945528Z ----------
2021-03-03T09:34:50.9946538Z flavor : str
2021-03-03T09:34:50.9947615Z The type of parser to use. This must be a valid backend.
2021-03-03T09:34:50.9948607Z
2021-03-03T09:34:50.9950485Z Returns
2021-03-03T09:34:50.9951422Z -------
2021-03-03T09:34:50.9952595Z cls : _HtmlFrameParser subclass
2021-03-03T09:34:50.9953879Z The parser class based on the requested input flavor.
2021-03-03T09:34:50.9954835Z
2021-03-03T09:34:50.9955766Z Raises
2021-03-03T09:34:50.9956889Z ------
2021-03-03T09:34:50.9957885Z ValueError
2021-03-03T09:34:50.9958927Z * If `flavor` is not a valid backend.
2021-03-03T09:34:50.9960085Z ImportError
2021-03-03T09:34:50.9961314Z * If you do not have the requested `flavor`
2021-03-03T09:34:50.9962245Z """
2021-03-03T09:34:50.9963716Z valid_parsers = list(_valid_parsers.keys())
2021-03-03T09:34:50.9964776Z if flavor not in valid_parsers:
2021-03-03T09:34:50.9965879Z raise ValueError(
2021-03-03T09:34:50.9967523Z f"{repr(flavor)} is not a valid flavor, valid flavors are {valid_parsers}"
2021-03-03T09:34:50.9968532Z )
2021-03-03T09:34:50.9969460Z
2021-03-03T09:34:50.9970634Z if flavor in ("bs4", "html5lib"):
2021-03-03T09:34:50.9971682Z if not _HAS_HTML5LIB:
2021-03-03T09:34:50.9973164Z > raise ImportError("html5lib not found, please install it")
2021-03-03T09:34:50.9974624Z E ImportError: html5lib not found, please install it
2021-03-03T09:34:50.9975686Z
2021-03-03T09:34:50.9976475Z pandas\io\html.py:854: ImportError
2021-03-03T09:34:50.9977489Z _________________________ TestReadHtml.test_spam[bs4] _________________________