|
15 | 15 | # specific language governing permissions and limitations
|
16 | 16 | # under the License.
|
17 | 17 | import os
|
| 18 | +import re |
18 | 19 | from typing import Any
|
19 | 20 |
|
20 | 21 | import pyarrow as pa
|
@@ -1186,13 +1187,17 @@ def add_with_parameter(df_internal, value: Any) -> DataFrame:
|
def test_dataframe_repr_html(df) -> None:
    """Check that the HTML repr of ``df`` contains the expected headers and cells.

    The HTML output goes through a fair bit of processing, and the formatting
    data is written inside the opening ``<th ...>`` / ``<td ...>`` tags. Rather
    than comparing against a fixed reference string, this test only verifies
    that the expected values appear, in order, as table cells. Each opening tag
    is matched without its closing ``>`` so any attributes are ignored, and
    whatever markup sits between consecutive cells is skipped.
    """
    output = df._repr_html_()

    def cells_in_order(tag: str, values: list) -> str:
        # One ``<tag ...>value</tag>`` fragment per value, joined by a lazy
        # wildcard. Values are escaped so they are always matched literally,
        # and no capturing groups are used because only the number of matches
        # is of interest.
        cells = [f"<{tag}.*?>{re.escape(str(value))}</{tag}>" for value in values]
        return ".*?".join(cells)

    # re.DOTALL lets the wildcards span line breaks in the rendered HTML.
    header_pattern = cells_in_order("th", ["a", "b", "c"])
    assert len(re.findall(header_pattern, output, re.DOTALL)) == 1

    body_rows = [[1, 4, 8], [2, 5, 5], [3, 6, 8]]
    body_values = [value for row in body_rows for value in row]
    body_pattern = cells_in_order("td", body_values)
    assert len(re.findall(body_pattern, output, re.DOTALL)) == 1
0 commit comments