Skip to content

Commit 8fcb8e5

Browse files
committed
Merge pull request #7 from clue-labs/html
Improve parsing XHTML structure
2 parents aa04751 + 2ed2e7f commit 8fcb8e5

File tree

2 files changed

+46
-3
lines changed

2 files changed

+46
-3
lines changed

src/Io/Loader.php

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,36 @@
66

77
class Loader
88
{
9+
private $entities;
10+
11+
public function __construct(array $entities = null)
12+
{
13+
if ($entities === null) {
14+
// get all named HTML entities, except those for '<', '>' and '&', which must stay escaped for XML parsing
15+
$entities = get_html_translation_table(HTML_ENTITIES, ENT_NOQUOTES, 'UTF-8');
16+
unset($entities['<'], $entities['>'], $entities['&']);
17+
}
18+
19+
$this->entities = $entities;
20+
}
21+
922
public function loadXmlFile($path)
1023
{
1124
return $this->loadXmlString(file_get_contents($path));
1225
}
1326

1427
public function loadXmlString($html)
1528
{
16-
// fix invalid markup of help link in footer of outdated ViewVC versions
29+
// fix invalid markup of outdated ViewVC versions
30+
// - help link in footer not terminated
31+
// - selected branch/tag in CVS "sticky tag" dropdown has no attribute value
32+
// - clear button for selected branch/tag has no trailing slash
1733
$html = str_replace('Help</strong></td>', 'Help</a></strong></td>', $html);
34+
$html = str_replace('selected>', 'selected="selected">', $html);
35+
$html = preg_replace('#<input([^\/]+)>#', '<input$1 />', $html);
1836

19-
// replace unneeded HTML entities
20-
$html = str_replace('&nbsp;', ' ', $html);
37+
// replace named HTML entities with their UTF-8 value
38+
$html = str_replace(array_values($this->entities), array_keys($this->entities), $html);
2139

2240
// clean up namespace declaration
2341
$html = str_replace('xmlns="', 'ns="', $html);

tests/Io/LoaderTest.php

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,29 @@ function ($path) {
2929
scandir(__DIR__ . '/../fixtures/')
3030
));
3131
}
32+
33+
public function testHtmlEntities()
34+
{
35+
$str = '<p>&auml;&hellip;&nbsp;&copy;</p>';
36+
$xml = $this->loader->loadXmlString($str);
37+
38+
// c3 a4 e2 80 a6 c2 a0 c2 a9
39+
$this->assertEquals('ä… ©', (string)$xml);
40+
}
41+
42+
public function testLoadInvalidMarkupInputNotClosed()
43+
{
44+
$str = '<input type="hidden">';
45+
$xml = $this->loader->loadXmlString($str);
46+
47+
$this->assertEquals('hidden', (string)$xml['type']);
48+
}
49+
50+
public function testLoadInvalidMarkupSelectedAttributeNoValue()
51+
{
52+
$str = '<option selected>this</option>';
53+
$xml = $this->loader->loadXmlString($str);
54+
55+
$this->assertEquals('selected', (string)$xml['selected']);
56+
}
3257
}

0 commit comments

Comments
 (0)