Skip to content

Commit a0bc962

Browse files
committed
Convert named HTML entities before processing as XML
1 parent 3bc96c8 commit a0bc962

File tree

2 files changed

+24
-2
lines changed

2 files changed

+24
-2
lines changed

src/Io/Loader.php

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,19 @@
66

77
class Loader
88
{
9+
private $entities;
10+
11+
/**
 * Set up the map of named HTML entities this loader translates.
 *
 * @param array|null $entities map of character => named entity, in the
 *     shape returned by get_html_translation_table(); pass null to use
 *     PHP's built-in HTML entity table (UTF-8) without '<', '>' and '&'
 *
 * NOTE(review): the implicit nullable type (`array $entities = null`) is
 * deprecated as of PHP 8.4; switching to `?array` requires PHP 7.1+, so
 * confirm the supported PHP range before changing the signature.
 */
public function __construct(array $entities = null)
{
    // an explicitly given map (even an empty one) is stored untouched
    if ($entities !== null) {
        $this->entities = $entities;

        return;
    }

    $table = get_html_translation_table(HTML_ENTITIES, ENT_NOQUOTES, 'UTF-8');

    // these characters are markup in XML, so their entities must not be
    // part of the map that is later decoded
    foreach (array('<', '>', '&') as $reserved) {
        unset($table[$reserved]);
    }

    $this->entities = $table;
}
21+
922
public function loadXmlFile($path)
1023
{
1124
return $this->loadXmlString(file_get_contents($path));
@@ -16,8 +29,8 @@ public function loadXmlString($html)
1629
// fix invalid markup of help link in footer of outdated ViewVC versions
1730
$html = str_replace('Help</strong></td>', 'Help</a></strong></td>', $html);
1831

19-
// replace unneeded HTML entities
20-
$html = str_replace('&nbsp;', ' ', $html);
32+
// replace named HTML entities with their UTF-8 value
33+
$html = str_replace(array_values($this->entities), array_keys($this->entities), $html);
2134

2235
// clean up namespace declaration
2336
$html = str_replace('xmlns="', 'ns="', $html);

tests/Io/LoaderTest.php

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,13 @@ function ($path) {
2929
scandir(__DIR__ . '/../fixtures/')
3030
));
3131
}
32+
33+
/**
 * Named HTML entities in the input must show up as their UTF-8
 * characters in the text of the loaded XML element.
 */
public function testHtmlEntities()
{
    $str = '<p>&auml;&hellip;&nbsp;&copy;</p>';
    $xml = $this->loader->loadXmlString($str);

    // Spell the expected text out as bytes: the third character is a
    // non-breaking space (c2 a0), which looks exactly like a plain space
    // when typed literally and is easily lost by editors or copy/paste.
    // c3 a4 = a-umlaut, e2 80 a6 = ellipsis, c2 a0 = NBSP, c2 a9 = copyright
    $expected = "\xC3\xA4\xE2\x80\xA6\xC2\xA0\xC2\xA9";

    // assertSame compares type and value strictly, unlike assertEquals
    $this->assertSame($expected, (string)$xml);
}
3241
}

0 commit comments

Comments
 (0)