diff --git a/Classes/PHPExcel/Reader/HTML.php b/Classes/PHPExcel/Reader/HTML.php
index a19eaec9d..0ac550f9a 100644
--- a/Classes/PHPExcel/Reader/HTML.php
+++ b/Classes/PHPExcel/Reader/HTML.php
@@ -12,6 +12,7 @@
* PHPExcel_Reader_HTML
*
* Copyright (c) 2006 - 2015 PHPExcel
+ * Copyright (c) 2015 Wine Logistix GmbH
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -30,11 +31,12 @@
* @category PHPExcel
* @package PHPExcel_Reader
* @copyright Copyright (c) 2006 - 2015 PHPExcel (http://www.codeplex.com/PHPExcel)
+ * @copyright Copyright (c) 2015 Wine Logistix GmbH (http://www.wine-logistix.de)
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
* @version ##VERSION##, ##DATE##
*/
/** PHPExcel root directory */
-class PHPExcel_Reader_HTML extends PHPExcel_Reader_Abstract implements PHPExcel_Reader_IReader
+class PHPExcel_Reader_HTML extends PHPExcel_Reader_HTML_Abstract
{
/**
@@ -123,39 +125,6 @@ public function __construct()
$this->readFilter = new PHPExcel_Reader_DefaultReadFilter();
}
- /**
- * Validate that the current file is an HTML file
- *
- * @return boolean
- */
- protected function isValidFormat()
- {
- // Reading 2048 bytes should be enough to validate that the format is HTML
- $data = fread($this->fileHandle, 2048);
- if ((strpos($data, '<') !== false) &&
- (strlen($data) !== strlen(strip_tags($data)))) {
- return true;
- }
-
- return false;
- }
-
- /**
- * Loads PHPExcel from file
- *
- * @param string $pFilename
- * @return PHPExcel
- * @throws PHPExcel_Reader_Exception
- */
- public function load($pFilename)
- {
- // Create new PHPExcel
- $objPHPExcel = new PHPExcel();
-
- // Load into this instance
- return $this->loadIntoExisting($pFilename, $objPHPExcel);
- }
-
/**
* Set input encoding
*
@@ -183,6 +152,12 @@ public function getInputEncoding()
protected $tableLevel = 0;
protected $nestedColumn = array('A');
+ /**
+ * Active Worksheet which is used for writing to.
+ * @var PHPExcel_Worksheet
+ */
+ protected $sheet;
+
protected function setTableStartColumn($column)
{
if ($this->tableLevel == 0) {
@@ -206,7 +181,21 @@ protected function releaseTableStartColumn()
return array_pop($this->nestedColumn);
}
- protected function flushCell($sheet, $column, $row, &$cellContent)
+    /**
+     * Prepare the worksheet that receives the parsed content.
+     *
+     * Appends worksheets to the workbook until one exists at
+     * $this->sheetIndex, activates that index and keeps the active sheet in
+     * $this->sheet so the element handlers can write to it.
+     *
+     * @param PHPExcel $objPHPExcel Workbook to write into
+     */
+    protected function loadHandler(PHPExcel $objPHPExcel)
+    {
+        $targetIndex = $this->sheetIndex;
+
+        // Pad the workbook until the requested sheet index is available.
+        while ($targetIndex >= $objPHPExcel->getSheetCount()) {
+            $objPHPExcel->createSheet();
+        }
+
+        $objPHPExcel->setActiveSheetIndex($targetIndex);
+        $this->sheet = $objPHPExcel->getActiveSheet();
+    }
+
+ /**
+ * Hook called once the DOM has been traversed. This reader has no
+ * post-processing to do, so the body is intentionally empty.
+ */
+ protected function finishHandler()
+ {
+ }
+
+ protected function flushCell($column, $row, &$cellContent)
{
if (is_string($cellContent)) {
// Simple String content
@@ -215,7 +204,7 @@ protected function flushCell($sheet, $column, $row, &$cellContent)
// echo 'FLUSH CELL: ' , $column , $row , ' => ' , $cellContent , '
';
// Write to worksheet to be done here...
// ... we return the cell so we can mess about with styles more easily
- $sheet->setCellValue($column . $row, $cellContent, true);
+ $this->sheet->setCellValue($column . $row, $cellContent, true);
$this->dataArray[$row][$column] = $cellContent;
}
} else {
@@ -226,28 +215,29 @@ protected function flushCell($sheet, $column, $row, &$cellContent)
$cellContent = (string) '';
}
- protected function processDomElement(DOMNode $element, $sheet, &$row, &$column, &$cellContent, $format = null)
+ protected function textElementHandler(DOMNode $element, &$row, &$column, &$cellContent)
{
- foreach ($element->childNodes as $child) {
- if ($child instanceof DOMText) {
- $domText = preg_replace('/\s+/u', ' ', trim($child->nodeValue));
- if (is_string($cellContent)) {
- // simply append the text if the cell content is a plain text string
- $cellContent .= $domText;
- } else {
- // but if we have a rich text run instead, we need to append it correctly
- // TODO
- }
- } elseif ($child instanceof DOMElement) {
-// echo 'DOM ELEMENT: ' , strtoupper($child->nodeName) , '
';
+ $domText = preg_replace('/\s+/u', ' ', trim($element->nodeValue));
+ if (is_string($cellContent)) {
+ // simply append the text if the cell content is a plain text string
+ $cellContent .= $domText;
+ } else {
+ // but if we have a rich text run instead, we need to append it correctly
+ // TODO
+ }
+ }
+
+ protected function defaultElementHandler(DOMNode $element, &$row, &$column, &$cellContent, $format = null)
+ {
+// echo 'DOM ELEMENT: ' , strtoupper($element->nodeName) , '
';
$attributeArray = array();
- foreach ($child->attributes as $attribute) {
+ foreach ($element->attributes as $attribute) {
// echo 'ATTRIBUTE: ' , $attribute->name , ' => ' , $attribute->value , '
';
$attributeArray[$attribute->name] = $attribute->value;
}
- switch ($child->nodeName) {
+ switch ($element->nodeName) {
case 'meta':
foreach ($attributeArray as $attributeName => $attributeValue) {
switch ($attributeName) {
@@ -257,11 +247,11 @@ protected function processDomElement(DOMNode $element, $sheet, &$row, &$column,
break;
}
}
- $this->processDomElement($child, $sheet, $row, $column, $cellContent);
+ $this->processDomElement($element, $row, $column, $cellContent);
break;
case 'title':
- $this->processDomElement($child, $sheet, $row, $column, $cellContent);
- $sheet->setTitle($cellContent);
+ $this->processDomElement($element, $row, $column, $cellContent);
+ $this->sheet->setTitle($cellContent);
$cellContent = '';
break;
case 'span':
@@ -275,20 +265,20 @@ protected function processDomElement(DOMNode $element, $sheet, &$row, &$column,
if ($cellContent > '') {
$cellContent .= ' ';
}
- $this->processDomElement($child, $sheet, $row, $column, $cellContent);
+ $this->processDomElement($element, $row, $column, $cellContent);
if ($cellContent > '') {
$cellContent .= ' ';
}
// echo 'END OF STYLING, SPAN OR DIV
';
break;
case 'hr':
- $this->flushCell($sheet, $column, $row, $cellContent);
+ $this->flushCell($column, $row, $cellContent);
++$row;
- if (isset($this->formats[$child->nodeName])) {
- $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
+ if (isset($this->formats[$element->nodeName])) {
+ $this->sheet->getStyle($column . $row)->applyFromArray($this->formats[$element->nodeName]);
} else {
$cellContent = '----------';
- $this->flushCell($sheet, $column, $row, $cellContent);
+ $this->flushCell($column, $row, $cellContent);
}
++$row;
// Add a break after a horizontal rule, simply by allowing the code to dropthru
@@ -298,7 +288,7 @@ protected function processDomElement(DOMNode $element, $sheet, &$row, &$column,
$cellContent .= "\n";
} else {
// Otherwise flush our existing content and move the row cursor on
- $this->flushCell($sheet, $column, $row, $cellContent);
+ $this->flushCell($column, $row, $cellContent);
++$row;
}
// echo 'HARD LINE BREAK: ' , '
';
@@ -309,15 +299,15 @@ protected function processDomElement(DOMNode $element, $sheet, &$row, &$column,
switch ($attributeName) {
case 'href':
// echo 'Link to ' , $attributeValue , '
';
- $sheet->getCell($column . $row)->getHyperlink()->setUrl($attributeValue);
- if (isset($this->formats[$child->nodeName])) {
- $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
+ $this->sheet->getCell($column . $row)->getHyperlink()->setUrl($attributeValue);
+ if (isset($this->formats[$element->nodeName])) {
+ $this->sheet->getStyle($column . $row)->applyFromArray($this->formats[$element->nodeName]);
}
break;
}
}
$cellContent .= ' ';
- $this->processDomElement($child, $sheet, $row, $column, $cellContent);
+ $this->processDomElement($element, $row, $column, $cellContent);
// echo 'END OF HYPERLINK:' , '
';
break;
case 'h1':
@@ -333,20 +323,20 @@ protected function processDomElement(DOMNode $element, $sheet, &$row, &$column,
// If we're inside a table, replace with a \n
$cellContent .= "\n";
// echo 'LIST ENTRY: ' , '
';
- $this->processDomElement($child, $sheet, $row, $column, $cellContent);
+ $this->processDomElement($element, $row, $column, $cellContent);
// echo 'END OF LIST ENTRY:' , '
';
} else {
if ($cellContent > '') {
- $this->flushCell($sheet, $column, $row, $cellContent);
+ $this->flushCell($column, $row, $cellContent);
$row++;
}
// echo 'START OF PARAGRAPH: ' , '
';
- $this->processDomElement($child, $sheet, $row, $column, $cellContent);
+ $this->processDomElement($element, $row, $column, $cellContent);
// echo 'END OF PARAGRAPH:' , '
';
- $this->flushCell($sheet, $column, $row, $cellContent);
+ $this->flushCell($column, $row, $cellContent);
- if (isset($this->formats[$child->nodeName])) {
- $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
+ if (isset($this->formats[$element->nodeName])) {
+ $this->sheet->getStyle($column . $row)->applyFromArray($this->formats[$element->nodeName]);
}
$row++;
@@ -358,28 +348,28 @@ protected function processDomElement(DOMNode $element, $sheet, &$row, &$column,
// If we're inside a table, replace with a \n
$cellContent .= "\n";
// echo 'LIST ENTRY: ' , '
';
- $this->processDomElement($child, $sheet, $row, $column, $cellContent);
+ $this->processDomElement($element, $row, $column, $cellContent);
// echo 'END OF LIST ENTRY:' , '
';
} else {
if ($cellContent > '') {
- $this->flushCell($sheet, $column, $row, $cellContent);
+ $this->flushCell($column, $row, $cellContent);
}
++$row;
// echo 'LIST ENTRY: ' , '
';
- $this->processDomElement($child, $sheet, $row, $column, $cellContent);
+ $this->processDomElement($element, $row, $column, $cellContent);
// echo 'END OF LIST ENTRY:' , '
';
- $this->flushCell($sheet, $column, $row, $cellContent);
+ $this->flushCell($column, $row, $cellContent);
$column = 'A';
}
break;
case 'table':
- $this->flushCell($sheet, $column, $row, $cellContent);
+ $this->flushCell($column, $row, $cellContent);
$column = $this->setTableStartColumn($column);
// echo 'START OF TABLE LEVEL ' , $this->tableLevel , '
';
if ($this->tableLevel > 1) {
--$row;
}
- $this->processDomElement($child, $sheet, $row, $column, $cellContent);
+ $this->processDomElement($element, $row, $column, $cellContent);
// echo 'END OF TABLE LEVEL ' , $this->tableLevel , '
';
$column = $this->releaseTableStartColumn();
if ($this->tableLevel > 1) {
@@ -390,33 +380,33 @@ protected function processDomElement(DOMNode $element, $sheet, &$row, &$column,
break;
case 'thead':
case 'tbody':
- $this->processDomElement($child, $sheet, $row, $column, $cellContent);
+ $this->processDomElement($element, $row, $column, $cellContent);
break;
case 'tr':
$column = $this->getTableStartColumn();
$cellContent = '';
// echo 'START OF TABLE ' , $this->tableLevel , ' ROW
';
- $this->processDomElement($child, $sheet, $row, $column, $cellContent);
+ $this->processDomElement($element, $row, $column, $cellContent);
++$row;
// echo 'END OF TABLE ' , $this->tableLevel , ' ROW
';
break;
case 'th':
case 'td':
// echo 'START OF TABLE ' , $this->tableLevel , ' CELL
';
- $this->processDomElement($child, $sheet, $row, $column, $cellContent);
+ $this->processDomElement($element, $row, $column, $cellContent);
// echo 'END OF TABLE ' , $this->tableLevel , ' CELL
';
while (isset($this->rowspan[$column . $row])) {
++$column;
}
- $this->flushCell($sheet, $column, $row, $cellContent);
+ $this->flushCell($column, $row, $cellContent);
// if (isset($attributeArray['style']) && !empty($attributeArray['style'])) {
// $styleAry = $this->getPhpExcelStyleArray($attributeArray['style']);
//
// if (!empty($styleAry)) {
-// $sheet->getStyle($column . $row)->applyFromArray($styleAry);
+// $this->sheet->getStyle($column . $row)->applyFromArray($styleAry);
// }
// }
@@ -427,25 +417,25 @@ protected function processDomElement(DOMNode $element, $sheet, &$row, &$column,
++$columnTo;
}
$range = $column . $row . ':' . $columnTo . ($row + $attributeArray['rowspan'] - 1);
- foreach (\PHPExcel_Cell::extractAllCellReferencesInRange($range) as $value) {
+ foreach (PHPExcel_Cell::extractAllCellReferencesInRange($range) as $value) {
$this->rowspan[$value] = true;
}
- $sheet->mergeCells($range);
+ $this->sheet->mergeCells($range);
$column = $columnTo;
} elseif (isset($attributeArray['rowspan'])) {
//create merging rowspan
$range = $column . $row . ':' . $column . ($row + $attributeArray['rowspan'] - 1);
- foreach (\PHPExcel_Cell::extractAllCellReferencesInRange($range) as $value) {
+ foreach (PHPExcel_Cell::extractAllCellReferencesInRange($range) as $value) {
$this->rowspan[$value] = true;
}
- $sheet->mergeCells($range);
+ $this->sheet->mergeCells($range);
} elseif (isset($attributeArray['colspan'])) {
//create merging colspan
$columnTo = $column;
for ($i = 0; $i < $attributeArray['colspan'] - 1; $i++) {
++$columnTo;
}
- $sheet->mergeCells($column . $row . ':' . $columnTo . $row);
+ $this->sheet->mergeCells($column . $row . ':' . $columnTo . $row);
$column = $columnTo;
}
++$column;
@@ -455,58 +445,12 @@ protected function processDomElement(DOMNode $element, $sheet, &$row, &$column,
$column = 'A';
$content = '';
$this->tableLevel = 0;
- $this->processDomElement($child, $sheet, $row, $column, $cellContent);
+ $this->processDomElement($element, $row, $column, $cellContent);
break;
default:
- $this->processDomElement($child, $sheet, $row, $column, $cellContent);
+ $this->processDomElement($element, $row, $column, $cellContent);
}
- }
- }
- }
-
- /**
- * Loads PHPExcel from file into PHPExcel instance
- *
- * @param string $pFilename
- * @param PHPExcel $objPHPExcel
- * @return PHPExcel
- * @throws PHPExcel_Reader_Exception
- */
- public function loadIntoExisting($pFilename, PHPExcel $objPHPExcel)
- {
- // Open file to validate
- $this->openFile($pFilename);
- if (!$this->isValidFormat()) {
- fclose($this->fileHandle);
- throw new PHPExcel_Reader_Exception($pFilename . " is an Invalid HTML file.");
- }
- // Close after validating
- fclose($this->fileHandle);
-
- // Create new PHPExcel
- while ($objPHPExcel->getSheetCount() <= $this->sheetIndex) {
- $objPHPExcel->createSheet();
- }
- $objPHPExcel->setActiveSheetIndex($this->sheetIndex);
-
- // Create a new DOM object
- $dom = new domDocument;
- // Reload the HTML file into the DOM object
- $loaded = $dom->loadHTML($this->securityScanFile($pFilename));
- if ($loaded === false) {
- throw new PHPExcel_Reader_Exception('Failed to load ', $pFilename, ' as a DOM Document');
- }
-
- // Discard white space
- $dom->preserveWhiteSpace = false;
-
- $row = 0;
- $column = 'A';
- $content = '';
- $this->processDomElement($dom, $objPHPExcel->getActiveSheet(), $row, $column, $content);
-
- // Return
- return $objPHPExcel;
+ // This method does all traversing itself, no TRAVERSE_CHILDS hint needed.
}
/**
@@ -532,18 +476,5 @@ public function setSheetIndex($pValue = 0)
return $this;
}
- /**
- * Scan theXML for use of ElementHandler where is lowercase element name.
+ * Explicit handlers must accept same arguments as defaultElementHandler.
+ *
+ * Other handlers exist which facilitate implementation specific behavior:
+ *
+ * flushCell - Write a cell value
+ * textElementHandler - Invoked for DOMText elements.
+ * loadHandler - Invoked before traversing the DOM.
+ * finishHandler - Invoked after traversing the DOM.
+ *
+ * Copyright (c) 2006 - 2015 PHPExcel
+ * Copyright (c) 2015 Wine Logistix GmbH
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * @category PHPExcel
+ * @package PHPExcel_Reader_HTML
+ * @copyright Copyright (c) 2006 - 2015 PHPExcel (http://www.codeplex.com/PHPExcel)
+ * @copyright Copyright (c) 2015 Wine Logistix (http://www.wine-logistix.de)
+ * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
+ * @version ##VERSION##, ##DATE##
+ */
+abstract class PHPExcel_Reader_HTML_Abstract extends PHPExcel_Reader_Abstract implements PHPExcel_Reader_IReader
+{
+
+ /**
+ * Tell processDomElement to traverse child elements of the current child
+ * element recursively.
+ * @var int
+ */
+ const TRAVERSE_CHILDS = 1;
+
+    /**
+     * Store the buffered cell content at the given position of the sheet
+     * the implementation is writing to.
+     *
+     * @param string $column      Excel style column name
+     * @param int    $row         Row number
+     * @param string $cellContent Content buffer, passed by reference
+     */
+    abstract protected function flushCell($column, $row, &$cellContent);
+
+    /**
+     * Fallback handler, used by processDomElement() for every element that
+     * has no explicit handler method.
+     *
+     * @param DOMNode $element     Element being visited
+     * @param int     $row         Row number, passed by reference
+     * @param string  $column      Excel style column name, passed by reference
+     * @param string  $cellContent Content buffer, passed by reference
+     * @return int|null TRAVERSE_CHILDS to have processDomElement() descend
+     *                  into the element's children, null otherwise
+     */
+    abstract protected function defaultElementHandler(DOMNode $element, &$row, &$column, &$cellContent);
+
+    /**
+     * Handler invoked by processDomElement() for DOMText nodes.
+     *
+     * @param DOMNode $element     Text node being visited
+     * @param int     $row         Row number, passed by reference
+     * @param string  $column      Excel style column name, passed by reference
+     * @param string  $cellContent Content buffer, passed by reference
+     */
+    abstract protected function textElementHandler(DOMNode $element, &$row, &$column, &$cellContent);
+
+    /**
+     * Hook executed after the HTML has been loaded into a DOM document and
+     * before any element is traversed.
+     *
+     * @param PHPExcel $objPHPExcel Workbook that is being populated
+     */
+    abstract protected function loadHandler(PHPExcel $objPHPExcel);
+
+    /**
+     * Hook executed after all elements have been traversed and before the
+     * workbook is returned to the caller.
+     */
+    abstract protected function finishHandler();
+
+    /**
+     * Create a new workbook and fill it from an HTML file.
+     *
+     * The start of the file is checked for HTML markup first; the file
+     * handle opened for that check is closed again before the file is parsed.
+     *
+     * @param string $pFilename Path of the HTML file
+     * @return PHPExcel
+     * @throws PHPExcel_Reader_Exception When the file does not look like HTML
+     */
+    public function load($pFilename)
+    {
+        $objPHPExcel = new PHPExcel();
+
+        // Sniff the file, then release the handle whatever the outcome.
+        $this->openFile($pFilename);
+        $looksLikeHtml = $this->isValidFileFormat();
+        fclose($this->fileHandle);
+
+        if (!$looksLikeHtml) {
+            throw new PHPExcel_Reader_Exception($pFilename . " is an invalid HTML file.");
+        }
+
+        return $this->loadIntoExisting($pFilename, $objPHPExcel);
+    }
+
+    /**
+     * Create a new workbook and fill it from HTML held in a string.
+     *
+     * The content is validated and passed through securityScan() before it
+     * is parsed.
+     *
+     * @param string $content HTML content
+     * @return PHPExcel
+     * @throws PHPExcel_Reader_Exception When the content contains no markup
+     */
+    public function loadFromString($content)
+    {
+        $workbook = new PHPExcel();
+
+        if (!$this->isValidFormat($content)) {
+            throw new PHPExcel_Reader_Exception("HTML content is invalid");
+        }
+
+        return $this->loadIntoExistingFromString($this->securityScan($content), $workbook);
+    }
+
+    /**
+     * Loads PHPExcel from file into an existing PHPExcel instance.
+     *
+     * This is a public entry point that callers may use without going
+     * through load(), so the file is validated here as well before it is
+     * security-scanned and parsed.
+     *
+     * @param string $pFilename Path of the HTML file
+     * @param PHPExcel $objPHPExcel Workbook to load into
+     * @return PHPExcel
+     * @throws PHPExcel_Reader_Exception When the file does not look like HTML
+     */
+    public function loadIntoExisting($pFilename, PHPExcel $objPHPExcel)
+    {
+        // Open the file only to sniff its first bytes; close the handle
+        // again before deciding, so it is released on both paths.
+        $this->openFile($pFilename);
+        $looksLikeHtml = $this->isValidFileFormat();
+        fclose($this->fileHandle);
+
+        if (!$looksLikeHtml) {
+            throw new PHPExcel_Reader_Exception($pFilename . " is an invalid HTML file.");
+        }
+
+        $html = $this->securityScanFile($pFilename);
+        return $this->loadIntoExistingFromString($html, $objPHPExcel);
+    }
+
+    /**
+     * Loads PHPExcel from an HTML string into an existing PHPExcel instance.
+     *
+     * This method is protected because it does not security-scan the
+     * content; the public entry points scan before calling it.
+     *
+     * @param string $content HTML content, already security-scanned
+     * @param PHPExcel $objPHPExcel Workbook to load into
+     * @return PHPExcel The workbook passed in
+     * @throws PHPExcel_Reader_Exception When the content cannot be parsed
+     */
+    protected function loadIntoExistingFromString($content, PHPExcel $objPHPExcel)
+    {
+        // Create a new DOM object and parse the HTML into it
+        $dom = new DOMDocument();
+        $loaded = $dom->loadHTML($content);
+        if ($loaded === false) {
+            // No file name is known at this level, and the exception takes
+            // a single message string.
+            throw new PHPExcel_Reader_Exception('Failed to load HTML content as a DOM Document');
+        }
+
+        // Discard white space
+        // NOTE(review): this is set after loadHTML() has already parsed the
+        // content - confirm whether it still has any effect here.
+        $dom->preserveWhiteSpace = false;
+
+        // Cursor position and content buffer shared by all handlers.
+        $row = 1;
+        $column = 'A';
+        $cellContent = '';
+
+        // Allow implementation specific initialization after load.
+        $this->loadHandler($objPHPExcel);
+
+        $this->processDomElement($dom, $row, $column, $cellContent);
+
+        // Allow implementation specific operation after processing.
+        $this->finishHandler();
+
+        return $objPHPExcel;
+    }
+
+    /**
+     * Validate that data contains HTML.
+     *
+     * Data counts as HTML when it contains a '<' character and stripping
+     * tags from it changes its length.
+     *
+     * The argument is optional so that the method can still be called
+     * without one: in that case up to 2048 bytes are read from
+     * $this->fileHandle and checked instead.
+     * NOTE(review): presumably inherited code such as canRead() in
+     * PHPExcel_Reader_Abstract relies on the argument-less form - confirm.
+     *
+     * @param string|null $data Data to check, or null to read from the open file handle
+     * @return boolean
+     */
+    protected function isValidFormat(&$data = null)
+    {
+        $subject = $data;
+        if ($subject === null) {
+            // Read directly instead of delegating to isValidFileFormat(),
+            // which calls back into this method.
+            $subject = fread($this->fileHandle, 2048);
+        }
+
+        // A failed read (or any other non-string input) is never HTML.
+        if (!is_string($subject)) {
+            return false;
+        }
+
+        return strpos($subject, '<') !== false
+            && strlen($subject) !== strlen(strip_tags($subject));
+    }
+
+    /**
+     * Check whether the file behind $this->fileHandle looks like HTML.
+     *
+     * Only the first 2048 bytes are read from the handle; that sample is
+     * handed to isValidFormat() for the actual check.
+     *
+     * @return boolean
+     */
+    protected function isValidFileFormat()
+    {
+        // isValidFormat() takes its argument by reference, so the sample
+        // has to live in a variable.
+        $leadingBytes = fread($this->fileHandle, 2048);
+
+        return $this->isValidFormat($leadingBytes);
+    }
+
+    /**
+     * Visit the child nodes of a DOM node and dispatch each one to a handler.
+     *
+     * Text nodes go to textElementHandler(). For element nodes the handler
+     * name is built from the cleaned node name followed by "ElementHandler";
+     * if this object has no method of that name, defaultElementHandler() is
+     * used instead. When a handler returns TRAVERSE_CHILDS and the element
+     * has children, those children are processed recursively. Any other
+     * return value means the handler took care of the subtree itself.
+     * Nodes that are neither text nor elements are skipped.
+     *
+     * @param DOMNode $element Node whose children are visited
+     * @param int $row Row number
+     * @param string $column Excel style column name
+     * @param string $cellContent Buffer handlers may use for cell content before flushing it
+     */
+    protected function processDomElement(DOMNode $element, &$row, &$column, &$cellContent)
+    {
+        foreach ($element->childNodes as $node) {
+            if ($node instanceof DOMText) {
+                $this->textElementHandler($node, $row, $column, $cellContent);
+                continue;
+            }
+            if (!($node instanceof DOMElement)) {
+                continue;
+            }
+
+            // Resolve the element specific handler, falling back to the
+            // default handler when none is defined.
+            $handler = $this->cleanNodeName($node->nodeName) . "ElementHandler";
+            if (!method_exists($this, $handler)) {
+                $handler = 'defaultElementHandler';
+            }
+            $outcome = $this->{$handler}($node, $row, $column, $cellContent);
+
+            // Descend only when the handler asked for it.
+            if ($outcome === self::TRAVERSE_CHILDS && $node->hasChildNodes()) {
+                $this->processDomElement($node, $row, $column, $cellContent);
+            }
+        }
+    }
+
+    /**
+     * Reduce a node name to its lowercase ASCII letters and digits, so it
+     * can be used as the prefix of a handler method name.
+     *
+     * @param string $elementName Node name as reported by the DOM
+     * @return string
+     */
+    protected function cleanNodeName($elementName)
+    {
+        $alphanumeric = preg_replace('/[^a-zA-Z0-9]/u', '', $elementName);
+
+        return strtolower($alphanumeric);
+    }
+
+ /**
+ * Scan theXML for use of caption - worksheet title
+ * table > thead - Header rows (formatted bold)
+ * table > tbody - Data rows (no formatting)
+ *
+ * Copyright (c) 2015 Wine Logistix GmbH
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * @category PHPExcel
+ * @package PHPExcel_Reader_HTML
+ * @copyright Copyright (c) 2015 Wine Logistix (http://www.wine-logistix.de)
+ * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
+ * @version ##VERSION##, ##DATE##
+ */
+class PHPExcel_Reader_HTML_SemanticTable extends PHPExcel_Reader_HTML_Abstract
+{
+
+    /**
+     * Workbook being populated; assigned in loadHandler().
+     * @var PHPExcel
+     */
+    protected $excel;
+
+    /**
+     * Write cell content at specified position to active sheet.
+     *
+     * String content is trimmed (the trimmed value is stored back into
+     * $cellContent) and written only when it is not empty. Non-string
+     * content is ignored.
+     *
+     * @param string $column
+     * @param int $row
+     * @param string $cellContent
+     */
+    protected function flushCell($column, $row, &$cellContent)
+    {
+        if (!is_string($cellContent)) {
+            return;
+        }
+        $cellContent = trim($cellContent);
+        if ($cellContent !== '') {
+            $this->excel->getActiveSheet()->setCellValue($column . $row, $cellContent);
+        }
+    }
+
+    /**
+     * Handler for elements with no explicit handler.
+     * @param DOMNode $element
+     * @param int $row
+     * @param string $column
+     * @param string $cellContent
+     * @return int TRAVERSE_CHILDS
+     */
+    protected function defaultElementHandler(DOMNode $element, &$row, &$column, &$cellContent)
+    {
+        // This implementation doesn't care about any element except the ones
+        // for which an explicit handler is defined. To get to these elements
+        // though, children of the other elements need to be traversed.
+        return PHPExcel_Reader_HTML_Abstract::TRAVERSE_CHILDS;
+    }
+
+    /**
+     * Handler for DOMText elements. Text nodes are ignored here; the row
+     * builders read the text of each cell element directly.
+     * @param DOMNode $element
+     * @param int $row
+     * @param string $column
+     * @param string $cellContent
+     */
+    protected function textElementHandler(DOMNode $element, &$row, &$column, &$cellContent)
+    {
+    }
+
+    /**
+     * Handler which is executed after loading the HTML file and before
+     * traversing elements.
+     * @param PHPExcel $objPHPExcel
+     */
+    protected function loadHandler(PHPExcel $objPHPExcel)
+    {
+        $this->excel = $objPHPExcel;
+        // Remove the first sheet: every table creates its own worksheet, and
+        // a document without any table element is an error in the source file.
+        // NOTE(review): when reached through loadIntoExisting() this also
+        // removes the first sheet of a caller-supplied workbook - confirm
+        // that this is intended.
+        $this->excel->removeSheetByIndex(0);
+    }
+
+    /**
+     * Handler which is executed after traversing elements and before
+     * returning from the load method.
+     */
+    protected function finishHandler()
+    {
+        if ($this->excel->getSheetCount() > 0) {
+            // This is cosmetic; during processing a worksheet was created
+            // for each table and the last created is set active. When opening
+            // the file in GUI, the last worksheet would open, but it's most
+            // likely desired to view the first worksheet first.
+            $this->excel->setActiveSheetIndex(0);
+        }
+    }
+
+    /**
+     * Set document title.
+     * @param DOMNode $element
+     * @param int $row
+     * @param string $column
+     * @param string $cellContent
+     */
+    protected function titleElementHandler(DOMNode $element, &$row, &$column, &$cellContent)
+    {
+        $this->excel->getProperties()->setTitle($element->textContent);
+    }
+
+    /**
+     * Create a new worksheet and use it as active sheet.
+     * @param DOMNode $element
+     * @param int $row
+     * @param string $column
+     * @param string $cellContent
+     * @return int TRAVERSE_CHILDS
+     */
+    protected function tableElementHandler(DOMNode $element, &$row, &$column, &$cellContent)
+    {
+        // The sheet count before creation is the index of the new sheet.
+        $sheetNum = $this->excel->getSheetCount();
+        $this->excel->createSheet();
+        $this->excel->setActiveSheetIndex($sheetNum);
+        // Row and column need to be reset.
+        $row = 1;
+        $column = 'A';
+        return PHPExcel_Reader_HTML_Abstract::TRAVERSE_CHILDS;
+    }
+
+    /**
+     * Set title of current active sheet.
+     * @param DOMNode $element
+     * @param int $row
+     * @param string $column
+     * @param string $cellContent
+     */
+    protected function captionElementHandler(DOMNode $element, &$row, &$column, &$cellContent)
+    {
+        $this->excel->getActiveSheet()->setTitle($element->textContent);
+    }
+
+    /**
+     * For each header row in thead, create a row with bold formatted columns.
+     * @param DOMNode $element
+     * @param int $row
+     * @param string $column
+     * @param string $cellContent
+     */
+    protected function theadElementHandler(DOMNode $element, &$row, &$column, &$cellContent)
+    {
+        foreach ($element->childNodes as $child) {
+            if ($this->isElement($child, "tr")) {
+                $this->createHeaderRow($child, $row);
+                $row += 1;
+            }
+        }
+        // Don't traverse children as they are already traversed in here.
+    }
+
+    /**
+     * For each row in tbody, create a data row without formatting.
+     * @param DOMNode $element
+     * @param int $row
+     * @param string $column
+     * @param string $cellContent
+     */
+    protected function tbodyElementHandler(DOMNode $element, &$row, &$column, &$cellContent)
+    {
+        foreach ($element->childNodes as $child) {
+            if ($this->isElement($child, "tr")) {
+                $this->createDataRow($child, $row);
+                $row += 1;
+            }
+        }
+        // Don't traverse children as they are already traversed in here.
+    }
+
+    /**
+     * Write the th cells of a header row and format them bold.
+     * @param DOMNode $theadRow Row element inside thead
+     * @param int $row Row number to write to
+     */
+    protected function createHeaderRow(DOMNode $theadRow, $row)
+    {
+        $lastColumn = $this->writeRowCells($theadRow, $row, "th");
+        if ($lastColumn === null) {
+            // No header cells were found, so there is nothing to format.
+            return;
+        }
+        // Formatting headers by using range is faster than doing it per cell.
+        // The range ends at the last column that was actually visited.
+        $range = sprintf('A%d:%s%d', $row, $lastColumn, $row);
+        $this->excel->getActiveSheet()->getStyle($range)->getFont()->setBold(true);
+    }
+
+    /**
+     * Write the td cells of a data row.
+     * @param DOMNode $tbodyRow Row element inside tbody
+     * @param int $row Row number to write to
+     */
+    protected function createDataRow(DOMNode $tbodyRow, $row)
+    {
+        $this->writeRowCells($tbodyRow, $row, "td");
+    }
+
+    /**
+     * Write the text of every child element with the given name into
+     * consecutive columns of a row, starting at column A.
+     * @param DOMNode $rowNode Row element whose children are inspected
+     * @param int $row Row number to write to
+     * @param string $cellName Node name of the cell elements to pick up
+     * @return string|null Last column visited, or null if no cell matched
+     */
+    private function writeRowCells(DOMNode $rowNode, $row, $cellName)
+    {
+        $column = 'A';
+        $lastColumn = null;
+        foreach ($rowNode->childNodes as $child) {
+            if (!$this->isElement($child, $cellName)) {
+                continue;
+            }
+            // flushCell() takes the content by reference and overwrites it
+            // with the trimmed value, so pass a local copy rather than the
+            // DOM node's own property.
+            $text = $child->textContent;
+            $this->flushCell($column, $row, $text);
+            $lastColumn = $column;
+            // String increment: 'A' becomes 'B', 'Z' becomes 'AA'.
+            $column++;
+        }
+        return $lastColumn;
+    }
+
+    /**
+     * Tell whether a value is a DOM node with exactly the given node name.
+     * @param mixed $el
+     * @param string $name
+     * @return boolean
+     */
+    private function isElement($el, $name)
+    {
+        return $el instanceof DOMNode && $el->nodeName === $name;
+    }
+
+}