From 6379548646e627ae95729cca0f87c6d8d4ab4acb Mon Sep 17 00:00:00 2001 From: Aljosha Papsch Date: Wed, 12 Aug 2015 10:28:07 +0200 Subject: [PATCH 01/11] Refactor parts of PHPExcel_Reader_HTML into an abstract base class. --- Classes/PHPExcel/Reader/HTML/Abstract.php | 166 ++++++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 Classes/PHPExcel/Reader/HTML/Abstract.php diff --git a/Classes/PHPExcel/Reader/HTML/Abstract.php b/Classes/PHPExcel/Reader/HTML/Abstract.php new file mode 100644 index 000000000..3c4f896ef --- /dev/null +++ b/Classes/PHPExcel/Reader/HTML/Abstract.php @@ -0,0 +1,166 @@ +loadIntoExisting($pFilename, $objPHPExcel); + } + + /** + * Loads PHPExcel from file into PHPExcel instance. + * + * @param string $pFilename + * @param PHPExcel $objPHPExcel + * @return PHPExcel + * @throws PHPExcel_Reader_Exception + */ + public function loadIntoExisting($pFilename, PHPExcel $objPHPExcel) + { + // Open file to validate + $this->openFile($pFilename); + if (!$this->isValidFormat()) { + fclose($this->fileHandle); + throw new PHPExcel_Reader_Exception($pFilename . " is an invalid HTML file."); + } + // Close after validating + fclose($this->fileHandle); + + // Create a new DOM object + $dom = new DOMDocument(); + // Reload the HTML file into the DOM object + $loaded = $dom->loadHTML($this->securityScanFile($pFilename)); + if ($loaded === false) { + throw new PHPExcel_Reader_Exception('Failed to load ', $pFilename, ' as a DOM Document'); + } + + // Discard white space + $dom->preserveWhiteSpace = false; + + $row = 0; + $column = 'A'; + $content = ''; + + // Allow implementation specific initalization after load. + $this->loadHandler($objPHPExcel); + + $this->processDomElement($dom, $row, $column, $content); + + // Return + return $objPHPExcel; + } + + /** + * Validate that the current file is an HTML file + * + * @return boolean + */ + protected function isValidFormat() + { + // Reading 2048 bytes should be enough to validate that the format is HTML + $data = fread($this->fileHandle, 2048); + if ((strpos($data, '<') !== false) && + (strlen($data) !== strlen(strip_tags($data)))) { + return true; + } + + return false; + } + + protected function processDomElement(DOMNode $element, &$row, &$column, &$cellContent) + { + foreach ($element->childNodes as $child) { + if ($child instanceof \DOMText) { + $this->textElementHandler($child, $row, $column, $cellContent); + } elseif ($child instanceof \DOMElement) { + // For each element a handler is invoked dynamically. If you don't want to use + // dynamic dispatch, use defaultElementHandler. + $nodeName = $this->cleanNodeName($child->nodeName); + $handlerName = $nodeName . "ElementHandler"; + if (method_exists($this, $handlerName)) { + $this->{$handlerName}($child, $row, $column, $cellContent); + } else { + $this->defaultElementHandler($child, $row, $column, $cellContent); + } + } + } + } + + protected function cleanNodeName($elementName) + { + return strtolower(preg_replace('/[^a-zA-Z0-9]/u', '', $elementName)); + } +} From 596023a8a724bc109f9b75f6ad8a29c6389758c8 Mon Sep 17 00:00:00 2001 From: Aljosha Papsch Date: Wed, 12 Aug 2015 10:31:49 +0200 Subject: [PATCH 02/11] Use PHPExcel_Reader_HTML_Abstract as base class of PHPExcel_Reader_HTML and adapt to changes imposed by base class. --- Classes/PHPExcel/Reader/HTML.php | 205 +++++++++++-------------------- 1 file changed, 72 insertions(+), 133 deletions(-) diff --git a/Classes/PHPExcel/Reader/HTML.php b/Classes/PHPExcel/Reader/HTML.php index a19eaec9d..60353a674 100644 --- a/Classes/PHPExcel/Reader/HTML.php +++ b/Classes/PHPExcel/Reader/HTML.php @@ -34,7 +34,7 @@ * @version ##VERSION##, ##DATE## */ /** PHPExcel root directory */ -class PHPExcel_Reader_HTML extends PHPExcel_Reader_Abstract implements PHPExcel_Reader_IReader +class PHPExcel_Reader_HTML extends PHPExcel_Reader_HTML_Abstract { /** @@ -123,39 +123,6 @@ public function __construct() $this->readFilter = new PHPExcel_Reader_DefaultReadFilter(); } - /** - * Validate that the current file is an HTML file - * - * @return boolean - */ - protected function isValidFormat() - { - // Reading 2048 bytes should be enough to validate that the format is HTML - $data = fread($this->fileHandle, 2048); - if ((strpos($data, '<') !== false) && - (strlen($data) !== strlen(strip_tags($data)))) { - return true; - } - - return false; - } - - /** - * Loads PHPExcel from file - * - * @param string $pFilename - * @return PHPExcel - * @throws PHPExcel_Reader_Exception - */ - public function load($pFilename) - { - // Create new PHPExcel - $objPHPExcel = new PHPExcel(); - - // Load into this instance - return $this->loadIntoExisting($pFilename, $objPHPExcel); - } - /** * Set input encoding * @@ -183,6 +150,12 @@ public function getInputEncoding() protected $tableLevel = 0; protected $nestedColumn = array('A'); + /** + * Active Worksheet which is used for writing to. + * @var \PHPExcel_Worksheet + */ + protected $sheet; + protected function setTableStartColumn($column) { if ($this->tableLevel == 0) { @@ -206,7 +179,17 @@ protected function releaseTableStartColumn() return array_pop($this->nestedColumn); } - protected function flushCell($sheet, $column, $row, &$cellContent) + protected function loadHandler(\PHPExcel $objPHPExcel) + { + // Create new PHPExcel worksheets. + while ($objPHPExcel->getSheetCount() <= $this->sheetIndex) { + $objPHPExcel->createSheet(); + } + $objPHPExcel->setActiveSheetIndex($this->sheetIndex); + $this->sheet = $objPHPExcel->getActiveSheet(); + } + + protected function flushCell($column, $row, &$cellContent) { if (is_string($cellContent)) { // Simple String content @@ -215,7 +198,7 @@ protected function flushCell($sheet, $column, $row, &$cellContent) // echo 'FLUSH CELL: ' , $column , $row , ' => ' , $cellContent , '
'; // Write to worksheet to be done here... // ... we return the cell so we can mess about with styles more easily - $sheet->setCellValue($column . $row, $cellContent, true); + $this->sheet->setCellValue($column . $row, $cellContent, true); $this->dataArray[$row][$column] = $cellContent; } } else { @@ -226,28 +209,29 @@ protected function flushCell($sheet, $column, $row, &$cellContent) $cellContent = (string) ''; } - protected function processDomElement(DOMNode $element, $sheet, &$row, &$column, &$cellContent, $format = null) + protected function textElementHandler(\DOMNode $element, &$row, &$column, &$cellContent) { - foreach ($element->childNodes as $child) { - if ($child instanceof DOMText) { - $domText = preg_replace('/\s+/u', ' ', trim($child->nodeValue)); - if (is_string($cellContent)) { - // simply append the text if the cell content is a plain text string - $cellContent .= $domText; - } else { - // but if we have a rich text run instead, we need to append it correctly - // TODO - } - } elseif ($child instanceof DOMElement) { -// echo 'DOM ELEMENT: ' , strtoupper($child->nodeName) , '
'; + $domText = preg_replace('/\s+/u', ' ', trim($element->nodeValue)); + if (is_string($cellContent)) { + // simply append the text if the cell content is a plain text string + $cellContent .= $domText; + } else { + // but if we have a rich text run instead, we need to append it correctly + // TODO + } + } + + protected function defaultElementHandler(DOMNode $element, &$row, &$column, &$cellContent, $format = null) + { +// echo 'DOM ELEMENT: ' , strtoupper($element->nodeName) , '
'; $attributeArray = array(); - foreach ($child->attributes as $attribute) { + foreach ($element->attributes as $attribute) { // echo 'ATTRIBUTE: ' , $attribute->name , ' => ' , $attribute->value , '
'; $attributeArray[$attribute->name] = $attribute->value; } - switch ($child->nodeName) { + switch ($element->nodeName) { case 'meta': foreach ($attributeArray as $attributeName => $attributeValue) { switch ($attributeName) { @@ -257,11 +241,11 @@ protected function processDomElement(DOMNode $element, $sheet, &$row, &$column, break; } } - $this->processDomElement($child, $sheet, $row, $column, $cellContent); + $this->processDomElement($element, $row, $column, $cellContent); break; case 'title': - $this->processDomElement($child, $sheet, $row, $column, $cellContent); - $sheet->setTitle($cellContent); + $this->processDomElement($element, $row, $column, $cellContent); + $this->sheet->setTitle($cellContent); $cellContent = ''; break; case 'span': @@ -275,20 +259,20 @@ protected function processDomElement(DOMNode $element, $sheet, &$row, &$column, if ($cellContent > '') { $cellContent .= ' '; } - $this->processDomElement($child, $sheet, $row, $column, $cellContent); + $this->processDomElement($element, $row, $column, $cellContent); if ($cellContent > '') { $cellContent .= ' '; } // echo 'END OF STYLING, SPAN OR DIV
'; break; case 'hr': - $this->flushCell($sheet, $column, $row, $cellContent); + $this->flushCell($column, $row, $cellContent); ++$row; - if (isset($this->formats[$child->nodeName])) { - $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]); + if (isset($this->formats[$element->nodeName])) { + $this->sheet->getStyle($column . $row)->applyFromArray($this->formats[$element->nodeName]); } else { $cellContent = '----------'; - $this->flushCell($sheet, $column, $row, $cellContent); + $this->flushCell($column, $row, $cellContent); } ++$row; // Add a break after a horizontal rule, simply by allowing the code to dropthru @@ -298,7 +282,7 @@ protected function processDomElement(DOMNode $element, $sheet, &$row, &$column, $cellContent .= "\n"; } else { // Otherwise flush our existing content and move the row cursor on - $this->flushCell($sheet, $column, $row, $cellContent); + $this->flushCell($column, $row, $cellContent); ++$row; } // echo 'HARD LINE BREAK: ' , '
'; @@ -309,15 +293,15 @@ protected function processDomElement(DOMNode $element, $sheet, &$row, &$column, switch ($attributeName) { case 'href': // echo 'Link to ' , $attributeValue , '
'; - $sheet->getCell($column . $row)->getHyperlink()->setUrl($attributeValue); - if (isset($this->formats[$child->nodeName])) { - $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]); + $this->sheet->getCell($column . $row)->getHyperlink()->setUrl($attributeValue); + if (isset($this->formats[$element->nodeName])) { + $this->sheet->getStyle($column . $row)->applyFromArray($this->formats[$element->nodeName]); } break; } } $cellContent .= ' '; - $this->processDomElement($child, $sheet, $row, $column, $cellContent); + $this->processDomElement($element, $row, $column, $cellContent); // echo 'END OF HYPERLINK:' , '
'; break; case 'h1': @@ -333,20 +317,20 @@ protected function processDomElement(DOMNode $element, $sheet, &$row, &$column, // If we're inside a table, replace with a \n $cellContent .= "\n"; // echo 'LIST ENTRY: ' , '
'; - $this->processDomElement($child, $sheet, $row, $column, $cellContent); + $this->processDomElement($element, $row, $column, $cellContent); // echo 'END OF LIST ENTRY:' , '
'; } else { if ($cellContent > '') { - $this->flushCell($sheet, $column, $row, $cellContent); + $this->flushCell($column, $row, $cellContent); $row++; } // echo 'START OF PARAGRAPH: ' , '
'; - $this->processDomElement($child, $sheet, $row, $column, $cellContent); + $this->processDomElement($element, $row, $column, $cellContent); // echo 'END OF PARAGRAPH:' , '
'; - $this->flushCell($sheet, $column, $row, $cellContent); + $this->flushCell($column, $row, $cellContent); - if (isset($this->formats[$child->nodeName])) { - $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]); + if (isset($this->formats[$element->nodeName])) { + $this->sheet->getStyle($column . $row)->applyFromArray($this->formats[$element->nodeName]); } $row++; @@ -358,28 +342,28 @@ protected function processDomElement(DOMNode $element, $sheet, &$row, &$column, // If we're inside a table, replace with a \n $cellContent .= "\n"; // echo 'LIST ENTRY: ' , '
'; - $this->processDomElement($child, $sheet, $row, $column, $cellContent); + $this->processDomElement($element, $row, $column, $cellContent); // echo 'END OF LIST ENTRY:' , '
'; } else { if ($cellContent > '') { - $this->flushCell($sheet, $column, $row, $cellContent); + $this->flushCell($column, $row, $cellContent); } ++$row; // echo 'LIST ENTRY: ' , '
'; - $this->processDomElement($child, $sheet, $row, $column, $cellContent); + $this->processDomElement($element, $row, $column, $cellContent); // echo 'END OF LIST ENTRY:' , '
'; - $this->flushCell($sheet, $column, $row, $cellContent); + $this->flushCell($column, $row, $cellContent); $column = 'A'; } break; case 'table': - $this->flushCell($sheet, $column, $row, $cellContent); + $this->flushCell($column, $row, $cellContent); $column = $this->setTableStartColumn($column); // echo 'START OF TABLE LEVEL ' , $this->tableLevel , '
'; if ($this->tableLevel > 1) { --$row; } - $this->processDomElement($child, $sheet, $row, $column, $cellContent); + $this->processDomElement($element, $row, $column, $cellContent); // echo 'END OF TABLE LEVEL ' , $this->tableLevel , '
'; $column = $this->releaseTableStartColumn(); if ($this->tableLevel > 1) { @@ -390,33 +374,33 @@ protected function processDomElement(DOMNode $element, $sheet, &$row, &$column, break; case 'thead': case 'tbody': - $this->processDomElement($child, $sheet, $row, $column, $cellContent); + $this->processDomElement($element, $row, $column, $cellContent); break; case 'tr': $column = $this->getTableStartColumn(); $cellContent = ''; // echo 'START OF TABLE ' , $this->tableLevel , ' ROW
'; - $this->processDomElement($child, $sheet, $row, $column, $cellContent); + $this->processDomElement($element, $row, $column, $cellContent); ++$row; // echo 'END OF TABLE ' , $this->tableLevel , ' ROW
'; break; case 'th': case 'td': // echo 'START OF TABLE ' , $this->tableLevel , ' CELL
'; - $this->processDomElement($child, $sheet, $row, $column, $cellContent); + $this->processDomElement($element, $row, $column, $cellContent); // echo 'END OF TABLE ' , $this->tableLevel , ' CELL
'; while (isset($this->rowspan[$column . $row])) { ++$column; } - $this->flushCell($sheet, $column, $row, $cellContent); + $this->flushCell($column, $row, $cellContent); // if (isset($attributeArray['style']) && !empty($attributeArray['style'])) { // $styleAry = $this->getPhpExcelStyleArray($attributeArray['style']); // // if (!empty($styleAry)) { -// $sheet->getStyle($column . $row)->applyFromArray($styleAry); +// $this->sheet->getStyle($column . $row)->applyFromArray($styleAry); // } // } @@ -430,7 +414,7 @@ protected function processDomElement(DOMNode $element, $sheet, &$row, &$column, foreach (\PHPExcel_Cell::extractAllCellReferencesInRange($range) as $value) { $this->rowspan[$value] = true; } - $sheet->mergeCells($range); + $this->sheet->mergeCells($range); $column = $columnTo; } elseif (isset($attributeArray['rowspan'])) { //create merging rowspan @@ -438,14 +422,14 @@ protected function processDomElement(DOMNode $element, $sheet, &$row, &$column, foreach (\PHPExcel_Cell::extractAllCellReferencesInRange($range) as $value) { $this->rowspan[$value] = true; } - $sheet->mergeCells($range); + $this->sheet->mergeCells($range); } elseif (isset($attributeArray['colspan'])) { //create merging colspan $columnTo = $column; for ($i = 0; $i < $attributeArray['colspan'] - 1; $i++) { ++$columnTo; } - $sheet->mergeCells($column . $row . ':' . $columnTo . $row); + $this->sheet->mergeCells($column . $row . ':' . $columnTo . $row); $column = $columnTo; } ++$column; @@ -455,58 +439,11 @@ protected function processDomElement(DOMNode $element, $sheet, &$row, &$column, $column = 'A'; $content = ''; $this->tableLevel = 0; - $this->processDomElement($child, $sheet, $row, $column, $cellContent); + $this->processDomElement($element, $row, $column, $cellContent); break; default: - $this->processDomElement($child, $sheet, $row, $column, $cellContent); + $this->processDomElement($element, $row, $column, $cellContent); } - } - } - } - - /** - * Loads PHPExcel from file into PHPExcel instance - * - * @param string $pFilename - * @param PHPExcel $objPHPExcel - * @return PHPExcel - * @throws PHPExcel_Reader_Exception - */ - public function loadIntoExisting($pFilename, PHPExcel $objPHPExcel) - { - // Open file to validate - $this->openFile($pFilename); - if (!$this->isValidFormat()) { - fclose($this->fileHandle); - throw new PHPExcel_Reader_Exception($pFilename . " is an Invalid HTML file."); - } - // Close after validating - fclose($this->fileHandle); - - // Create new PHPExcel - while ($objPHPExcel->getSheetCount() <= $this->sheetIndex) { - $objPHPExcel->createSheet(); - } - $objPHPExcel->setActiveSheetIndex($this->sheetIndex); - - // Create a new DOM object - $dom = new domDocument; - // Reload the HTML file into the DOM object - $loaded = $dom->loadHTML($this->securityScanFile($pFilename)); - if ($loaded === false) { - throw new PHPExcel_Reader_Exception('Failed to load ', $pFilename, ' as a DOM Document'); - } - - // Discard white space - $dom->preserveWhiteSpace = false; - - $row = 0; - $column = 'A'; - $content = ''; - $this->processDomElement($dom, $objPHPExcel->getActiveSheet(), $row, $column, $content); - - // Return - return $objPHPExcel; } /** @@ -546,4 +483,6 @@ public function securityScan($xml) } return $xml; } + + } From 374df4181eeecf1467a2e6eaa8b2e8ac5cc10b27 Mon Sep 17 00:00:00 2001 From: Aljosha Papsch Date: Wed, 12 Aug 2015 12:58:50 +0200 Subject: [PATCH 03/11] PHPExcel_Reader_HTML_Abstract: Decide whether to traverse child nodes of child from return value of handler method. --- Classes/PHPExcel/Reader/HTML.php | 1 + Classes/PHPExcel/Reader/HTML/Abstract.php | 35 +++++++++++++++++++---- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/Classes/PHPExcel/Reader/HTML.php b/Classes/PHPExcel/Reader/HTML.php index 60353a674..f0d6e69e9 100644 --- a/Classes/PHPExcel/Reader/HTML.php +++ b/Classes/PHPExcel/Reader/HTML.php @@ -444,6 +444,7 @@ protected function defaultElementHandler(DOMNode $element, &$row, &$column, &$ce default: $this->processDomElement($element, $row, $column, $cellContent); } + // This method does all traversing itself, no TRAVERSE_CHILD hint needed. } /** diff --git a/Classes/PHPExcel/Reader/HTML/Abstract.php b/Classes/PHPExcel/Reader/HTML/Abstract.php index 3c4f896ef..bae4c445c 100644 --- a/Classes/PHPExcel/Reader/HTML/Abstract.php +++ b/Classes/PHPExcel/Reader/HTML/Abstract.php @@ -33,6 +33,12 @@ abstract class PHPExcel_Reader_HTML_Abstract extends PHPExcel_Reader_Abstract implements PHPExcel_Reader_IReader { + /** + * Tell processDomElement to traverse child elements of the current element recursively. + * @var int + */ + const TRAVERSE_CHILDS = 1; + /** * Write cell content at specified position to active sheet. * @param int $row @@ -47,6 +53,7 @@ protected abstract function flushCell($row, $column, &$cellContent); * @param int $row * @param string $column * @param string $cellContent + * @return int TRAVERSE_CHILDS or null */ protected abstract function defaultElementHandler(\DOMNode $element, &$row, &$column, &$cellContent); @@ -140,20 +147,36 @@ protected function isValidFormat() return false; } + /** + * Traverse elements in DOM and invoke handler. + * A handler method in own object with name ElementHandler + * is invoked if the method exists, or defaultElementHandler if not. + * Handlers can indicate whether to traverse child elements, by returning + * TRAVERSE_CHILDS. Childs are traversed recursively. + * @param \DOMNode $element Element of which childs are traversed. + * @param int $row Row number + * @param string $column Excel style column name + * @param $cellContent A buffer which can be used by implementation to store temporary cell content before flushing to cell. + */ protected function processDomElement(DOMNode $element, &$row, &$column, &$cellContent) { foreach ($element->childNodes as $child) { if ($child instanceof \DOMText) { $this->textElementHandler($child, $row, $column, $cellContent); } elseif ($child instanceof \DOMElement) { - // For each element a handler is invoked dynamically. If you don't want to use - // dynamic dispatch, use defaultElementHandler. + // For each element a handler is invoked dynamically. If you + // don't want to use dynamic dispatch, use defaultElementHandler. $nodeName = $this->cleanNodeName($child->nodeName); $handlerName = $nodeName . "ElementHandler"; - if (method_exists($this, $handlerName)) { - $this->{$handlerName}($child, $row, $column, $cellContent); - } else { - $this->defaultElementHandler($child, $row, $column, $cellContent); + $continueWith = (method_exists($this, $handlerName) + ? $this->{$handlerName}($child, $row, $column, $cellContent) + : $this->defaultElementHandler($child, $row, $column, $cellContent)); + if ($continueWith === self::TRAVERSE_CHILDS && $child->hasChildNodes()) { + // Handlers may traverse the DOM themselves. To avoid + // unnecessary traversing in here, by default no childs of + // the child are traversed. If however indicated by handler + // to traverse childs, then do so. + $this->processDomElement($child, $row, $column, $cellContent); } } } From 1b0fe2f0c0a1ae8947b10fe2164d1a5872b5188f Mon Sep 17 00:00:00 2001 From: Aljosha Papsch Date: Wed, 12 Aug 2015 13:01:54 +0200 Subject: [PATCH 04/11] PHPExcel_Reader_HTML_Abstract::flushCell: Fix parameter names. --- Classes/PHPExcel/Reader/HTML/Abstract.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Classes/PHPExcel/Reader/HTML/Abstract.php b/Classes/PHPExcel/Reader/HTML/Abstract.php index bae4c445c..781a6d78f 100644 --- a/Classes/PHPExcel/Reader/HTML/Abstract.php +++ b/Classes/PHPExcel/Reader/HTML/Abstract.php @@ -41,11 +41,11 @@ abstract class PHPExcel_Reader_HTML_Abstract extends PHPExcel_Reader_Abstract im /** * Write cell content at specified position to active sheet. - * @param int $row * @param string $column + * @param int $row * @param string $cellContent */ - protected abstract function flushCell($row, $column, &$cellContent); + protected abstract function flushCell($column, $row, &$cellContent); /** * Handler for elements with no explicit handler. From bf94edbfd0e4a3449e2a292aea0cfa970750982c Mon Sep 17 00:00:00 2001 From: Aljosha Papsch Date: Wed, 12 Aug 2015 13:03:58 +0200 Subject: [PATCH 05/11] PHPExcel_Reader_HTML_Abstract: Start at row 1 because 0 is invalid. --- Classes/PHPExcel/Reader/HTML/Abstract.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Classes/PHPExcel/Reader/HTML/Abstract.php b/Classes/PHPExcel/Reader/HTML/Abstract.php index 781a6d78f..1a6c426f1 100644 --- a/Classes/PHPExcel/Reader/HTML/Abstract.php +++ b/Classes/PHPExcel/Reader/HTML/Abstract.php @@ -117,7 +117,7 @@ public function loadIntoExisting($pFilename, PHPExcel $objPHPExcel) // Discard white space $dom->preserveWhiteSpace = false; - $row = 0; + $row = 1; $column = 'A'; $content = ''; From 5bdb601c4f105fe892a6ab07427a2900df2240b9 Mon Sep 17 00:00:00 2001 From: Aljosha Papsch Date: Wed, 12 Aug 2015 13:07:32 +0200 Subject: [PATCH 06/11] Add a handler which is invoked after processing the DOM. --- Classes/PHPExcel/Reader/HTML.php | 4 ++++ Classes/PHPExcel/Reader/HTML/Abstract.php | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/Classes/PHPExcel/Reader/HTML.php b/Classes/PHPExcel/Reader/HTML.php index f0d6e69e9..5bf68b8e0 100644 --- a/Classes/PHPExcel/Reader/HTML.php +++ b/Classes/PHPExcel/Reader/HTML.php @@ -189,6 +189,10 @@ protected function loadHandler(\PHPExcel $objPHPExcel) $this->sheet = $objPHPExcel->getActiveSheet(); } + protected function finishHandler() + { + } + protected function flushCell($column, $row, &$cellContent) { if (is_string($cellContent)) { diff --git a/Classes/PHPExcel/Reader/HTML/Abstract.php b/Classes/PHPExcel/Reader/HTML/Abstract.php index 1a6c426f1..1ec719f66 100644 --- a/Classes/PHPExcel/Reader/HTML/Abstract.php +++ b/Classes/PHPExcel/Reader/HTML/Abstract.php @@ -73,6 +73,12 @@ protected abstract function textElementHandler(\DOMNode $element, &$row, &$colum */ protected abstract function loadHandler(\PHPExcel $objPHPExcel); + /** + * Handler which is executed after traversing elements and before + * returning from load method. + */ + protected abstract function finishHandler(); + /** * Loads PHPExcel from file. * @param string $pFilename @@ -126,6 +132,9 @@ public function loadIntoExisting($pFilename, PHPExcel $objPHPExcel) $this->processDomElement($dom, $row, $column, $content); + // Allow implementation specific operation after processing. + $this->finishHandler(); + // Return return $objPHPExcel; } From 5a58a634bdfd356103ceca6e1c2b9743dce71a28 Mon Sep 17 00:00:00 2001 From: Aljosha Papsch Date: Wed, 12 Aug 2015 13:08:22 +0200 Subject: [PATCH 07/11] Move override of securityScan in PHPExcel_Reader_HTML to base class. --- Classes/PHPExcel/Reader/HTML.php | 15 --------------- Classes/PHPExcel/Reader/HTML/Abstract.php | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Classes/PHPExcel/Reader/HTML.php b/Classes/PHPExcel/Reader/HTML.php index 5bf68b8e0..69a534368 100644 --- a/Classes/PHPExcel/Reader/HTML.php +++ b/Classes/PHPExcel/Reader/HTML.php @@ -474,20 +474,5 @@ public function setSheetIndex($pValue = 0) return $this; } - /** - * Scan theXML for use of Date: Wed, 12 Aug 2015 13:09:35 +0200 Subject: [PATCH 08/11] Add HTML reader which specializes on semantic tables. --- .../PHPExcel/Reader/HTML/SemanticTable.php | 211 ++++++++++++++++++ 1 file changed, 211 insertions(+) create mode 100644 Classes/PHPExcel/Reader/HTML/SemanticTable.php diff --git a/Classes/PHPExcel/Reader/HTML/SemanticTable.php b/Classes/PHPExcel/Reader/HTML/SemanticTable.php new file mode 100644 index 000000000..28fb12655 --- /dev/null +++ b/Classes/PHPExcel/Reader/HTML/SemanticTable.php @@ -0,0 +1,211 @@ + caption - worksheet title + * table > thead - Header rows (formatted bold) + * table > tbody - Data rows (no formatting) + * + * Copyright (c) 2015 Wine Logistix GmbH + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * @category PHPExcel + * @package PHPExcel_Reader_HTML_SemanticTable + * @copyright Copyright (c) 2015 Wine Logistix (http://www.wine-logistix.de) + * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL + * @version ##VERSION##, ##DATE## + */ +class PHPExcel_Reader_HTML_SemanticTable extends PHPExcel_Reader_HTML_Abstract +{ + + /** + * @var \PHPExcel + */ + protected $excel; + + /** + * Write cell content at specified position to active sheet. + * @param int $row + * @param string $column + * @param string $cellContent + */ + protected function flushCell($column, $row, &$cellContent) + { + if (is_string($cellContent)) { + $cellContent = trim($cellContent); + if ($cellContent !== '') { + $this->excel->getActiveSheet()->setCellValue($column.$row, $cellContent); + } + } + } + + /** + * Handler for elements with no explicit handler. + * @param \DOMNode $element + * @param int $row + * @param string $column + * @param string $cellContent + */ + protected function defaultElementHandler(\DOMNode $element, &$row, &$column, &$cellContent) + { + // This implementation doesn't care about any element except the ones + // for which an explicit handler is defined. To get to these elements + // though, children of the other elements need to be traversed. + return \PHPExcel_Reader_HTML_Abstract::TRAVERSE_CHILDS; + } + + /** + * Handler for DOMText elements. + * @param \DOMNode $element + * @param int $row + * @param string $column + * @param string $cellContent + */ + protected function textElementHandler(\DOMNode $element, &$row, &$column, &$cellContent) + { + } + + /** + * Handler which is executed after loading the HTML file and before + * traversing elements. + * @param \PHPExcel $objPHPExcel + */ + protected function loadHandler(\PHPExcel $objPHPExcel) + { + $this->excel = $objPHPExcel; + // Remove first sheet because if no table elements are occured + // in document, then it's an error in the source file. + $this->excel->removeSheetByIndex(0); + } + + protected function finishHandler() + { + if ($this->excel->getSheetCount() > 0) { + // This is cosmetic; during processing a worksheet was created + // for each table and the last created is set active. When opening + // the file in GUI, the last worksheet would open, but it's most + // likely desired to view the first worksheet first. + $this->excel->setActiveSheetIndex(0); + } + } + + /** + * Set document title. + * @param \DOMNode $element + * @param int $row + * @param string $column + * @param string $cellContent + */ + protected function titleElementHandler(\DOMNode $element, &$row, &$column, &$cellContent) + { + $this->excel->getProperties()->setTitle($element->textContent); + } + + /** + * Create a new worksheet and use it as active sheet. + * @param \DOMNode $element + * @param int $row + * @param string $column + * @param string $cellContent + */ + protected function tableElementHandler(\DOMNode $element, &$row, &$column, &$cellContent) + { + $sheetNum = $this->excel->getSheetCount(); + $this->excel->createSheet(); + $this->excel->setActiveSheetIndex($sheetNum); + // Row and column need to be reset. + $row = 1; + $column = 'A'; + return PHPExcel_Reader_HTML_Abstract::TRAVERSE_CHILDS; + } + + /** + * Set title of current active sheet. + * @param \DOMNode $element + * @param int $row + * @param string $column + * @param string $cellContent + */ + protected function captionElementHandler(\DOMNode $element, &$row, &$column, &$cellContent) + { + $this->excel->getActiveSheet()->setTitle($element->textContent); + } + + /** + * For each header row in thead, create a row with bold formatted columns. + * @param \DOMNode $element + * @param int $row + * @param string $column + * @param string $cellContent + */ + protected function theadElementHandler(\DOMNode $element, &$row, &$column, &$cellContent) + { + foreach ($element->childNodes as $child) { + if ($this->isElement($child, "tr")) { + $this->createHeaderRow($child, $row); + $row += 1; + } + } + // Don't traverse childs as they are already traversed in here. + } + + protected function tbodyElementHandler(\DOMNode $element, &$row, &$column, &$cellContent) + { + foreach ($element->childNodes as $child) { + if ($this->isElement($child, "tr")) { + $this->createDataRow($child, $row); + $row += 1; + } + } + // Don't traverse childs as they are already traversed in here. + } + + protected function createHeaderRow(\DOMNode $theadRow, $row) + { + $column = 'A'; + foreach ($theadRow->childNodes as $child) { + if ($this->isElement($child, "th")) { + $this->flushCell($column, $row, $child->textContent); + $column++; + } + } + // Formatting headers by using range is faster than doing it in the loop. + $range = sprintf('A%d:%s%d', $row, $column, $row); + $this->excel->getActiveSheet()->getStyle($range)->getFont()->setBold(true); + } + + protected function createDataRow(\DOMNode $tbodyRow, $row) + { + $column = 'A'; + foreach ($tbodyRow->childNodes as $child) { + if ($this->isElement($child, "td")) { + $this->flushCell($column, $row, $child->textContent); + $column++; + } + } + } + + private function isElement($el, $name) { + return $el instanceof \DOMNode && $el->nodeName === $name; + } + +} From d76466fe948ef8d25b1840da502d52b08de124bf Mon Sep 17 00:00:00 2001 From: Aljosha Papsch Date: Wed, 12 Aug 2015 14:12:09 +0200 Subject: [PATCH 09/11] Cleanup: copyright notice, comments, explicit namespaces. --- Classes/PHPExcel/Reader/HTML.php | 12 ++++--- Classes/PHPExcel/Reader/HTML/Abstract.php | 34 +++++++++++++------ .../PHPExcel/Reader/HTML/SemanticTable.php | 13 +++++-- 3 files changed, 42 insertions(+), 17 deletions(-) diff --git a/Classes/PHPExcel/Reader/HTML.php b/Classes/PHPExcel/Reader/HTML.php index 69a534368..c186b93ad 100644 --- a/Classes/PHPExcel/Reader/HTML.php +++ b/Classes/PHPExcel/Reader/HTML.php @@ -12,6 +12,7 @@ * PHPExcel_Reader_HTML * * Copyright (c) 2006 - 2015 PHPExcel + * Copyright (c) 2015 Wine Logistix GmbH * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -30,6 +31,7 @@ * @category PHPExcel * @package PHPExcel_Reader * @copyright Copyright (c) 2006 - 2015 PHPExcel (http://www.codeplex.com/PHPExcel) + * @copyright Copyright (c) 2015 Wine Logistix GmbH (http://www.wine-logistix.de) * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL * @version ##VERSION##, ##DATE## */ @@ -97,16 +99,16 @@ class PHPExcel_Reader_HTML extends PHPExcel_Reader_HTML_Abstract 'font' => array( 'underline' => true, 'color' => array( - 'argb' => PHPExcel_Style_Color::COLOR_BLUE, + 'argb' => \PHPExcel_Style_Color::COLOR_BLUE, ), ), ), // Blue underlined 'hr' => array( 'borders' => array( 'bottom' => array( - 'style' => PHPExcel_Style_Border::BORDER_THIN, + 'style' => \PHPExcel_Style_Border::BORDER_THIN, 'color' => array( - PHPExcel_Style_Color::COLOR_BLACK, + \PHPExcel_Style_Color::COLOR_BLACK, ), ), ), @@ -120,7 +122,7 @@ class PHPExcel_Reader_HTML extends PHPExcel_Reader_HTML_Abstract */ public function __construct() { - $this->readFilter = new PHPExcel_Reader_DefaultReadFilter(); + $this->readFilter = new \PHPExcel_Reader_DefaultReadFilter(); } /** @@ -225,7 +227,7 @@ protected function textElementHandler(\DOMNode $element, &$row, &$column, &$cell } } - protected function defaultElementHandler(DOMNode $element, &$row, &$column, &$cellContent, $format = null) + protected function defaultElementHandler(\DOMNode $element, &$row, &$column, &$cellContent, $format = null) { // echo 'DOM ELEMENT: ' , strtoupper($element->nodeName) , '
'; diff --git a/Classes/PHPExcel/Reader/HTML/Abstract.php b/Classes/PHPExcel/Reader/HTML/Abstract.php index 853ffd1ed..79abf3a02 100644 --- a/Classes/PHPExcel/Reader/HTML/Abstract.php +++ b/Classes/PHPExcel/Reader/HTML/Abstract.php @@ -1,11 +1,24 @@ ElementHandler where is lowercase element name. + * Explicit handlers must accept same arguments as defaultElementHandler. + * + * Other handlers exist which facilitate implementation specific behavior: + * + * flushCell - Write a cell value + * textElementHandler - Invoked for DOMText elements. + * loadHandler - Invoked before traversing the DOM. + * finishHandler - Invoked after traversing the DOM. + * * Copyright (c) 2006 - 2015 PHPExcel * Copyright (c) 2015 Wine Logistix GmbH * @@ -34,7 +47,8 @@ abstract class PHPExcel_Reader_HTML_Abstract extends PHPExcel_Reader_Abstract im { /** - * Tell processDomElement to traverse child elements of the current element recursively. + * Tell processDomElement to traverse child elements of the current child + * element recursively. * @var int */ const TRAVERSE_CHILDS = 1; @@ -53,7 +67,7 @@ protected abstract function flushCell($column, $row, &$cellContent); * @param int $row * @param string $column * @param string $cellContent - * @return int TRAVERSE_CHILDS or null + * @return int|null TRAVERSE_CHILDS or null */ protected abstract function defaultElementHandler(\DOMNode $element, &$row, &$column, &$cellContent); @@ -88,7 +102,7 @@ protected abstract function finishHandler(); public function load($pFilename) { // Create new PHPExcel - $objPHPExcel = new PHPExcel(); + $objPHPExcel = new \PHPExcel(); // Load into this instance return $this->loadIntoExisting($pFilename, $objPHPExcel); } @@ -101,23 +115,23 @@ public function load($pFilename) * @return PHPExcel * @throws PHPExcel_Reader_Exception */ - public function loadIntoExisting($pFilename, PHPExcel $objPHPExcel) + public function loadIntoExisting($pFilename, \PHPExcel $objPHPExcel) { // Open file to validate $this->openFile($pFilename); if (!$this->isValidFormat()) { fclose($this->fileHandle); - throw new PHPExcel_Reader_Exception($pFilename . " is an invalid HTML file."); + throw new \PHPExcel_Reader_Exception($pFilename . " is an invalid HTML file."); } // Close after validating fclose($this->fileHandle); // Create a new DOM object - $dom = new DOMDocument(); + $dom = new \DOMDocument(); // Reload the HTML file into the DOM object $loaded = $dom->loadHTML($this->securityScanFile($pFilename)); if ($loaded === false) { - throw new PHPExcel_Reader_Exception('Failed to load ', $pFilename, ' as a DOM Document'); + throw new \PHPExcel_Reader_Exception('Failed to load ', $pFilename, ' as a DOM Document'); } // Discard white space @@ -167,7 +181,7 @@ protected function isValidFormat() * @param string $column Excel style column name * @param $cellContent A buffer which can be used by implementation to store temporary cell content before flushing to cell. */ - protected function processDomElement(DOMNode $element, &$row, &$column, &$cellContent) + protected function processDomElement(\DOMNode $element, &$row, &$column, &$cellContent) { foreach ($element->childNodes as $child) { if ($child instanceof \DOMText) { @@ -206,7 +220,7 @@ public function securityScan($xml) { $pattern = '/\\0?' . implode('\\0?', str_split(' Date: Wed, 12 Aug 2015 15:00:57 +0200 Subject: [PATCH 10/11] PHPExcel_Reader_HTML_Abstract: Get HTML content as string in addition to reading from file. --- Classes/PHPExcel/Reader/HTML/Abstract.php | 63 +++++++++++++++++------ 1 file changed, 48 insertions(+), 15 deletions(-) diff --git a/Classes/PHPExcel/Reader/HTML/Abstract.php b/Classes/PHPExcel/Reader/HTML/Abstract.php index 79abf3a02..a338e63f5 100644 --- a/Classes/PHPExcel/Reader/HTML/Abstract.php +++ b/Classes/PHPExcel/Reader/HTML/Abstract.php @@ -103,10 +103,34 @@ public function load($pFilename) { // Create new PHPExcel $objPHPExcel = new \PHPExcel(); + // Open file to validate + $this->openFile($pFilename); + if (!$this->isValidFileFormat()) { + fclose($this->fileHandle); + throw new \PHPExcel_Reader_Exception($pFilename . " is an invalid HTML file."); + } + // Close after validating + fclose($this->fileHandle); // Load into this instance return $this->loadIntoExisting($pFilename, $objPHPExcel); } + /** + * Loads PHPExcel from string. + * @param string $content HTML content + * @return PHPExcel + * @throws PHPExcel_Reader_Exception + */ + public function loadFromString($content) + { + $objPHPExcel = new \PHPExcel(); + if (!$this->isValidFormat($content)) { + throw new \PHPExcel_Reader_Exception("HTML content is invalid"); + } + $html = $this->securityScan($content); + return $this->loadIntoExistingFromString($html, $objPHPExcel); + } + /** * Loads PHPExcel from file into PHPExcel instance. * @@ -117,19 +141,20 @@ public function load($pFilename) */ public function loadIntoExisting($pFilename, \PHPExcel $objPHPExcel) { - // Open file to validate - $this->openFile($pFilename); - if (!$this->isValidFormat()) { - fclose($this->fileHandle); - throw new \PHPExcel_Reader_Exception($pFilename . " is an invalid HTML file."); - } - // Close after validating - fclose($this->fileHandle); + $html = $this->securityScanFile($pFilename); + return $this->loadIntoExistingFromString($html, $objPHPExcel); + } + /** + * Loads PHPExcel from string into PHPExcel instance. + */ + protected function loadIntoExistingFromString($content, \PHPExcel $objPHPExcel) + { + // This method is protected as it doesn't do the security scan on content. // Create a new DOM object $dom = new \DOMDocument(); // Reload the HTML file into the DOM object - $loaded = $dom->loadHTML($this->securityScanFile($pFilename)); + $loaded = $dom->loadHTML($content); if ($loaded === false) { throw new \PHPExcel_Reader_Exception('Failed to load ', $pFilename, ' as a DOM Document'); } @@ -154,22 +179,30 @@ public function loadIntoExisting($pFilename, \PHPExcel $objPHPExcel) } /** - * Validate that the current file is an HTML file - * + * Validate that data contains HTML. * @return boolean */ - protected function isValidFormat() + protected function isValidFormat(&$data) { - // Reading 2048 bytes should be enough to validate that the format is HTML - $data = fread($this->fileHandle, 2048); if ((strpos($data, '<') !== false) && (strlen($data) !== strlen(strip_tags($data)))) { return true; } - return false; } + /** + * Validate that the current file is an HTML file + * + * @return boolean + */ + protected function isValidFileFormat() + { + // Reading 2048 bytes should be enough to validate that the format is HTML + $data = fread($this->fileHandle, 2048); + return $this->isValidFormat($data); + } + /** * Traverse elements in DOM and invoke handler. * A handler method in own object with name ElementHandler From a0b0649b8f429e09c335ab698a10e57b6dddf93a Mon Sep 17 00:00:00 2001 From: Aljosha Papsch Date: Wed, 12 Aug 2015 16:14:45 +0200 Subject: [PATCH 11/11] Remove namespace backslashes because PHP 5.2 doesn't support namespaces. --- Classes/PHPExcel/Reader/HTML.php | 20 ++++----- Classes/PHPExcel/Reader/HTML/Abstract.php | 38 ++++++++--------- .../PHPExcel/Reader/HTML/SemanticTable.php | 42 +++++++++---------- 3 files changed, 50 insertions(+), 50 deletions(-) diff --git a/Classes/PHPExcel/Reader/HTML.php b/Classes/PHPExcel/Reader/HTML.php index c186b93ad..0ac550f9a 100644 --- a/Classes/PHPExcel/Reader/HTML.php +++ b/Classes/PHPExcel/Reader/HTML.php @@ -99,16 +99,16 @@ class PHPExcel_Reader_HTML extends PHPExcel_Reader_HTML_Abstract 'font' => array( 'underline' => true, 'color' => array( - 'argb' => \PHPExcel_Style_Color::COLOR_BLUE, + 'argb' => PHPExcel_Style_Color::COLOR_BLUE, ), ), ), // Blue underlined 'hr' => array( 'borders' => array( 'bottom' => array( - 'style' => \PHPExcel_Style_Border::BORDER_THIN, + 'style' => PHPExcel_Style_Border::BORDER_THIN, 'color' => array( - \PHPExcel_Style_Color::COLOR_BLACK, + PHPExcel_Style_Color::COLOR_BLACK, ), ), ), @@ -122,7 +122,7 @@ class PHPExcel_Reader_HTML extends PHPExcel_Reader_HTML_Abstract */ public function __construct() { - $this->readFilter = new \PHPExcel_Reader_DefaultReadFilter(); + $this->readFilter = new PHPExcel_Reader_DefaultReadFilter(); } /** @@ -154,7 +154,7 @@ public function getInputEncoding() /** * Active Worksheet which is used for writing to. - * @var \PHPExcel_Worksheet + * @var PHPExcel_Worksheet */ protected $sheet; @@ -181,7 +181,7 @@ protected function releaseTableStartColumn() return array_pop($this->nestedColumn); } - protected function loadHandler(\PHPExcel $objPHPExcel) + protected function loadHandler(PHPExcel $objPHPExcel) { // Create new PHPExcel worksheets. while ($objPHPExcel->getSheetCount() <= $this->sheetIndex) { @@ -215,7 +215,7 @@ protected function flushCell($column, $row, &$cellContent) $cellContent = (string) ''; } - protected function textElementHandler(\DOMNode $element, &$row, &$column, &$cellContent) + protected function textElementHandler(DOMNode $element, &$row, &$column, &$cellContent) { $domText = preg_replace('/\s+/u', ' ', trim($element->nodeValue)); if (is_string($cellContent)) { @@ -227,7 +227,7 @@ protected function textElementHandler(\DOMNode $element, &$row, &$column, &$cell } } - protected function defaultElementHandler(\DOMNode $element, &$row, &$column, &$cellContent, $format = null) + protected function defaultElementHandler(DOMNode $element, &$row, &$column, &$cellContent, $format = null) { // echo 'DOM ELEMENT: ' , strtoupper($element->nodeName) , '
'; @@ -417,7 +417,7 @@ protected function defaultElementHandler(\DOMNode $element, &$row, &$column, &$c ++$columnTo; } $range = $column . $row . ':' . $columnTo . ($row + $attributeArray['rowspan'] - 1); - foreach (\PHPExcel_Cell::extractAllCellReferencesInRange($range) as $value) { + foreach (PHPExcel_Cell::extractAllCellReferencesInRange($range) as $value) { $this->rowspan[$value] = true; } $this->sheet->mergeCells($range); @@ -425,7 +425,7 @@ protected function defaultElementHandler(\DOMNode $element, &$row, &$column, &$c } elseif (isset($attributeArray['rowspan'])) { //create merging rowspan $range = $column . $row . ':' . $column . ($row + $attributeArray['rowspan'] - 1); - foreach (\PHPExcel_Cell::extractAllCellReferencesInRange($range) as $value) { + foreach (PHPExcel_Cell::extractAllCellReferencesInRange($range) as $value) { $this->rowspan[$value] = true; } $this->sheet->mergeCells($range); diff --git a/Classes/PHPExcel/Reader/HTML/Abstract.php b/Classes/PHPExcel/Reader/HTML/Abstract.php index a338e63f5..594dc41db 100644 --- a/Classes/PHPExcel/Reader/HTML/Abstract.php +++ b/Classes/PHPExcel/Reader/HTML/Abstract.php @@ -63,29 +63,29 @@ protected abstract function flushCell($column, $row, &$cellContent); /** * Handler for elements with no explicit handler. - * @param \DOMNode $element + * @param DOMNode $element * @param int $row * @param string $column * @param string $cellContent * @return int|null TRAVERSE_CHILDS or null */ - protected abstract function defaultElementHandler(\DOMNode $element, &$row, &$column, &$cellContent); + protected abstract function defaultElementHandler(DOMNode $element, &$row, &$column, &$cellContent); /** * Handler for DOMText elements. - * @param \DOMNode $element + * @param DOMNode $element * @param int $row * @param string $column * @param string $cellContent */ - protected abstract function textElementHandler(\DOMNode $element, &$row, &$column, &$cellContent); + protected abstract function textElementHandler(DOMNode $element, &$row, &$column, &$cellContent); /** * Handler which is executed after loading the HTML file and before * traversing elements. - * @param \PHPExcel $objPHPExcel + * @param PHPExcel $objPHPExcel */ - protected abstract function loadHandler(\PHPExcel $objPHPExcel); + protected abstract function loadHandler(PHPExcel $objPHPExcel); /** * Handler which is executed after traversing elements and before @@ -102,12 +102,12 @@ protected abstract function finishHandler(); public function load($pFilename) { // Create new PHPExcel - $objPHPExcel = new \PHPExcel(); + $objPHPExcel = new PHPExcel(); // Open file to validate $this->openFile($pFilename); if (!$this->isValidFileFormat()) { fclose($this->fileHandle); - throw new \PHPExcel_Reader_Exception($pFilename . " is an invalid HTML file."); + throw new PHPExcel_Reader_Exception($pFilename . " is an invalid HTML file."); } // Close after validating fclose($this->fileHandle); @@ -123,9 +123,9 @@ public function load($pFilename) */ public function loadFromString($content) { - $objPHPExcel = new \PHPExcel(); + $objPHPExcel = new PHPExcel(); if (!$this->isValidFormat($content)) { - throw new \PHPExcel_Reader_Exception("HTML content is invalid"); + throw new PHPExcel_Reader_Exception("HTML content is invalid"); } $html = $this->securityScan($content); return $this->loadIntoExistingFromString($html, $objPHPExcel); @@ -139,7 +139,7 @@ public function loadFromString($content) * @return PHPExcel * @throws PHPExcel_Reader_Exception */ - public function loadIntoExisting($pFilename, \PHPExcel $objPHPExcel) + public function loadIntoExisting($pFilename, PHPExcel $objPHPExcel) { $html = $this->securityScanFile($pFilename); return $this->loadIntoExistingFromString($html, $objPHPExcel); @@ -148,15 +148,15 @@ public function loadIntoExisting($pFilename, \PHPExcel $objPHPExcel) /** * Loads PHPExcel from string into PHPExcel instance. */ - protected function loadIntoExistingFromString($content, \PHPExcel $objPHPExcel) + protected function loadIntoExistingFromString($content, PHPExcel $objPHPExcel) { // This method is protected as it doesn't do the security scan on content. // Create a new DOM object - $dom = new \DOMDocument(); + $dom = new DOMDocument(); // Reload the HTML file into the DOM object $loaded = $dom->loadHTML($content); if ($loaded === false) { - throw new \PHPExcel_Reader_Exception('Failed to load ', $pFilename, ' as a DOM Document'); + throw new PHPExcel_Reader_Exception('Failed to load ', $pFilename, ' as a DOM Document'); } // Discard white space @@ -209,17 +209,17 @@ protected function isValidFileFormat() * is invoked if the method exists, or defaultElementHandler if not. * Handlers can indicate whether to traverse child elements, by returning * TRAVERSE_CHILDS. Childs are traversed recursively. - * @param \DOMNode $element Element of which childs are traversed. + * @param DOMNode $element Element of which childs are traversed. * @param int $row Row number * @param string $column Excel style column name * @param $cellContent A buffer which can be used by implementation to store temporary cell content before flushing to cell. */ - protected function processDomElement(\DOMNode $element, &$row, &$column, &$cellContent) + protected function processDomElement(DOMNode $element, &$row, &$column, &$cellContent) { foreach ($element->childNodes as $child) { - if ($child instanceof \DOMText) { + if ($child instanceof DOMText) { $this->textElementHandler($child, $row, $column, $cellContent); - } elseif ($child instanceof \DOMElement) { + } elseif ($child instanceof DOMElement) { // For each element a handler is invoked dynamically. If you // don't want to use dynamic dispatch, use defaultElementHandler. $nodeName = $this->cleanNodeName($child->nodeName); @@ -253,7 +253,7 @@ public function securityScan($xml) { $pattern = '/\\0?' . implode('\\0?', str_split('excel = $objPHPExcel; // Remove first sheet because if no table elements are occured @@ -119,24 +119,24 @@ protected function finishHandler() /** * Set document title. - * @param \DOMNode $element + * @param DOMNode $element * @param int $row * @param string $column * @param string $cellContent */ - protected function titleElementHandler(\DOMNode $element, &$row, &$column, &$cellContent) + protected function titleElementHandler(DOMNode $element, &$row, &$column, &$cellContent) { $this->excel->getProperties()->setTitle($element->textContent); } /** * Create a new worksheet and use it as active sheet. - * @param \DOMNode $element + * @param DOMNode $element * @param int $row * @param string $column * @param string $cellContent */ - protected function tableElementHandler(\DOMNode $element, &$row, &$column, &$cellContent) + protected function tableElementHandler(DOMNode $element, &$row, &$column, &$cellContent) { $sheetNum = $this->excel->getSheetCount(); $this->excel->createSheet(); @@ -144,29 +144,29 @@ protected function tableElementHandler(\DOMNode $element, &$row, &$column, &$cel // Row and column need to be reset. $row = 1; $column = 'A'; - return \PHPExcel_Reader_HTML_Abstract::TRAVERSE_CHILDS; + return PHPExcel_Reader_HTML_Abstract::TRAVERSE_CHILDS; } /** * Set title of current active sheet. - * @param \DOMNode $element + * @param DOMNode $element * @param int $row * @param string $column * @param string $cellContent */ - protected function captionElementHandler(\DOMNode $element, &$row, &$column, &$cellContent) + protected function captionElementHandler(DOMNode $element, &$row, &$column, &$cellContent) { $this->excel->getActiveSheet()->setTitle($element->textContent); } /** * For each header row in thead, create a row with bold formatted columns. - * @param \DOMNode $element + * @param DOMNode $element * @param int $row * @param string $column * @param string $cellContent */ - protected function theadElementHandler(\DOMNode $element, &$row, &$column, &$cellContent) + protected function theadElementHandler(DOMNode $element, &$row, &$column, &$cellContent) { foreach ($element->childNodes as $child) { if ($this->isElement($child, "tr")) { @@ -177,7 +177,7 @@ protected function theadElementHandler(\DOMNode $element, &$row, &$column, &$cel // Don't traverse childs as they are already traversed in here. } - protected function tbodyElementHandler(\DOMNode $element, &$row, &$column, &$cellContent) + protected function tbodyElementHandler(DOMNode $element, &$row, &$column, &$cellContent) { foreach ($element->childNodes as $child) { if ($this->isElement($child, "tr")) { @@ -188,7 +188,7 @@ protected function tbodyElementHandler(\DOMNode $element, &$row, &$column, &$cel // Don't traverse childs as they are already traversed in here. } - protected function createHeaderRow(\DOMNode $theadRow, $row) + protected function createHeaderRow(DOMNode $theadRow, $row) { $column = 'A'; foreach ($theadRow->childNodes as $child) { @@ -202,7 +202,7 @@ protected function createHeaderRow(\DOMNode $theadRow, $row) $this->excel->getActiveSheet()->getStyle($range)->getFont()->setBold(true); } - protected function createDataRow(\DOMNode $tbodyRow, $row) + protected function createDataRow(DOMNode $tbodyRow, $row) { $column = 'A'; foreach ($tbodyRow->childNodes as $child) { @@ -214,7 +214,7 @@ protected function createDataRow(\DOMNode $tbodyRow, $row) } private function isElement($el, $name) { - return $el instanceof \DOMNode && $el->nodeName === $name; + return $el instanceof DOMNode && $el->nodeName === $name; } }