diff --git a/include/thirdparty/svg-sanitizer/Exceptions/NestingException.php b/include/thirdparty/svg-sanitizer/Exceptions/NestingException.php index cc7b4cb..7acc842 100644 --- a/include/thirdparty/svg-sanitizer/Exceptions/NestingException.php +++ b/include/thirdparty/svg-sanitizer/Exceptions/NestingException.php @@ -1,9 +1,6 @@ element = $element; parent::__construct($message, $code, $previous); @@ -36,4 +33,4 @@ class NestingException extends \Exception { return $this->element; } -} \ No newline at end of file +} diff --git a/include/thirdparty/svg-sanitizer/README.md b/include/thirdparty/svg-sanitizer/README.md index aef40e6..2b435f5 100644 --- a/include/thirdparty/svg-sanitizer/README.md +++ b/include/thirdparty/svg-sanitizer/README.md @@ -1,8 +1,8 @@ -# svg-sanitizer +# svg-sanitizer 0.21.0 -[![Build Status](https://travis-ci.org/darylldoyle/svg-sanitizer.svg?branch=master)](https://travis-ci.org/darylldoyle/svg-sanitizer) [![Test Coverage](https://codeclimate.com/github/darylldoyle/svg-sanitizer/badges/coverage.svg)](https://codeclimate.com/github/darylldoyle/svg-sanitizer/coverage) +[![Build Status](https://github.com/darylldoyle/svg-sanitizer/actions/workflows/tests.yml/badge.svg?branch=master)](https://travis-ci.org/darylldoyle/svg-sanitizer) [![Test Coverage](https://codeclimate.com/github/darylldoyle/svg-sanitizer/badges/coverage.svg)](https://codeclimate.com/github/darylldoyle/svg-sanitizer/coverage) -This is my attempt at building a decent SVG sanitizer in PHP. The work is laregely borrowed from [DOMPurify](https://github.com/cure53/DOMPurify). +This is my attempt at building a decent SVG sanitizer in PHP. The work is largely borrowed from [DOMPurify](https://github.com/cure53/DOMPurify). ## Installation @@ -40,17 +40,17 @@ You may pass your own whitelist of tags and attributes by using the `Sanitizer:: These methods require that you implement the `enshrined\svgSanitize\data\TagInterface` or `enshrined\svgSanitize\data\AttributeInterface`. -## Remove remote references +## Remove remote references -You have the option to remove attributes that reference remote files, this will stop HTTP leaks but will add an overhead to the sanitiser. +You have the option to remove attributes that reference remote files, this will stop HTTP leaks but will add an overhead to the sanitizer. This defaults to false, set to true to remove references. `$sanitizer->removeRemoteReferences(true);` -## Viewing Sanitisation Issues +## Viewing Sanitization Issues -You may use the `getXmlIssues()` method to return an array of issues that occurred during sanitisation. +You may use the `getXmlIssues()` method to return an array of issues that occurred during sanitization. This may be useful for logging or providing feedback to the user on why an SVG was refused. @@ -58,7 +58,7 @@ This may be useful for logging or providing feedback to the user on why an SVG w ## Minification -You can minify the XML output by calling `$sanitiser->minify(true);`. +You can minify the XML output by calling `$sanitizer->minify(true);`. ## Demo There is a demo available at: [http://svg.enshrined.co.uk/](http://svg.enshrined.co.uk/) @@ -71,9 +71,14 @@ I've just released a WordPress plugin containing this code so you can sanitize y [Michael Potter](https://github.com/heyMP) has kindly created a Drupal module for this library which is available at: [https://www.drupal.org/project/svg_sanitizer](https://www.drupal.org/project/svg_sanitizer) +## TYPO3 + +This SVG sanitizer library is used per default in the core of TYPO3 v9 and later versions. +See [corresponding changelog entry](https://docs.typo3.org/c/typo3/cms-core/main/en-us/Changelog/9.5.x/Important-94492-IntroduceSVGSanitizer.html) for more details. + ## Tests -You can run these by running `phpunit` +You can run these by running `vendor/bin/phpunit` from the base directory of this package. ## Standalone scanning of files via CLI @@ -85,4 +90,4 @@ Use it as follows: `php svg-scanner.php ~/svgs/myfile.svg` ## To-Do -More extensive testing for the SVGs/XML would be lovely, I'll try and add these soon. If you feel like doing it for me, please do and make a PR! \ No newline at end of file +More extensive testing for the SVGs/XML would be lovely, I'll try and add these soon. If you feel like doing it for me, please do and make a PR! diff --git a/include/thirdparty/svg-sanitizer/Sanitizer.php b/include/thirdparty/svg-sanitizer/Sanitizer.php index 58c8111..c0cb120 100644 --- a/include/thirdparty/svg-sanitizer/Sanitizer.php +++ b/include/thirdparty/svg-sanitizer/Sanitizer.php @@ -1,5 +1,4 @@ xmlIssues; } + /** + * Can we allow huge files? + * + * @return bool + */ + public function getAllowHugeFiles() { + return $this->allowHugeFiles; + } + + /** + * Set whether we can allow huge files. + * + * @param bool $allowHugeFiles + */ + public function setAllowHugeFiles( $allowHugeFiles ) { + $this->allowHugeFiles = $allowHugeFiles; + } + /** * Sanitize the passed string * * @param string $dirty - * @return string + * @return string|false */ public function sanitize($dirty) { @@ -194,16 +220,22 @@ class Sanitizer return ''; } - // Strip php tags - $dirty = preg_replace('/<\?(=|php)(.+?)\?>/i', '', $dirty); + do { + /* + * recursively remove php tags because they can be hidden inside tags + * i.e. hp echo . ' danger! ';?> + */ + $dirty = preg_replace('/<\?(=|php)(.+?)\?>/i', '', $dirty); + } while (preg_match('/<\?(=|php)(.+?)\?>/i', $dirty) != 0); $this->resetInternal(); $this->setUpBefore(); - $loaded = $this->xmlDocument->loadXML($dirty); + $loaded = $this->xmlDocument->loadXML($dirty, $this->getAllowHugeFiles() ? LIBXML_PARSEHUGE : 0); // If we couldn't parse the XML then we go no further. Reset and return false if (!$loaded) { + $this->xmlIssues = self::getXmlErrors(); $this->resetAfter(); return false; } @@ -214,13 +246,8 @@ class Sanitizer $this->elementReferenceResolver->collect(); $elementsToRemove = $this->elementReferenceResolver->getElementsToRemove(); - // Grab all the elements - $allElements = $this->xmlDocument->getElementsByTagName("*"); - - // remove doctype after node elements have been analyzed - $this->removeDoctype(); - // Start the cleaning proccess - $this->startClean($allElements, $elementsToRemove); + // Start the cleaning process + $this->startClean($this->xmlDocument->childNodes, $elementsToRemove); // Save cleaned XML to a variable if ($this->removeXMLTag) { @@ -252,8 +279,9 @@ class Sanitizer $this->xmlLoaderValue = libxml_disable_entity_loader(true); } - // Suppress the errors because we don't really have to worry about formation before cleansing - libxml_use_internal_errors(true); + // Suppress the errors because we don't really have to worry about formation before cleansing. + // See reset in resetAfter(). + $this->xmlErrorHandlerPreviousValue = libxml_use_internal_errors(true); // Reset array of altered XML $this->xmlIssues = array(); @@ -270,19 +298,9 @@ class Sanitizer // Reset the entity loader libxml_disable_entity_loader($this->xmlLoaderValue); } - } - /** - * Remove the XML Doctype - * It may be caught later on output but that seems to be buggy, so we need to make sure it's gone - */ - protected function removeDoctype() - { - foreach ($this->xmlDocument->childNodes as $child) { - if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) { - $child->parentNode->removeChild($child); - } - } + libxml_clear_errors(); + libxml_use_internal_errors($this->xmlErrorHandlerPreviousValue); } /** @@ -316,33 +334,63 @@ class Sanitizer continue; } - // If the tag isn't in the whitelist, remove it and continue with next iteration - if (!in_array(strtolower($currentElement->tagName), $this->allowedTags)) { - $currentElement->parentNode->removeChild($currentElement); - $this->xmlIssues[] = array( - 'message' => 'Suspicious tag \'' . $currentElement->tagName . '\'', - 'line' => $currentElement->getLineNo(), - ); - continue; - } - - $this->cleanHrefs($currentElement); - - $this->cleanXlinkHrefs($currentElement); - - $this->cleanAttributesOnWhitelist($currentElement); - - if (strtolower($currentElement->tagName) === 'use') { - if ($this->isUseTagDirty($currentElement) - || $this->isUseTagExceedingThreshold($currentElement) - ) { + if ($currentElement instanceof \DOMElement) { + // If the tag isn't in the whitelist, remove it and continue with next iteration + if (!in_array(strtolower($currentElement->tagName), $this->allowedTags)) { $currentElement->parentNode->removeChild($currentElement); $this->xmlIssues[] = array( - 'message' => 'Suspicious \'' . $currentElement->tagName . '\'', + 'message' => 'Suspicious tag \'' . $currentElement->tagName . '\'', 'line' => $currentElement->getLineNo(), ); continue; } + + $this->cleanHrefs( $currentElement ); + + $this->cleanXlinkHrefs( $currentElement ); + + $this->cleanAttributesOnWhitelist($currentElement); + + if (strtolower($currentElement->tagName) === 'use') { + if ($this->isUseTagDirty($currentElement) + || $this->isUseTagExceedingThreshold($currentElement) + ) { + $currentElement->parentNode->removeChild($currentElement); + $this->xmlIssues[] = array( + 'message' => 'Suspicious \'' . $currentElement->tagName . '\'', + 'line' => $currentElement->getLineNo(), + ); + continue; + } + } + + // Strip out font elements that will break out of foreign content. + if (strtolower($currentElement->tagName) === 'font') { + $breaksOutOfForeignContent = false; + for ($x = $currentElement->attributes->length - 1; $x >= 0; $x--) { + // get attribute name + $attrName = $currentElement->attributes->item( $x )->nodeName; + + if (in_array(strtolower($attrName), ['face', 'color', 'size'])) { + $breaksOutOfForeignContent = true; + } + } + + if ($breaksOutOfForeignContent) { + $currentElement->parentNode->removeChild($currentElement); + $this->xmlIssues[] = array( + 'message' => 'Suspicious tag \'' . $currentElement->tagName . '\'', + 'line' => $currentElement->getLineNo(), + ); + continue; + } + } + } + + $this->cleanUnsafeNodes($currentElement); + + if ($currentElement->hasChildNodes()) { + $this->startClean($currentElement->childNodes, $elementsToRemove); } } } @@ -356,7 +404,7 @@ class Sanitizer { for ($x = $element->attributes->length - 1; $x >= 0; $x--) { // get attribute name - $attrName = $element->attributes->item($x)->name; + $attrName = $element->attributes->item($x)->nodeName; // Remove attribute if not in whitelist if (!in_array(strtolower($attrName), $this->allowedAttrs) && !$this->isAriaAttribute(strtolower($attrName)) && !$this->isDataAttribute(strtolower($attrName))) { @@ -432,15 +480,15 @@ class Sanitizer } } -/** - * Only allow whitelisted starts to be within the href. - * - * This will stop scripts etc from being passed through, with or without attempting to hide bypasses. - * This stops the need for us to use a complicated script regex. - * - * @param $value - * @return bool - */ + /** + * Only allow whitelisted starts to be within the href. + * + * This will stop scripts etc from being passed through, with or without attempting to hide bypasses. + * This stops the need for us to use a complicated script regex. + * + * @param $value + * @return bool + */ protected function isHrefSafeValue($value) { // Allow empty values @@ -476,7 +524,7 @@ class Sanitizer 'data:image/jpe', // JPEG 'data:image/pjp', // PJPEG ))) { - return true; + return true; } // Allow known short data URIs. @@ -627,4 +675,50 @@ class Sanitizer { $this->useNestingLimit = (int) $limit; } + + /** + * Remove nodes that are either invalid or malformed. + * + * @param \DOMNode $currentElement The current element. + */ + protected function cleanUnsafeNodes(\DOMNode $currentElement) { + // Replace CDATA node with encoded text node + if ($currentElement instanceof \DOMCdataSection) { + $textNode = $currentElement->ownerDocument->createTextNode($currentElement->nodeValue); + $currentElement->parentNode->replaceChild($textNode, $currentElement); + // If the element doesn't have a tagname, remove it and continue with next iteration + } elseif (!$currentElement instanceof \DOMElement && !$currentElement instanceof \DOMText) { + $currentElement->parentNode->removeChild($currentElement); + $this->xmlIssues[] = array( + 'message' => 'Suspicious node \'' . $currentElement->nodeName . '\'', + 'line' => $currentElement->getLineNo(), + ); + return; + } + + if ( $currentElement->childNodes && $currentElement->childNodes->length > 0 ) { + for ($j = $currentElement->childNodes->length - 1; $j >= 0; $j--) { + /** @var \DOMElement $childElement */ + $childElement = $currentElement->childNodes->item($j); + $this->cleanUnsafeNodes($childElement); + } + } + } + + /** + * Retrieve array of errors + * @return array + */ + private static function getXmlErrors() + { + $errors = []; + foreach (libxml_get_errors() as $error) { + $errors[] = [ + 'message' => trim($error->message), + 'line' => $error->line, + ]; + } + + return $errors; + } } diff --git a/include/thirdparty/svg-sanitizer/data/AllowedAttributes.php b/include/thirdparty/svg-sanitizer/data/AllowedAttributes.php index a192934..7121670 100644 --- a/include/thirdparty/svg-sanitizer/data/AllowedAttributes.php +++ b/include/thirdparty/svg-sanitizer/data/AllowedAttributes.php @@ -1,9 +1,6 @@