diff --git a/include/thirdparty/svg-sanitizer/ElementReference/Resolver.php b/include/thirdparty/svg-sanitizer/ElementReference/Resolver.php new file mode 100644 index 0000000..cd7a840 --- /dev/null +++ b/include/thirdparty/svg-sanitizer/ElementReference/Resolver.php @@ -0,0 +1,169 @@ +xPath = $xPath; + $this->useNestingLimit = $useNestingLimit; + } + + public function collect() + { + $this->collectIdentifiedElements(); + $this->processReferences(); + $this->determineInvalidSubjects(); + } + + /** + * Resolves one subject by element. + * + * @param \DOMElement $element + * @param bool $considerChildren Whether to search in Subject's children as well + * @return Subject|null + */ + public function findByElement(\DOMElement $element, $considerChildren = false) + { + foreach ($this->subjects as $subject) { + if ( + $element === $subject->getElement() + || $considerChildren && Helper::isElementContainedIn($element, $subject->getElement()) + ) { + return $subject; + } + } + return null; + } + + /** + * Resolves subjects (plural!) by element id - in theory malformed + * DOM might have same ids assigned to different elements and leaving + * it to client/browser implementation which element to actually use. + * + * @param string $elementId + * @return Subject[] + */ + public function findByElementId($elementId) + { + return array_filter( + $this->subjects, + function (Subject $subject) use ($elementId) { + return $elementId === $subject->getElementId(); + } + ); + } + + /** + * Collects elements having `id` attribute (those that can be referenced). + */ + protected function collectIdentifiedElements() + { + /** @var \DOMNodeList|\DOMElement[] $elements */ + $elements = $this->xPath->query('//*[@id]'); + foreach ($elements as $element) { + $this->subjects[$element->getAttribute('id')] = new Subject($element, $this->useNestingLimit); + } + } + + /** + * Processes references from and to elements having `id` attribute concerning + * their occurrence in `` statements. + */ + protected function processReferences() + { + $useNodeName = $this->xPath->createNodeName('use'); + foreach ($this->subjects as $subject) { + $useElements = $this->xPath->query( + $useNodeName . '[@href or @xlink:href]', + $subject->getElement() + ); + + /** @var \DOMElement $useElement */ + foreach ($useElements as $useElement) { + $useId = Helper::extractIdReferenceFromHref( + Helper::getElementHref($useElement) + ); + if ($useId === null || !isset($this->subjects[$useId])) { + continue; + } + $subject->addUse($this->subjects[$useId]); + $this->subjects[$useId]->addUsedIn($subject); + } + } + } + + /** + * Determines and tags infinite loops. + */ + protected function determineInvalidSubjects() + { + foreach ($this->subjects as $subject) { + + if (in_array($subject->getElement(), $this->elementsToRemove)) { + continue; + } + + $useId = Helper::extractIdReferenceFromHref( + Helper::getElementHref($subject->getElement()) + ); + + try { + if ($useId === $subject->getElementId()) { + $this->markSubjectAsInvalid($subject); + } elseif ($subject->hasInfiniteLoop()) { + $this->markSubjectAsInvalid($subject); + } + } catch (NestingException $e) { + $this->elementsToRemove[] = $e->getElement(); + $this->markSubjectAsInvalid($subject); + } + } + } + + /** + * Get all the elements that caused a nesting exception. + * + * @return array + */ + public function getElementsToRemove() { + return $this->elementsToRemove; + } + + /** + * The Subject is invalid for some reason, therefore we should + * remove it and all it's child usages. + * + * @param Subject $subject + */ + protected function markSubjectAsInvalid(Subject $subject) { + $this->elementsToRemove = array_merge( + $this->elementsToRemove, + $subject->clearInternalAndGetAffectedElements() + ); + } +} \ No newline at end of file diff --git a/include/thirdparty/svg-sanitizer/ElementReference/Subject.php b/include/thirdparty/svg-sanitizer/ElementReference/Subject.php new file mode 100644 index 0000000..3610f0f --- /dev/null +++ b/include/thirdparty/svg-sanitizer/ElementReference/Subject.php @@ -0,0 +1,153 @@ +element = $element; + $this->useNestingLimit = $useNestingLimit; + } + + /** + * @return \DOMElement + */ + public function getElement() + { + return $this->element; + } + + /** + * @return string + */ + public function getElementId() + { + return $this->element->getAttribute('id'); + } + + /** + * @param array $subjects Previously processed subjects + * @param int $level The current level of nesting. + * @return bool + * @throws \enshrined\svgSanitize\Exceptions\NestingException + */ + public function hasInfiniteLoop(array $subjects = [], $level = 1) + { + if ($level > $this->useNestingLimit) { + throw new \enshrined\svgSanitize\Exceptions\NestingException('Nesting level too high, aborting', 1570713498, null, $this->getElement()); + } + + if (in_array($this, $subjects, true)) { + return true; + } + $subjects[] = $this; + foreach ($this->useCollection as $usage) { + if ($usage->getSubject()->hasInfiniteLoop($subjects, $level + 1)) { + return true; + } + } + return false; + } + + /** + * @param Subject $subject + */ + public function addUse(Subject $subject) + { + if ($subject === $this) { + throw new \LogicException('Cannot add self usage', 1570713416); + } + $identifier = $subject->getElementId(); + if (isset($this->useCollection[$identifier])) { + $this->useCollection[$identifier]->increment(); + return; + } + $this->useCollection[$identifier] = new Usage($subject); + } + + /** + * @param Subject $subject + */ + public function addUsedIn(Subject $subject) + { + if ($subject === $this) { + throw new \LogicException('Cannot add self as usage', 1570713417); + } + $identifier = $subject->getElementId(); + if (isset($this->usedInCollection[$identifier])) { + $this->usedInCollection[$identifier]->increment(); + return; + } + $this->usedInCollection[$identifier] = new Usage($subject); + } + + /** + * @param bool $accumulated + * @return int + */ + public function countUse($accumulated = false) + { + $count = 0; + foreach ($this->useCollection as $use) { + $useCount = $use->getSubject()->countUse(); + $count += $use->getCount() * ($accumulated ? 1 + $useCount : max(1, $useCount)); + } + return $count; + } + + /** + * @return int + */ + public function countUsedIn() + { + $count = 0; + foreach ($this->usedInCollection as $usedIn) { + $count += $usedIn->getCount() * max(1, $usedIn->getSubject()->countUsedIn()); + } + return $count; + } + + /** + * Clear the internal arrays (to free up memory as they can get big) + * and return all the child usages DOMElement's + * + * @return array + */ + public function clearInternalAndGetAffectedElements() + { + $elements = array_map(function(Usage $usage) { + return $usage->getSubject()->getElement(); + }, $this->useCollection); + + $this->usedInCollection = []; + $this->useCollection = []; + + return $elements; + } +} \ No newline at end of file diff --git a/include/thirdparty/svg-sanitizer/ElementReference/Usage.php b/include/thirdparty/svg-sanitizer/ElementReference/Usage.php new file mode 100644 index 0000000..d0ba62d --- /dev/null +++ b/include/thirdparty/svg-sanitizer/ElementReference/Usage.php @@ -0,0 +1,49 @@ +subject = $subject; + $this->count = (int)$count; + } + + /** + * @param int $by + */ + public function increment($by = 1) + { + $this->count += (int)$by; + } + + /** + * @return Subject + */ + public function getSubject() + { + return $this->subject; + } + + /** + * @return int + */ + public function getCount() + { + return $this->count; + } +} \ No newline at end of file diff --git a/include/thirdparty/svg-sanitizer/Exceptions/NestingException.php b/include/thirdparty/svg-sanitizer/Exceptions/NestingException.php new file mode 100644 index 0000000..cc7b4cb --- /dev/null +++ b/include/thirdparty/svg-sanitizer/Exceptions/NestingException.php @@ -0,0 +1,39 @@ +element = $element; + parent::__construct($message, $code, $previous); + } + + /** + * Get the element that caused the exception. + * + * @return \DOMElement + */ + public function getElement() + { + return $this->element; + } +} \ No newline at end of file diff --git a/include/thirdparty/svg-sanitizer/Helper.php b/include/thirdparty/svg-sanitizer/Helper.php new file mode 100644 index 0000000..6e25003 --- /dev/null +++ b/include/thirdparty/svg-sanitizer/Helper.php @@ -0,0 +1,53 @@ +hasAttribute('href')) { + return $element->getAttribute('href'); + } + if ($element->hasAttributeNS('http://www.w3.org/1999/xlink', 'href')) { + return $element->getAttributeNS('http://www.w3.org/1999/xlink', 'href'); + } + return null; + } + + /** + * @param string $href + * @return string|null + */ + public static function extractIdReferenceFromHref($href) + { + if (!is_string($href) || strpos($href, '#') !== 0) { + return null; + } + return substr($href, 1); + } + + /** + * @param \DOMElement $needle + * @param \DOMElement $haystack + * @return bool + */ + public static function isElementContainedIn(\DOMElement $needle, \DOMElement $haystack) + { + if ($needle === $haystack) { + return true; + } + foreach ($haystack->childNodes as $childNode) { + if (!$childNode instanceof \DOMElement) { + continue; + } + if (self::isElementContainedIn($needle, $childNode)) { + return true; + } + } + return false; + } +} diff --git a/include/thirdparty/svg-sanitizer/Sanitizer.php b/include/thirdparty/svg-sanitizer/Sanitizer.php index b621987..58c8111 100644 --- a/include/thirdparty/svg-sanitizer/Sanitizer.php +++ b/include/thirdparty/svg-sanitizer/Sanitizer.php @@ -2,11 +2,13 @@ namespace enshrined\svgSanitize; -use DOMDocument; use enshrined\svgSanitize\data\AllowedAttributes; use enshrined\svgSanitize\data\AllowedTags; use enshrined\svgSanitize\data\AttributeInterface; use enshrined\svgSanitize\data\TagInterface; +use enshrined\svgSanitize\data\XPath; +use enshrined\svgSanitize\ElementReference\Resolver; +use enshrined\svgSanitize\ElementReference\Subject; /** * Class Sanitizer @@ -17,12 +19,7 @@ class Sanitizer { /** - * Regex to catch script and data values in attributes - */ - const SCRIPT_REGEX = '/(?:\w+script|data):/xi'; - - /** - * @var DOMDocument + * @var \DOMDocument */ protected $xmlDocument; @@ -51,6 +48,11 @@ class Sanitizer */ protected $removeRemoteReferences = false; + /** + * @var int + */ + protected $useThreshold = 1000; + /** * @var bool */ @@ -66,6 +68,16 @@ class Sanitizer */ protected $xmlIssues = array(); + /** + * @var Resolver + */ + protected $elementReferenceResolver; + + /** + * @var int + */ + protected $useNestingLimit = 15; + /** * */ @@ -81,7 +93,7 @@ class Sanitizer */ protected function resetInternal() { - $this->xmlDocument = new DOMDocument(); + $this->xmlDocument = new \DOMDocument(); $this->xmlDocument->preserveWhiteSpace = false; $this->xmlDocument->strictErrorChecking = false; $this->xmlDocument->formatOutput = !$this->minifyXML; @@ -90,7 +102,7 @@ class Sanitizer /** * Set XML options to use when saving XML * See: DOMDocument::saveXML - * + * * @param int $xmlOptions */ public function setXMLOptions($xmlOptions) @@ -98,15 +110,15 @@ class Sanitizer $this->xmlOptions = $xmlOptions; } - /** + /** * Get XML options to use when saving XML * See: DOMDocument::saveXML - * + * * @return int */ public function getXMLOptions() { - return $this->xmlOptions; + return $this->xmlOptions; } /** @@ -165,7 +177,7 @@ class Sanitizer * @return array */ public function getXmlIssues() { - return $this->xmlIssues; + return $this->xmlIssues; } @@ -196,13 +208,19 @@ class Sanitizer return false; } - $this->removeDoctype(); + // Pre-process all identified elements + $xPath = new XPath($this->xmlDocument); + $this->elementReferenceResolver = new Resolver($xPath, $this->useNestingLimit); + $this->elementReferenceResolver->collect(); + $elementsToRemove = $this->elementReferenceResolver->getElementsToRemove(); // Grab all the elements $allElements = $this->xmlDocument->getElementsByTagName("*"); + // remove doctype after node elements have been analyzed + $this->removeDoctype(); // Start the cleaning proccess - $this->startClean($allElements); + $this->startClean($allElements, $elementsToRemove); // Save cleaned XML to a variable if ($this->removeXMLTag) { @@ -227,12 +245,16 @@ class Sanitizer */ protected function setUpBefore() { - // Turn off the entity loader - $this->xmlLoaderValue = libxml_disable_entity_loader(true); + // This function has been deprecated in PHP 8.0 because in libxml 2.9.0, external entity loading is + // disabled by default, so this function is no longer needed to protect against XXE attacks. + if (\LIBXML_VERSION < 20900) { + // Turn off the entity loader + $this->xmlLoaderValue = libxml_disable_entity_loader(true); + } // Suppress the errors because we don't really have to worry about formation before cleansing libxml_use_internal_errors(true); - + // Reset array of altered XML $this->xmlIssues = array(); } @@ -242,8 +264,12 @@ class Sanitizer */ protected function resetAfter() { - // Reset the entity loader - libxml_disable_entity_loader($this->xmlLoaderValue); + // This function has been deprecated in PHP 8.0 because in libxml 2.9.0, external entity loading is + // disabled by default, so this function is no longer needed to protect against XXE attacks. + if (\LIBXML_VERSION < 20900) { + // Reset the entity loader + libxml_disable_entity_loader($this->xmlLoaderValue); + } } /** @@ -263,37 +289,57 @@ class Sanitizer * Start the cleaning with tags, then we move onto attributes and hrefs later * * @param \DOMNodeList $elements + * @param array $elementsToRemove */ - protected function startClean(\DOMNodeList $elements) + protected function startClean(\DOMNodeList $elements, array $elementsToRemove) { // loop through all elements // we do this backwards so we don't skip anything if we delete a node // see comments at: http://php.net/manual/en/class.domnamednodemap.php for ($i = $elements->length - 1; $i >= 0; $i--) { + /** @var \DOMElement $currentElement */ $currentElement = $elements->item($i); + /** + * If the element has exceeded the nesting limit, we should remove it. + * + * As it's only elements that cause us issues with nesting DOS attacks + * we should check what the element is before removing it. For now we'll only + * remove elements. + */ + if (in_array($currentElement, $elementsToRemove) && 'use' === $currentElement->nodeName) { + $currentElement->parentNode->removeChild($currentElement); + $this->xmlIssues[] = array( + 'message' => 'Invalid \'' . $currentElement->tagName . '\'', + 'line' => $currentElement->getLineNo(), + ); + continue; + } + // If the tag isn't in the whitelist, remove it and continue with next iteration if (!in_array(strtolower($currentElement->tagName), $this->allowedTags)) { $currentElement->parentNode->removeChild($currentElement); $this->xmlIssues[] = array( 'message' => 'Suspicious tag \'' . $currentElement->tagName . '\'', 'line' => $currentElement->getLineNo(), - ); + ); continue; } - $this->cleanAttributesOnWhitelist($currentElement); + $this->cleanHrefs($currentElement); $this->cleanXlinkHrefs($currentElement); - $this->cleanHrefs($currentElement); + $this->cleanAttributesOnWhitelist($currentElement); if (strtolower($currentElement->tagName) === 'use') { - if ($this->isUseTagDirty($currentElement)) { + if ($this->isUseTagDirty($currentElement) + || $this->isUseTagExceedingThreshold($currentElement) + ) { $currentElement->parentNode->removeChild($currentElement); $this->xmlIssues[] = array( 'message' => 'Suspicious \'' . $currentElement->tagName . '\'', - 'line' => $currentElement->getLineNo(), + 'line' => $currentElement->getLineNo(), ); continue; } @@ -319,7 +365,23 @@ class Sanitizer $this->xmlIssues[] = array( 'message' => 'Suspicious attribute \'' . $attrName . '\'', 'line' => $element->getLineNo(), - ); + ); + } + + /** + * This is used for when a namespace isn't imported properly. + * Such as xlink:href when the xlink namespace isn't imported. + * We have to do this as the link is still ran in this case. + */ + if (false !== strpos($attrName, 'href')) { + $href = $element->getAttribute($attrName); + if (false === $this->isHrefSafeValue($href)) { + $element->removeAttribute($attrName); + $this->xmlIssues[] = array( + 'message' => 'Suspicious attribute \'href\'', + 'line' => $element->getLineNo(), + ); + } } // Do we want to strip remote references? @@ -330,7 +392,7 @@ class Sanitizer $this->xmlIssues[] = array( 'message' => 'Suspicious attribute \'' . $attrName . '\'', 'line' => $element->getLineNo(), - ); + ); } } } @@ -344,22 +406,12 @@ class Sanitizer protected function cleanXlinkHrefs(\DOMElement $element) { $xlinks = $element->getAttributeNS('http://www.w3.org/1999/xlink', 'href'); - if (preg_match(self::SCRIPT_REGEX, $xlinks) === 1) { - if (!in_array(substr($xlinks, 0, 14), array( - 'data:image/png', // PNG - 'data:image/gif', // GIF - 'data:image/jpg', // JPG - 'data:image/jpe', // JPEG - 'data:image/pjp', // PJPEG - ))) { - $element->removeAttributeNS( 'http://www.w3.org/1999/xlink', 'href' ); - $this->xmlIssues[] = array( - 'message' => 'Suspicious attribute \'href\'', - 'line' => $element->getLineNo(), - ); - - - } + if (false === $this->isHrefSafeValue($xlinks)) { + $element->removeAttributeNS( 'http://www.w3.org/1999/xlink', 'href' ); + $this->xmlIssues[] = array( + 'message' => 'Suspicious attribute \'href\'', + 'line' => $element->getLineNo(), + ); } } @@ -371,7 +423,7 @@ class Sanitizer protected function cleanHrefs(\DOMElement $element) { $href = $element->getAttribute('href'); - if (preg_match(self::SCRIPT_REGEX, $href) === 1) { + if (false === $this->isHrefSafeValue($href)) { $element->removeAttribute('href'); $this->xmlIssues[] = array( 'message' => 'Suspicious attribute \'href\'', @@ -380,6 +432,67 @@ class Sanitizer } } +/** + * Only allow whitelisted starts to be within the href. + * + * This will stop scripts etc from being passed through, with or without attempting to hide bypasses. + * This stops the need for us to use a complicated script regex. + * + * @param $value + * @return bool + */ + protected function isHrefSafeValue($value) { + + // Allow empty values + if (empty($value)) { + return true; + } + + // Allow fragment identifiers. + if ('#' === substr($value, 0, 1)) { + return true; + } + + // Allow relative URIs. + if ('/' === substr($value, 0, 1)) { + return true; + } + + // Allow HTTPS domains. + if ('https://' === substr($value, 0, 8)) { + return true; + } + + // Allow HTTP domains. + if ('http://' === substr($value, 0, 7)) { + return true; + } + + // Allow known data URIs. + if (in_array(substr($value, 0, 14), array( + 'data:image/png', // PNG + 'data:image/gif', // GIF + 'data:image/jpg', // JPG + 'data:image/jpe', // JPEG + 'data:image/pjp', // PJPEG + ))) { + return true; + } + + // Allow known short data URIs. + if (in_array(substr($value, 0, 12), array( + 'data:img/png', // PNG + 'data:img/gif', // GIF + 'data:img/jpg', // JPG + 'data:img/jpe', // JPEG + 'data:img/pjp', // PJPEG + ))) { + return true; + } + + return false; + } + /** * Removes non-printable ASCII characters from string & trims it * @@ -431,6 +544,17 @@ class Sanitizer $this->removeXMLTag = (bool) $removeXMLTag; } + /** + * Whether `` elements shall be + * removed in case expansion would exceed this threshold. + * + * @param int $useThreshold + */ + public function useThreshold($useThreshold = 1000) + { + $this->useThreshold = (int)$useThreshold; + } + /** * Check to see if an attribute is an aria attribute or not * @@ -463,11 +587,44 @@ class Sanitizer */ protected function isUseTagDirty(\DOMElement $element) { - $xlinks = $element->getAttributeNS('http://www.w3.org/1999/xlink', 'href'); - if ($xlinks && substr($xlinks, 0, 1) !== '#') { - return true; - } + $href = Helper::getElementHref($element); + return $href && strpos($href, '#') !== 0; + } + /** + * Determines whether `` is expanded + * recursively in order to create DoS scenarios. The amount of a actually + * used element needs to be below `$this->useThreshold`. + * + * @param \DOMElement $element + * @return bool + */ + protected function isUseTagExceedingThreshold(\DOMElement $element) + { + if ($this->useThreshold <= 0) { + return false; + } + $useId = Helper::extractIdReferenceFromHref( + Helper::getElementHref($element) + ); + if ($useId === null) { + return false; + } + foreach ($this->elementReferenceResolver->findByElementId($useId) as $subject) { + if ($subject->countUse() >= $this->useThreshold) { + return true; + } + } return false; } + + /** + * Set the nesting limit for tags. + * + * @param $limit + */ + public function setUseNestingLimit($limit) + { + $this->useNestingLimit = (int) $limit; + } } diff --git a/include/thirdparty/svg-sanitizer/data/AllowedAttributes.php b/include/thirdparty/svg-sanitizer/data/AllowedAttributes.php index a0c9788..a192934 100644 --- a/include/thirdparty/svg-sanitizer/data/AllowedAttributes.php +++ b/include/thirdparty/svg-sanitizer/data/AllowedAttributes.php @@ -21,6 +21,7 @@ class AllowedAttributes implements AttributeInterface { return array( // HTML + 'about', 'accept', 'action', 'align', @@ -46,6 +47,7 @@ class AllowedAttributes implements AttributeInterface 'disabled', 'download', 'enctype', + 'encoding', 'face', 'for', 'headers', @@ -108,6 +110,7 @@ class AllowedAttributes implements AttributeInterface 'usemap', 'valign', 'value', + 'version', 'width', 'xmlns', diff --git a/include/thirdparty/svg-sanitizer/data/AttributeInterface.php b/include/thirdparty/svg-sanitizer/data/AttributeInterface.php index d0e2082..f296ea2 100644 --- a/include/thirdparty/svg-sanitizer/data/AttributeInterface.php +++ b/include/thirdparty/svg-sanitizer/data/AttributeInterface.php @@ -1,6 +1,4 @@ handleDefaultNamespace(); + } + + /** + * @param string $nodeName + * @return string + */ + public function createNodeName($nodeName) + { + if (empty($this->defaultNamespaceURI)) { + return $nodeName; + } + return self::DEFAULT_NAMESPACE_PREFIX . ':' . $nodeName; + } + + protected function handleDefaultNamespace() + { + $rootElements = $this->getRootElements(); + + if (count($rootElements) !== 1) { + throw new \LogicException( + sprintf('Got %d svg elements, expected exactly one', count($rootElements)), + 1570870568 + ); + } + $this->defaultNamespaceURI = (string)$rootElements[0]->namespaceURI; + + if ($this->defaultNamespaceURI !== '') { + $this->registerNamespace(self::DEFAULT_NAMESPACE_PREFIX, $this->defaultNamespaceURI); + } + } + + /** + * @return \DOMElement[] + */ + protected function getRootElements() + { + $rootElements = []; + $elements = $this->document->getElementsByTagName('svg'); + /** @var \DOMElement $element */ + foreach ($elements as $element) { + if ($element->parentNode !== $this->document) { + continue; + } + $rootElements[] = $element; + } + return $rootElements; + } +} diff --git a/include/thirdparty/svg-sanitizer/svg-scanner.php b/include/thirdparty/svg-sanitizer/svg-scanner.php index 0da242f..e500771 100644 --- a/include/thirdparty/svg-sanitizer/svg-scanner.php +++ b/include/thirdparty/svg-sanitizer/svg-scanner.php @@ -12,9 +12,14 @@ require_once( __DIR__ . '/data/AttributeInterface.php' ); require_once( __DIR__ . '/data/TagInterface.php' ); require_once( __DIR__ . '/data/AllowedAttributes.php' ); require_once( __DIR__ . '/data/AllowedTags.php' ); +require_once( __DIR__ . '/data/XPath.php' ); +require_once( __DIR__ . '/ElementReference/Resolver.php' ); +require_once( __DIR__ . '/ElementReference/Subject.php' ); +require_once( __DIR__ . '/ElementReference/Usage.php' ); +require_once( __DIR__ . '/Exceptions/NestingException.php' ); +require_once( __DIR__ . '/Helper.php' ); require_once( __DIR__ . '/Sanitizer.php' ); - /* * Print array as JSON and then * exit program with a particular