svg sanitizer update

This commit is contained in:
Buchholz 2021-11-03 11:47:25 +01:00
parent bdf03254f5
commit 9642ea074b
11 changed files with 742 additions and 55 deletions

View File

@ -0,0 +1,169 @@
<?php
namespace enshrined\svgSanitize\ElementReference;
use enshrined\svgSanitize\data\XPath;
use enshrined\svgSanitize\Exceptions\NestingException;
use enshrined\svgSanitize\Helper;
class Resolver
{
    /**
     * XPath helper bound to the document that is being analysed.
     *
     * @var XPath
     */
    protected $xPath;

    /**
     * Subjects keyed by the value of their element's `id` attribute.
     *
     * @var Subject[]
     */
    protected $subjects = [];

    /**
     * Elements that were flagged for removal while resolving references.
     *
     * @var array DOMElement[]
     */
    protected $elementsToRemove = [];

    /**
     * Nesting limit handed to every Subject created by this resolver.
     *
     * @var int
     */
    protected $useNestingLimit;

    /**
     * @param XPath $xPath
     * @param int $useNestingLimit
     */
    public function __construct(XPath $xPath, $useNestingLimit)
    {
        $this->xPath = $xPath;
        $this->useNestingLimit = $useNestingLimit;
    }

    /**
     * Runs the analysis: collect elements carrying an `id`, wire up their
     * `<use>` references and finally flag the subjects that are invalid.
     */
    public function collect()
    {
        $this->collectIdentifiedElements();
        $this->processReferences();
        $this->determineInvalidSubjects();
    }

    /**
     * Resolves one subject by element.
     *
     * @param \DOMElement $element
     * @param bool $considerChildren Whether to search in Subject's children as well
     * @return Subject|null
     */
    public function findByElement(\DOMElement $element, $considerChildren = false)
    {
        foreach ($this->subjects as $subject) {
            $subjectElement = $subject->getElement();
            if ($element === $subjectElement) {
                return $subject;
            }
            if ($considerChildren && Helper::isElementContainedIn($element, $subjectElement)) {
                return $subject;
            }
        }
        return null;
    }

    /**
     * Resolves subjects (plural!) by element id - in theory malformed
     * DOM might have same ids assigned to different elements and leaving
     * it to client/browser implementation which element to actually use.
     *
     * The result keeps the keys of the internal subject map.
     *
     * @param string $elementId
     * @return Subject[]
     */
    public function findByElementId($elementId)
    {
        return array_filter(
            $this->subjects,
            function (Subject $subject) use ($elementId) {
                return $elementId === $subject->getElementId();
            }
        );
    }

    /**
     * Collects elements having `id` attribute (those that can be referenced).
     *
     * Subjects are stored under their id, so a later element reusing an id
     * replaces the earlier one in the map.
     */
    protected function collectIdentifiedElements()
    {
        /** @var \DOMNodeList|\DOMElement[] $elements */
        $elements = $this->xPath->query('//*[@id]');
        foreach ($elements as $element) {
            $this->subjects[$element->getAttribute('id')] = new Subject($element, $this->useNestingLimit);
        }
    }

    /**
     * Processes references from and to elements having `id` attribute concerning
     * their occurrence in `<use ... xlink:href="#identifier">` statements.
     */
    protected function processReferences()
    {
        $useNodeName = $this->xPath->createNodeName('use');
        foreach ($this->subjects as $subject) {
            $useElements = $this->xPath->query(
                $useNodeName . '[@href or @xlink:href]',
                $subject->getElement()
            );
            // query() yields false when the expression cannot be evaluated
            // (for instance when the `xlink` prefix is not declared); there
            // is nothing to iterate in that case.
            if (!$useElements instanceof \DOMNodeList) {
                continue;
            }
            /** @var \DOMElement $useElement */
            foreach ($useElements as $useElement) {
                $useId = Helper::extractIdReferenceFromHref(
                    Helper::getElementHref($useElement)
                );
                if ($useId === null || !isset($this->subjects[$useId])) {
                    continue;
                }
                $referenced = $this->subjects[$useId];
                if ($referenced === $subject) {
                    // A `<use>` nested in the very element it points to is a
                    // direct loop. Subject::addUse() rejects self usage with a
                    // LogicException, so queue the `<use>` for removal instead
                    // of letting crafted input abort the whole run.
                    $this->elementsToRemove[] = $useElement;
                    continue;
                }
                $subject->addUse($referenced);
                $referenced->addUsedIn($subject);
            }
        }
    }

    /**
     * Determines and tags infinite loops.
     */
    protected function determineInvalidSubjects()
    {
        foreach ($this->subjects as $subject) {
            // Strict mode checks for this exact node; loose object comparison
            // looks at class and property values and is not an identity test.
            if (in_array($subject->getElement(), $this->elementsToRemove, true)) {
                continue;
            }
            $useId = Helper::extractIdReferenceFromHref(
                Helper::getElementHref($subject->getElement())
            );
            try {
                // Either the element references its own id, or following its
                // usages leads back to an already visited subject.
                if ($useId === $subject->getElementId() || $subject->hasInfiniteLoop()) {
                    $this->markSubjectAsInvalid($subject);
                }
            } catch (NestingException $e) {
                // Nesting limit exceeded: remember the element reported by
                // the exception and invalidate the subject as well.
                $this->elementsToRemove[] = $e->getElement();
                $this->markSubjectAsInvalid($subject);
            }
        }
    }

    /**
     * Get all the elements that were flagged for removal during collect().
     *
     * @return array
     */
    public function getElementsToRemove()
    {
        return $this->elementsToRemove;
    }

    /**
     * The Subject is invalid for some reason, therefore we should
     * remove it and all its child usages.
     *
     * @param Subject $subject
     */
    protected function markSubjectAsInvalid(Subject $subject)
    {
        $this->elementsToRemove = array_merge(
            $this->elementsToRemove,
            $subject->clearInternalAndGetAffectedElements()
        );
    }
}

View File

@ -0,0 +1,153 @@
<?php
namespace enshrined\svgSanitize\ElementReference;
class Subject
{
    /**
     * The element this subject wraps.
     *
     * @var \DOMElement
     */
    protected $element;

    /**
     * Subjects used by this one, keyed by their element id.
     *
     * @var Usage[]
     */
    protected $useCollection = [];

    /**
     * Subjects this one is used in, keyed by their element id.
     *
     * @var Usage[]
     */
    protected $usedInCollection = [];

    /**
     * Highest nesting level accepted by hasInfiniteLoop().
     *
     * @var int
     */
    protected $useNestingLimit;

    /**
     * @param \DOMElement $element
     * @param int $useNestingLimit
     */
    public function __construct(\DOMElement $element, $useNestingLimit)
    {
        $this->useNestingLimit = $useNestingLimit;
        $this->element = $element;
    }

    /**
     * @return \DOMElement
     */
    public function getElement()
    {
        return $this->element;
    }

    /**
     * @return string Value of the wrapped element's `id` attribute
     */
    public function getElementId()
    {
        return $this->element->getAttribute('id');
    }

    /**
     * Follows the recorded usages depth-first and reports whether the walk
     * arrives at a subject that was already visited on the way.
     *
     * @param array $subjects Previously processed subjects
     * @param int $level The current level of nesting.
     * @return bool
     * @throws \enshrined\svgSanitize\Exceptions\NestingException When $level exceeds the nesting limit
     */
    public function hasInfiniteLoop(array $subjects = [], $level = 1)
    {
        if ($level > $this->useNestingLimit) {
            throw new \enshrined\svgSanitize\Exceptions\NestingException(
                'Nesting level too high, aborting',
                1570713498,
                null,
                $this->getElement()
            );
        }

        // Identity comparison: has this very subject been seen on the path?
        if (in_array($this, $subjects, true)) {
            return true;
        }

        $subjects[] = $this;
        $nextLevel = $level + 1;
        foreach ($this->useCollection as $usage) {
            $used = $usage->getSubject();
            if ($used->hasInfiniteLoop($subjects, $nextLevel)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Records that this subject uses $subject; repeated calls for the same
     * id only raise the usage counter.
     *
     * @param Subject $subject
     * @throws \LogicException When $subject is this instance
     */
    public function addUse(Subject $subject)
    {
        if ($subject === $this) {
            throw new \LogicException('Cannot add self usage', 1570713416);
        }

        $identifier = $subject->getElementId();
        if (!isset($this->useCollection[$identifier])) {
            $this->useCollection[$identifier] = new Usage($subject);
        } else {
            $this->useCollection[$identifier]->increment();
        }
    }

    /**
     * Records that this subject is used in $subject; repeated calls for the
     * same id only raise the usage counter.
     *
     * @param Subject $subject
     * @throws \LogicException When $subject is this instance
     */
    public function addUsedIn(Subject $subject)
    {
        if ($subject === $this) {
            throw new \LogicException('Cannot add self as usage', 1570713417);
        }

        $identifier = $subject->getElementId();
        if (!isset($this->usedInCollection[$identifier])) {
            $this->usedInCollection[$identifier] = new Usage($subject);
        } else {
            $this->usedInCollection[$identifier]->increment();
        }
    }

    /**
     * Sums the usage counters, each multiplied by a factor derived from the
     * used subject's own (non-accumulated) count.
     *
     * @param bool $accumulated Use `1 + nested` as factor instead of `max(1, nested)`
     * @return int
     */
    public function countUse($accumulated = false)
    {
        $total = 0;
        foreach ($this->useCollection as $usage) {
            $nested = $usage->getSubject()->countUse();
            $factor = $accumulated ? 1 + $nested : max(1, $nested);
            $total += $usage->getCount() * $factor;
        }

        return $total;
    }

    /**
     * Sums the used-in counters, each multiplied by the (at least 1)
     * used-in count of the referring subject.
     *
     * @return int
     */
    public function countUsedIn()
    {
        $total = 0;
        foreach ($this->usedInCollection as $usage) {
            $factor = max(1, $usage->getSubject()->countUsedIn());
            $total += $usage->getCount() * $factor;
        }

        return $total;
    }

    /**
     * Empties both internal collections (they can get big) and hands back
     * the elements of the subjects that were used, keyed by element id.
     *
     * @return array
     */
    public function clearInternalAndGetAffectedElements()
    {
        $affected = [];
        foreach ($this->useCollection as $identifier => $usage) {
            $affected[$identifier] = $usage->getSubject()->getElement();
        }

        $this->useCollection = [];
        $this->usedInCollection = [];

        return $affected;
    }
}

View File

@ -0,0 +1,49 @@
<?php
namespace enshrined\svgSanitize\ElementReference;
class Usage
{
    /**
     * The subject being counted.
     *
     * @var Subject
     */
    protected $subject;

    /**
     * How often the subject has been recorded.
     *
     * @var int
     */
    protected $count;

    /**
     * @param Subject $subject
     * @param int $count Initial counter value
     */
    public function __construct(Subject $subject, $count = 1)
    {
        $this->count = (int)$count;
        $this->subject = $subject;
    }

    /**
     * Raises the counter.
     *
     * @param int $by
     */
    public function increment($by = 1)
    {
        $this->count = $this->count + (int)$by;
    }

    /**
     * @return Subject
     */
    public function getSubject()
    {
        return $this->subject;
    }

    /**
     * @return int
     */
    public function getCount()
    {
        return $this->count;
    }
}

View File

@ -0,0 +1,39 @@
<?php
namespace enshrined\svgSanitize\Exceptions;
use Exception;
class NestingException extends \Exception
{
    /**
     * Element that was being processed when the exception was raised.
     *
     * @var \DOMElement|null
     */
    protected $element;

    /**
     * Builds the exception and remembers the offending element, if any.
     *
     * @param string $message
     * @param int $code
     * @param Exception|null $previous
     * @param \DOMElement|null $element
     */
    public function __construct($message = "", $code = 0, Exception $previous = null, \DOMElement $element = null)
    {
        parent::__construct($message, $code, $previous);
        $this->element = $element;
    }

    /**
     * Get the element that caused the exception.
     *
     * @return \DOMElement|null Null when no element was passed to the constructor
     */
    public function getElement()
    {
        return $this->element;
    }
}

View File

@ -0,0 +1,53 @@
<?php
namespace enshrined\svgSanitize;
class Helper
{
    /**
     * Reads the reference target of an element. A plain `href` attribute
     * wins over `xlink:href` when both are present.
     *
     * @param \DOMElement $element
     * @return string|null Null when the element carries neither attribute
     */
    public static function getElementHref(\DOMElement $element)
    {
        if ($element->hasAttribute('href')) {
            return $element->getAttribute('href');
        }
        if ($element->hasAttributeNS('http://www.w3.org/1999/xlink', 'href')) {
            return $element->getAttributeNS('http://www.w3.org/1999/xlink', 'href');
        }
        return null;
    }

    /**
     * Extracts the identifier of a same-document reference (`#identifier`).
     *
     * @param string $href
     * @return string|null Null when $href is not a string starting with `#`
     */
    public static function extractIdReferenceFromHref($href)
    {
        if (!is_string($href) || strpos($href, '#') !== 0) {
            return null;
        }
        // substr() returned false for a bare '#' before PHP 8.0; the cast
        // keeps the documented string return type on every PHP version.
        return (string)substr($href, 1);
    }

    /**
     * Tells whether $needle is $haystack itself or sits below it, connected
     * through element nodes only.
     *
     * The check climbs from $needle towards the root instead of scanning the
     * whole subtree of $haystack, so its cost depends on the depth of
     * $needle and no recursion is involved.
     *
     * @param \DOMElement $needle
     * @param \DOMElement $haystack
     * @return bool
     */
    public static function isElementContainedIn(\DOMElement $needle, \DOMElement $haystack)
    {
        $node = $needle;
        // Stop at the first non-element ancestor (document, fragment, ...).
        while ($node instanceof \DOMElement) {
            if ($node === $haystack) {
                return true;
            }
            $node = $node->parentNode;
        }
        return false;
    }
}

View File

@ -2,11 +2,13 @@
namespace enshrined\svgSanitize;
use DOMDocument;
use enshrined\svgSanitize\data\AllowedAttributes;
use enshrined\svgSanitize\data\AllowedTags;
use enshrined\svgSanitize\data\AttributeInterface;
use enshrined\svgSanitize\data\TagInterface;
use enshrined\svgSanitize\data\XPath;
use enshrined\svgSanitize\ElementReference\Resolver;
use enshrined\svgSanitize\ElementReference\Subject;
/**
* Class Sanitizer
@ -17,12 +19,7 @@ class Sanitizer
{
/**
* Regex to catch script and data values in attributes
*/
const SCRIPT_REGEX = '/(?:\w+script|data):/xi';
/**
* @var DOMDocument
* @var \DOMDocument
*/
protected $xmlDocument;
@ -51,6 +48,11 @@ class Sanitizer
*/
protected $removeRemoteReferences = false;
/**
* @var int
*/
protected $useThreshold = 1000;
/**
* @var bool
*/
@ -66,6 +68,16 @@ class Sanitizer
*/
protected $xmlIssues = array();
/**
* @var Resolver
*/
protected $elementReferenceResolver;
/**
* @var int
*/
protected $useNestingLimit = 15;
/**
*
*/
@ -81,7 +93,7 @@ class Sanitizer
*/
protected function resetInternal()
{
$this->xmlDocument = new DOMDocument();
$this->xmlDocument = new \DOMDocument();
$this->xmlDocument->preserveWhiteSpace = false;
$this->xmlDocument->strictErrorChecking = false;
$this->xmlDocument->formatOutput = !$this->minifyXML;
@ -90,7 +102,7 @@ class Sanitizer
/**
* Set XML options to use when saving XML
* See: DOMDocument::saveXML
*
*
* @param int $xmlOptions
*/
public function setXMLOptions($xmlOptions)
@ -98,15 +110,15 @@ class Sanitizer
$this->xmlOptions = $xmlOptions;
}
/**
/**
* Get XML options to use when saving XML
* See: DOMDocument::saveXML
*
*
* @return int
*/
public function getXMLOptions()
{
return $this->xmlOptions;
return $this->xmlOptions;
}
/**
@ -165,7 +177,7 @@ class Sanitizer
* @return array
*/
public function getXmlIssues() {
return $this->xmlIssues;
return $this->xmlIssues;
}
@ -196,13 +208,19 @@ class Sanitizer
return false;
}
$this->removeDoctype();
// Pre-process all identified elements
$xPath = new XPath($this->xmlDocument);
$this->elementReferenceResolver = new Resolver($xPath, $this->useNestingLimit);
$this->elementReferenceResolver->collect();
$elementsToRemove = $this->elementReferenceResolver->getElementsToRemove();
// Grab all the elements
$allElements = $this->xmlDocument->getElementsByTagName("*");
// remove doctype after node elements have been analyzed
$this->removeDoctype();
// Start the cleaning process
$this->startClean($allElements);
$this->startClean($allElements, $elementsToRemove);
// Save cleaned XML to a variable
if ($this->removeXMLTag) {
@ -227,12 +245,16 @@ class Sanitizer
*/
protected function setUpBefore()
{
// Turn off the entity loader
$this->xmlLoaderValue = libxml_disable_entity_loader(true);
// This function has been deprecated in PHP 8.0 because in libxml 2.9.0, external entity loading is
// disabled by default, so this function is no longer needed to protect against XXE attacks.
if (\LIBXML_VERSION < 20900) {
// Turn off the entity loader
$this->xmlLoaderValue = libxml_disable_entity_loader(true);
}
// Suppress the errors because we don't really have to worry about formation before cleansing
libxml_use_internal_errors(true);
// Reset array of altered XML
$this->xmlIssues = array();
}
@ -242,8 +264,12 @@ class Sanitizer
*/
protected function resetAfter()
{
// Reset the entity loader
libxml_disable_entity_loader($this->xmlLoaderValue);
// This function has been deprecated in PHP 8.0 because in libxml 2.9.0, external entity loading is
// disabled by default, so this function is no longer needed to protect against XXE attacks.
if (\LIBXML_VERSION < 20900) {
// Reset the entity loader
libxml_disable_entity_loader($this->xmlLoaderValue);
}
}
/**
@ -263,37 +289,57 @@ class Sanitizer
* Start the cleaning with tags, then we move onto attributes and hrefs later
*
* @param \DOMNodeList $elements
* @param array $elementsToRemove
*/
protected function startClean(\DOMNodeList $elements)
protected function startClean(\DOMNodeList $elements, array $elementsToRemove)
{
// loop through all elements
// we do this backwards so we don't skip anything if we delete a node
// see comments at: http://php.net/manual/en/class.domnamednodemap.php
for ($i = $elements->length - 1; $i >= 0; $i--) {
/** @var \DOMElement $currentElement */
$currentElement = $elements->item($i);
/**
* If the element has exceeded the nesting limit, we should remove it.
*
* As it's only <use> elements that cause us issues with nesting DOS attacks
* we should check what the element is before removing it. For now we'll only
* remove <use> elements.
*/
if (in_array($currentElement, $elementsToRemove) && 'use' === $currentElement->nodeName) {
$currentElement->parentNode->removeChild($currentElement);
$this->xmlIssues[] = array(
'message' => 'Invalid \'' . $currentElement->tagName . '\'',
'line' => $currentElement->getLineNo(),
);
continue;
}
// If the tag isn't in the whitelist, remove it and continue with next iteration
if (!in_array(strtolower($currentElement->tagName), $this->allowedTags)) {
$currentElement->parentNode->removeChild($currentElement);
$this->xmlIssues[] = array(
'message' => 'Suspicious tag \'' . $currentElement->tagName . '\'',
'line' => $currentElement->getLineNo(),
);
);
continue;
}
$this->cleanAttributesOnWhitelist($currentElement);
$this->cleanHrefs($currentElement);
$this->cleanXlinkHrefs($currentElement);
$this->cleanHrefs($currentElement);
$this->cleanAttributesOnWhitelist($currentElement);
if (strtolower($currentElement->tagName) === 'use') {
if ($this->isUseTagDirty($currentElement)) {
if ($this->isUseTagDirty($currentElement)
|| $this->isUseTagExceedingThreshold($currentElement)
) {
$currentElement->parentNode->removeChild($currentElement);
$this->xmlIssues[] = array(
'message' => 'Suspicious \'' . $currentElement->tagName . '\'',
'line' => $currentElement->getLineNo(),
'line' => $currentElement->getLineNo(),
);
continue;
}
@ -319,7 +365,23 @@ class Sanitizer
$this->xmlIssues[] = array(
'message' => 'Suspicious attribute \'' . $attrName . '\'',
'line' => $element->getLineNo(),
);
);
}
/**
* This is used for when a namespace isn't imported properly.
* Such as xlink:href when the xlink namespace isn't imported.
* We have to do this as the link is still run in this case.
*/
if (false !== strpos($attrName, 'href')) {
$href = $element->getAttribute($attrName);
if (false === $this->isHrefSafeValue($href)) {
$element->removeAttribute($attrName);
$this->xmlIssues[] = array(
'message' => 'Suspicious attribute \'href\'',
'line' => $element->getLineNo(),
);
}
}
// Do we want to strip remote references?
@ -330,7 +392,7 @@ class Sanitizer
$this->xmlIssues[] = array(
'message' => 'Suspicious attribute \'' . $attrName . '\'',
'line' => $element->getLineNo(),
);
);
}
}
}
@ -344,22 +406,12 @@ class Sanitizer
protected function cleanXlinkHrefs(\DOMElement $element)
{
$xlinks = $element->getAttributeNS('http://www.w3.org/1999/xlink', 'href');
if (preg_match(self::SCRIPT_REGEX, $xlinks) === 1) {
if (!in_array(substr($xlinks, 0, 14), array(
'data:image/png', // PNG
'data:image/gif', // GIF
'data:image/jpg', // JPG
'data:image/jpe', // JPEG
'data:image/pjp', // PJPEG
))) {
$element->removeAttributeNS( 'http://www.w3.org/1999/xlink', 'href' );
$this->xmlIssues[] = array(
'message' => 'Suspicious attribute \'href\'',
'line' => $element->getLineNo(),
);
}
if (false === $this->isHrefSafeValue($xlinks)) {
$element->removeAttributeNS( 'http://www.w3.org/1999/xlink', 'href' );
$this->xmlIssues[] = array(
'message' => 'Suspicious attribute \'href\'',
'line' => $element->getLineNo(),
);
}
}
@ -371,7 +423,7 @@ class Sanitizer
protected function cleanHrefs(\DOMElement $element)
{
$href = $element->getAttribute('href');
if (preg_match(self::SCRIPT_REGEX, $href) === 1) {
if (false === $this->isHrefSafeValue($href)) {
$element->removeAttribute('href');
$this->xmlIssues[] = array(
'message' => 'Suspicious attribute \'href\'',
@ -380,6 +432,67 @@ class Sanitizer
}
}
/**
 * Decides whether an href value may stay, based on a whitelist of
 * accepted beginnings.
 *
 * Accepted are empty values, fragment identifiers (`#`), values starting
 * with `/`, `http://` and `https://` URLs, and PNG/GIF/JPG/JPEG/PJPEG data
 * URIs in their `data:image/` and `data:img/` spellings. Everything else
 * is rejected, which avoids the need for a complicated script regex.
 *
 * @param $value
 * @return bool
 */
protected function isHrefSafeValue($value) {
    // Allow empty values
    if (empty($value)) {
        return true;
    }

    // Accepted beginnings, grouped by the number of leading characters
    // that has to be compared.
    $allowedStarts = array(
        // Fragment identifiers and relative URIs.
        1 => array('#', '/'),
        // HTTPS domains.
        8 => array('https://'),
        // HTTP domains.
        7 => array('http://'),
        // Known data URIs.
        14 => array(
            'data:image/png', // PNG
            'data:image/gif', // GIF
            'data:image/jpg', // JPG
            'data:image/jpe', // JPEG
            'data:image/pjp', // PJPEG
        ),
        // Known short data URIs.
        12 => array(
            'data:img/png', // PNG
            'data:img/gif', // GIF
            'data:img/jpg', // JPG
            'data:img/jpe', // JPEG
            'data:img/pjp', // PJPEG
        ),
    );

    foreach ($allowedStarts as $length => $starts) {
        if (in_array(substr($value, 0, $length), $starts, true)) {
            return true;
        }
    }

    return false;
}
/**
* Removes non-printable ASCII characters from string & trims it
*
@ -431,6 +544,17 @@ class Sanitizer
$this->removeXMLTag = (bool) $removeXMLTag;
}
/**
 * Sets the threshold that `<use ... xlink:href="#identifier">` elements
 * are checked against; elements whose expansion would exceed it are
 * removed.
 *
 * @param int $useThreshold
 */
public function useThreshold($useThreshold = 1000)
{
    $threshold = (int)$useThreshold;
    $this->useThreshold = $threshold;
}
/**
* Check to see if an attribute is an aria attribute or not
*
@ -463,11 +587,44 @@ class Sanitizer
*/
protected function isUseTagDirty(\DOMElement $element)
{
$xlinks = $element->getAttributeNS('http://www.w3.org/1999/xlink', 'href');
if ($xlinks && substr($xlinks, 0, 1) !== '#') {
return true;
}
$href = Helper::getElementHref($element);
return $href && strpos($href, '#') !== 0;
}
/**
 * Determines whether `<use ... xlink:href="#identifier">` is expanded
 * recursively in order to create DoS scenarios. The usage count of the
 * referenced element needs to stay below `$this->useThreshold`.
 *
 * A threshold of zero or less switches the check off.
 *
 * @param \DOMElement $element
 * @return bool
 */
protected function isUseTagExceedingThreshold(\DOMElement $element)
{
    // Without a positive threshold there is nothing to look up.
    $referencedId = $this->useThreshold > 0
        ? Helper::extractIdReferenceFromHref(Helper::getElementHref($element))
        : null;
    if ($referencedId === null) {
        return false;
    }

    $candidates = $this->elementReferenceResolver->findByElementId($referencedId);
    foreach ($candidates as $subject) {
        if ($subject->countUse() >= $this->useThreshold) {
            return true;
        }
    }

    return false;
}
/**
 * Set the nesting limit for <use> tags; the value is stored as an integer.
 *
 * @param $limit
 */
public function setUseNestingLimit($limit)
{
    $nestingLimit = (int) $limit;
    $this->useNestingLimit = $nestingLimit;
}
}

View File

@ -21,6 +21,7 @@ class AllowedAttributes implements AttributeInterface
{
return array(
// HTML
'about',
'accept',
'action',
'align',
@ -46,6 +47,7 @@ class AllowedAttributes implements AttributeInterface
'disabled',
'download',
'enctype',
'encoding',
'face',
'for',
'headers',
@ -108,6 +110,7 @@ class AllowedAttributes implements AttributeInterface
'usemap',
'valign',
'value',
'version',
'width',
'xmlns',

View File

@ -1,6 +1,4 @@
<?php
namespace enshrined\svgSanitize\data;

View File

@ -1,9 +1,6 @@
<?php
namespace enshrined\svgSanitize\data;
/**
* Interface TagInterface
*

View File

@ -0,0 +1,64 @@
<?php
namespace enshrined\svgSanitize\data;
class XPath extends \DOMXPath
{
    const DEFAULT_NAMESPACE_PREFIX = 'svg';

    /**
     * Namespace URI of the root `svg` element; empty string when it has none.
     *
     * @var string
     */
    protected $defaultNamespaceURI;

    /**
     * @param \DOMDocument $doc
     * @throws \LogicException When the document does not have exactly one root `svg` element
     */
    public function __construct(\DOMDocument $doc)
    {
        parent::__construct($doc);
        $this->handleDefaultNamespace();
    }

    /**
     * Prefixes a node name with the default namespace prefix when the root
     * element is namespaced; otherwise the name is returned untouched.
     *
     * @param string $nodeName
     * @return string
     */
    public function createNodeName($nodeName)
    {
        return empty($this->defaultNamespaceURI)
            ? $nodeName
            : self::DEFAULT_NAMESPACE_PREFIX . ':' . $nodeName;
    }

    /**
     * Remembers the namespace URI of the single root element and, when it
     * is not empty, registers it under the default prefix.
     *
     * @throws \LogicException When there is not exactly one root `svg` element
     */
    protected function handleDefaultNamespace()
    {
        $roots = $this->getRootElements();
        $rootCount = count($roots);
        if ($rootCount !== 1) {
            throw new \LogicException(
                sprintf('Got %d svg elements, expected exactly one', $rootCount),
                1570870568
            );
        }

        $this->defaultNamespaceURI = (string)$roots[0]->namespaceURI;
        if ($this->defaultNamespaceURI === '') {
            return;
        }
        $this->registerNamespace(self::DEFAULT_NAMESPACE_PREFIX, $this->defaultNamespaceURI);
    }

    /**
     * Finds the `svg` elements that are direct children of the document.
     *
     * @return \DOMElement[]
     */
    protected function getRootElements()
    {
        $roots = [];
        /** @var \DOMElement $candidate */
        foreach ($this->document->getElementsByTagName('svg') as $candidate) {
            // Nested <svg> elements do not count as roots.
            if ($candidate->parentNode === $this->document) {
                $roots[] = $candidate;
            }
        }
        return $roots;
    }
}

View File

@ -12,9 +12,14 @@ require_once( __DIR__ . '/data/AttributeInterface.php' );
require_once( __DIR__ . '/data/TagInterface.php' );
require_once( __DIR__ . '/data/AllowedAttributes.php' );
require_once( __DIR__ . '/data/AllowedTags.php' );
require_once( __DIR__ . '/data/XPath.php' );
require_once( __DIR__ . '/ElementReference/Resolver.php' );
require_once( __DIR__ . '/ElementReference/Subject.php' );
require_once( __DIR__ . '/ElementReference/Usage.php' );
require_once( __DIR__ . '/Exceptions/NestingException.php' );
require_once( __DIR__ . '/Helper.php' );
require_once( __DIR__ . '/Sanitizer.php' );
/*
* Print array as JSON and then
* exit program with a particular