update svg sanitizer 0.21.0

This commit is contained in:
gtbu 2025-04-06 17:51:03 +02:00
parent 4f0a7e0326
commit 692b424b64
6 changed files with 174 additions and 225 deletions

View file

@ -1,9 +1,6 @@
<?php
namespace enshrined\svgSanitize\Exceptions;
use Exception;
class NestingException extends \Exception
@ -21,7 +18,7 @@ class NestingException extends \Exception
* @param Exception|null $previous
* @param \DOMElement|null $element
*/
public function __construct($message = "", $code = 0, Exception $previous = null, \DOMElement $element = null)
public function __construct($message = "", $code = 0, ?Exception $previous = null, ?\DOMElement $element = null)
{
$this->element = $element;
parent::__construct($message, $code, $previous);
@ -36,4 +33,4 @@ class NestingException extends \Exception
{
return $this->element;
}
}
}

View file

@ -1,8 +1,8 @@
# svg-sanitizer
# svg-sanitizer 0.21.0
[![Build Status](https://travis-ci.org/darylldoyle/svg-sanitizer.svg?branch=master)](https://travis-ci.org/darylldoyle/svg-sanitizer) [![Test Coverage](https://codeclimate.com/github/darylldoyle/svg-sanitizer/badges/coverage.svg)](https://codeclimate.com/github/darylldoyle/svg-sanitizer/coverage)
[![Build Status](https://github.com/darylldoyle/svg-sanitizer/actions/workflows/tests.yml/badge.svg?branch=master)](https://travis-ci.org/darylldoyle/svg-sanitizer) [![Test Coverage](https://codeclimate.com/github/darylldoyle/svg-sanitizer/badges/coverage.svg)](https://codeclimate.com/github/darylldoyle/svg-sanitizer/coverage)
This is my attempt at building a decent SVG sanitizer in PHP. The work is laregely borrowed from [DOMPurify](https://github.com/cure53/DOMPurify).
This is my attempt at building a decent SVG sanitizer in PHP. The work is largely borrowed from [DOMPurify](https://github.com/cure53/DOMPurify).
## Installation
@ -40,17 +40,17 @@ You may pass your own whitelist of tags and attributes by using the `Sanitizer::
These methods require that you implement the `enshrined\svgSanitize\data\TagInterface` or `enshrined\svgSanitize\data\AttributeInterface`.
## Remove remote references
## Remove remote references
You have the option to remove attributes that reference remote files, this will stop HTTP leaks but will add an overhead to the sanitiser.
You have the option to remove attributes that reference remote files, this will stop HTTP leaks but will add an overhead to the sanitizer.
This defaults to false, set to true to remove references.
`$sanitizer->removeRemoteReferences(true);`
## Viewing Sanitisation Issues
## Viewing Sanitization Issues
You may use the `getXmlIssues()` method to return an array of issues that occurred during sanitisation.
You may use the `getXmlIssues()` method to return an array of issues that occurred during sanitization.
This may be useful for logging or providing feedback to the user on why an SVG was refused.
@ -58,7 +58,7 @@ This may be useful for logging or providing feedback to the user on why an SVG w
## Minification
You can minify the XML output by calling `$sanitiser->minify(true);`.
You can minify the XML output by calling `$sanitizer->minify(true);`.
## Demo
There is a demo available at: [http://svg.enshrined.co.uk/](http://svg.enshrined.co.uk/)
@ -71,9 +71,14 @@ I've just released a WordPress plugin containing this code so you can sanitize y
[Michael Potter](https://github.com/heyMP) has kindly created a Drupal module for this library which is available at: [https://www.drupal.org/project/svg_sanitizer](https://www.drupal.org/project/svg_sanitizer)
## TYPO3
This SVG sanitizer library is used per default in the core of TYPO3 v9 and later versions.
See [corresponding changelog entry](https://docs.typo3.org/c/typo3/cms-core/main/en-us/Changelog/9.5.x/Important-94492-IntroduceSVGSanitizer.html) for more details.
## Tests
You can run these by running `phpunit`
You can run these by running `vendor/bin/phpunit` from the base directory of this package.
## Standalone scanning of files via CLI
@ -85,4 +90,4 @@ Use it as follows: `php svg-scanner.php ~/svgs/myfile.svg`
## To-Do
More extensive testing for the SVGs/XML would be lovely, I'll try and add these soon. If you feel like doing it for me, please do and make a PR!
More extensive testing for the SVGs/XML would be lovely, I'll try and add these soon. If you feel like doing it for me, please do and make a PR!

View file

@ -1,5 +1,4 @@
<?php
namespace enshrined\svgSanitize;
use enshrined\svgSanitize\data\AllowedAttributes;
@ -8,7 +7,6 @@ use enshrined\svgSanitize\data\AttributeInterface;
use enshrined\svgSanitize\data\TagInterface;
use enshrined\svgSanitize\data\XPath;
use enshrined\svgSanitize\ElementReference\Resolver;
use enshrined\svgSanitize\ElementReference\Subject;
/**
* Class Sanitizer
@ -38,6 +36,11 @@ class Sanitizer
*/
protected $xmlLoaderValue;
/**
* @var bool
*/
protected $xmlErrorHandlerPreviousValue;
/**
* @var bool
*/
@ -78,6 +81,11 @@ class Sanitizer
*/
protected $useNestingLimit = 15;
/**
* @var bool
*/
protected $allowHugeFiles = false;
/**
*
*/
@ -180,12 +188,30 @@ class Sanitizer
return $this->xmlIssues;
}
/**
* Can we allow huge files?
*
* @return bool
*/
public function getAllowHugeFiles() {
return $this->allowHugeFiles;
}
/**
* Set whether we can allow huge files.
*
* @param bool $allowHugeFiles
*/
public function setAllowHugeFiles( $allowHugeFiles ) {
$this->allowHugeFiles = $allowHugeFiles;
}
/**
* Sanitize the passed string
*
* @param string $dirty
* @return string
* @return string|false
*/
public function sanitize($dirty)
{
@ -194,16 +220,22 @@ class Sanitizer
return '';
}
// Strip php tags
$dirty = preg_replace('/<\?(=|php)(.+?)\?>/i', '', $dirty);
do {
/*
* recursively remove php tags because they can be hidden inside tags
* i.e. <?p<?php test?>hp echo . ' danger! ';?>
*/
$dirty = preg_replace('/<\?(=|php)(.+?)\?>/i', '', $dirty);
} while (preg_match('/<\?(=|php)(.+?)\?>/i', $dirty) != 0);
$this->resetInternal();
$this->setUpBefore();
$loaded = $this->xmlDocument->loadXML($dirty);
$loaded = $this->xmlDocument->loadXML($dirty, $this->getAllowHugeFiles() ? LIBXML_PARSEHUGE : 0);
// If we couldn't parse the XML then we go no further. Reset and return false
if (!$loaded) {
$this->xmlIssues = self::getXmlErrors();
$this->resetAfter();
return false;
}
@ -214,13 +246,8 @@ class Sanitizer
$this->elementReferenceResolver->collect();
$elementsToRemove = $this->elementReferenceResolver->getElementsToRemove();
// Grab all the elements
$allElements = $this->xmlDocument->getElementsByTagName("*");
// remove doctype after node elements have been analyzed
$this->removeDoctype();
// Start the cleaning proccess
$this->startClean($allElements, $elementsToRemove);
// Start the cleaning process
$this->startClean($this->xmlDocument->childNodes, $elementsToRemove);
// Save cleaned XML to a variable
if ($this->removeXMLTag) {
@ -252,8 +279,9 @@ class Sanitizer
$this->xmlLoaderValue = libxml_disable_entity_loader(true);
}
// Suppress the errors because we don't really have to worry about formation before cleansing
libxml_use_internal_errors(true);
// Suppress the errors because we don't really have to worry about formation before cleansing.
// See reset in resetAfter().
$this->xmlErrorHandlerPreviousValue = libxml_use_internal_errors(true);
// Reset array of altered XML
$this->xmlIssues = array();
@ -270,19 +298,9 @@ class Sanitizer
// Reset the entity loader
libxml_disable_entity_loader($this->xmlLoaderValue);
}
}
/**
* Remove the XML Doctype
* It may be caught later on output but that seems to be buggy, so we need to make sure it's gone
*/
protected function removeDoctype()
{
foreach ($this->xmlDocument->childNodes as $child) {
if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
$child->parentNode->removeChild($child);
}
}
libxml_clear_errors();
libxml_use_internal_errors($this->xmlErrorHandlerPreviousValue);
}
/**
@ -316,33 +334,63 @@ class Sanitizer
continue;
}
// If the tag isn't in the whitelist, remove it and continue with next iteration
if (!in_array(strtolower($currentElement->tagName), $this->allowedTags)) {
$currentElement->parentNode->removeChild($currentElement);
$this->xmlIssues[] = array(
'message' => 'Suspicious tag \'' . $currentElement->tagName . '\'',
'line' => $currentElement->getLineNo(),
);
continue;
}
$this->cleanHrefs($currentElement);
$this->cleanXlinkHrefs($currentElement);
$this->cleanAttributesOnWhitelist($currentElement);
if (strtolower($currentElement->tagName) === 'use') {
if ($this->isUseTagDirty($currentElement)
|| $this->isUseTagExceedingThreshold($currentElement)
) {
if ($currentElement instanceof \DOMElement) {
// If the tag isn't in the whitelist, remove it and continue with next iteration
if (!in_array(strtolower($currentElement->tagName), $this->allowedTags)) {
$currentElement->parentNode->removeChild($currentElement);
$this->xmlIssues[] = array(
'message' => 'Suspicious \'' . $currentElement->tagName . '\'',
'message' => 'Suspicious tag \'' . $currentElement->tagName . '\'',
'line' => $currentElement->getLineNo(),
);
continue;
}
$this->cleanHrefs( $currentElement );
$this->cleanXlinkHrefs( $currentElement );
$this->cleanAttributesOnWhitelist($currentElement);
if (strtolower($currentElement->tagName) === 'use') {
if ($this->isUseTagDirty($currentElement)
|| $this->isUseTagExceedingThreshold($currentElement)
) {
$currentElement->parentNode->removeChild($currentElement);
$this->xmlIssues[] = array(
'message' => 'Suspicious \'' . $currentElement->tagName . '\'',
'line' => $currentElement->getLineNo(),
);
continue;
}
}
// Strip out font elements that will break out of foreign content.
if (strtolower($currentElement->tagName) === 'font') {
$breaksOutOfForeignContent = false;
for ($x = $currentElement->attributes->length - 1; $x >= 0; $x--) {
// get attribute name
$attrName = $currentElement->attributes->item( $x )->nodeName;
if (in_array(strtolower($attrName), ['face', 'color', 'size'])) {
$breaksOutOfForeignContent = true;
}
}
if ($breaksOutOfForeignContent) {
$currentElement->parentNode->removeChild($currentElement);
$this->xmlIssues[] = array(
'message' => 'Suspicious tag \'' . $currentElement->tagName . '\'',
'line' => $currentElement->getLineNo(),
);
continue;
}
}
}
$this->cleanUnsafeNodes($currentElement);
if ($currentElement->hasChildNodes()) {
$this->startClean($currentElement->childNodes, $elementsToRemove);
}
}
}
@ -356,7 +404,7 @@ class Sanitizer
{
for ($x = $element->attributes->length - 1; $x >= 0; $x--) {
// get attribute name
$attrName = $element->attributes->item($x)->name;
$attrName = $element->attributes->item($x)->nodeName;
// Remove attribute if not in whitelist
if (!in_array(strtolower($attrName), $this->allowedAttrs) && !$this->isAriaAttribute(strtolower($attrName)) && !$this->isDataAttribute(strtolower($attrName))) {
@ -432,15 +480,15 @@ class Sanitizer
}
}
/**
* Only allow whitelisted starts to be within the href.
*
* This will stop scripts etc from being passed through, with or without attempting to hide bypasses.
* This stops the need for us to use a complicated script regex.
*
* @param $value
* @return bool
*/
/**
* Only allow whitelisted starts to be within the href.
*
* This will stop scripts etc from being passed through, with or without attempting to hide bypasses.
* This stops the need for us to use a complicated script regex.
*
* @param $value
* @return bool
*/
protected function isHrefSafeValue($value) {
// Allow empty values
@ -476,7 +524,7 @@ class Sanitizer
'data:image/jpe', // JPEG
'data:image/pjp', // PJPEG
))) {
return true;
return true;
}
// Allow known short data URIs.
@ -627,4 +675,50 @@ class Sanitizer
{
$this->useNestingLimit = (int) $limit;
}
/**
* Remove nodes that are either invalid or malformed.
*
* @param \DOMNode $currentElement The current element.
*/
protected function cleanUnsafeNodes(\DOMNode $currentElement) {
// Replace CDATA node with encoded text node
if ($currentElement instanceof \DOMCdataSection) {
$textNode = $currentElement->ownerDocument->createTextNode($currentElement->nodeValue);
$currentElement->parentNode->replaceChild($textNode, $currentElement);
// If the element doesn't have a tagname, remove it and continue with next iteration
} elseif (!$currentElement instanceof \DOMElement && !$currentElement instanceof \DOMText) {
$currentElement->parentNode->removeChild($currentElement);
$this->xmlIssues[] = array(
'message' => 'Suspicious node \'' . $currentElement->nodeName . '\'',
'line' => $currentElement->getLineNo(),
);
return;
}
if ( $currentElement->childNodes && $currentElement->childNodes->length > 0 ) {
for ($j = $currentElement->childNodes->length - 1; $j >= 0; $j--) {
/** @var \DOMElement $childElement */
$childElement = $currentElement->childNodes->item($j);
$this->cleanUnsafeNodes($childElement);
}
}
}
/**
* Retrieve array of errors
* @return array
*/
private static function getXmlErrors()
{
$errors = [];
foreach (libxml_get_errors() as $error) {
$errors[] = [
'message' => trim($error->message),
'line' => $error->line,
];
}
return $errors;
}
}

View file

@ -1,9 +1,6 @@
<?php
namespace enshrined\svgSanitize\data;
/**
* Class AllowedAttributes
*
@ -146,6 +143,7 @@ class AllowedAttributes implements AttributeInterface
'direction',
'display',
'divisor',
'dominant-baseline',
'dur',
'edgemode',
'elevation',
@ -154,6 +152,7 @@ class AllowedAttributes implements AttributeInterface
'fill-opacity',
'fill-rule',
'filter',
'filterUnits',
'flood-color',
'flood-opacity',
'font-family',
@ -272,6 +271,7 @@ class AllowedAttributes implements AttributeInterface
'values',
'viewbox',
'visibility',
'vector-effect',
'vert-adv-y',
'vert-origin-x',
'vert-origin-y',

View file

@ -1,9 +1,6 @@
<?php
namespace enshrined\svgSanitize\data;
/**
* Class AllowedTags
*
@ -22,121 +19,9 @@ class AllowedTags implements TagInterface
return array (
// HTML
'a',
'abbr',
'acronym',
'address',
'area',
'article',
'aside',
'audio',
'b',
'bdi',
'bdo',
'big',
'blink',
'blockquote',
'body',
'br',
'button',
'canvas',
'caption',
'center',
'cite',
'code',
'col',
'colgroup',
'content',
'data',
'datalist',
'dd',
'decorator',
'del',
'details',
'dfn',
'dir',
'div',
'dl',
'dt',
'element',
'em',
'fieldset',
'figcaption',
'figure',
'font',
'footer',
'form',
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'head',
'header',
'hgroup',
'hr',
'html',
'i',
'image',
'img',
'input',
'ins',
'kbd',
'label',
'legend',
'li',
'main',
'map',
'mark',
'marquee',
'menu',
'menuitem',
'meter',
'nav',
'nobr',
'ol',
'optgroup',
'option',
'output',
'p',
'pre',
'progress',
'q',
'rp',
'rt',
'ruby',
's',
'samp',
'section',
'select',
'shadow',
'small',
'source',
'spacer',
'span',
'strike',
'strong',
'style',
'sub',
'summary',
'sup',
'table',
'tbody',
'td',
'template',
'textarea',
'tfoot',
'th',
'thead',
'time',
'tr',
'track',
'tt',
'u',
'ul',
'var',
'video',
'wbr',
// SVG
'svg',
@ -207,37 +92,6 @@ class AllowedTags implements TagInterface
'feTile',
'feTurbulence',
//MathML
'math',
'menclose',
'merror',
'mfenced',
'mfrac',
'mglyph',
'mi',
'mlabeledtr',
'mmuliscripts',
'mn',
'mo',
'mover',
'mpadded',
'mphantom',
'mroot',
'mrow',
'ms',
'mpspace',
'msqrt',
'mystyle',
'msub',
'msup',
'msubsup',
'mtable',
'mtd',
'mtext',
'mtr',
'munder',
'munderover',
//text
'#text'
);

View file

@ -1,7 +1,6 @@
<?php
namespace enshrined\svgSanitize\data;
/**
* Class AttributeInterface
*
@ -16,4 +15,4 @@ interface AttributeInterface
* @return array
*/
public static function getAttributes();
}
}