From 4ad157b14b373bb67625c82985bfbb8f18714301 Mon Sep 17 00:00:00 2001 From: gtbu Date: Fri, 7 Jun 2024 13:28:57 +0200 Subject: [PATCH] php-dom-wrapper 3.0 Simple DOM wrapper library to manipulate and traverse HTML documents similar to jQuery (php8+) --- .../dom/Collections/NodeCollection.php | 169 +++ include/thirdparty/dom/Comment.php | 24 + include/thirdparty/dom/Document.php | 346 +++++ include/thirdparty/dom/DocumentType.php | 24 + include/thirdparty/dom/Element.php | 24 + include/thirdparty/dom/LICENSE | 30 + include/thirdparty/dom/NodeList.php | 297 +++++ .../thirdparty/dom/ProcessingInstruction.php | 24 + include/thirdparty/dom/README.md | 1123 +++++++++++++++++ include/thirdparty/dom/Text.php | 24 + include/thirdparty/dom/Traits/CommonTrait.php | 38 + .../dom/Traits/ManipulationTrait.php | 748 +++++++++++ include/thirdparty/dom/Traits/NodeTrait.php | 46 + .../thirdparty/dom/Traits/TraversalTrait.php | 468 +++++++ 14 files changed, 3385 insertions(+) create mode 100644 include/thirdparty/dom/Collections/NodeCollection.php create mode 100644 include/thirdparty/dom/Comment.php create mode 100644 include/thirdparty/dom/Document.php create mode 100644 include/thirdparty/dom/DocumentType.php create mode 100644 include/thirdparty/dom/Element.php create mode 100644 include/thirdparty/dom/LICENSE create mode 100644 include/thirdparty/dom/NodeList.php create mode 100644 include/thirdparty/dom/ProcessingInstruction.php create mode 100644 include/thirdparty/dom/README.md create mode 100644 include/thirdparty/dom/Text.php create mode 100644 include/thirdparty/dom/Traits/CommonTrait.php create mode 100644 include/thirdparty/dom/Traits/ManipulationTrait.php create mode 100644 include/thirdparty/dom/Traits/NodeTrait.php create mode 100644 include/thirdparty/dom/Traits/TraversalTrait.php diff --git a/include/thirdparty/dom/Collections/NodeCollection.php b/include/thirdparty/dom/Collections/NodeCollection.php new file mode 100644 index 0000000..80a9beb --- /dev/null +++ 
b/include/thirdparty/dom/Collections/NodeCollection.php @@ -0,0 +1,169 @@ +nodes[] = $node; + } + } + + /** + * @see \Countable::count() + * + * @return int + */ + public function count(): int { + return count($this->nodes); + } + + /** + * @see \ArrayAccess::offsetExists() + * + * @param mixed $offset + * + * @return bool + */ + public function offsetExists(mixed $offset): bool { + return isset($this->nodes[$offset]); + } + + /** + * @see \ArrayAccess::offsetGet() + * + * @param mixed $offset + * + * @return mixed + */ + public function offsetGet(mixed $offset): mixed { + return isset($this->nodes[$offset]) ? $this->nodes[$offset] : null; + } + + /** + * @see \ArrayAccess::offsetSet() + * + * @param mixed $offset + * @param mixed $value + */ + public function offsetSet(mixed $offset, mixed $value): void { + if (is_null($offset)) { + $this->nodes[] = $value; + } else { + $this->nodes[$offset] = $value; + } + } + + /** + * @see \ArrayAccess::offsetUnset() + * + * @param mixed $offset + */ + public function offsetUnset(mixed $offset): void { + unset($this->nodes[$offset]); + } + + /** + * @see \RecursiveIterator::RecursiveIteratorIterator() + * + * @return \RecursiveIteratorIterator + */ + public function getRecursiveIterator(): \RecursiveIteratorIterator { + return new \RecursiveIteratorIterator($this, \RecursiveIteratorIterator::SELF_FIRST); + } + + /** + * @see \RecursiveIterator::getChildren() + * + * @return \RecursiveIterator + */ + public function getChildren(): \RecursiveIterator { + $nodes = []; + + if ($this->valid()) { + $nodes = $this->current()->childNodes; + } + + return new static($nodes); + } + + /** + * @see \RecursiveIterator::hasChildren() + * + * @return bool + */ + public function hasChildren(): bool { + if ($this->valid()) { + return $this->current()->hasChildNodes(); + } + + return false; + } + + /** + * @see \RecursiveIterator::current() + * @see \Iterator::current() + * + * @return mixed + */ + public function current(): mixed { + return 
current($this->nodes); + } + + /** + * @see \RecursiveIterator::key() + * @see \Iterator::key() + * + * @return mixed + */ + public function key(): mixed { + return key($this->nodes); + } + + /** + * @see \RecursiveIterator::next() + * @see \Iterator::next() + * + * @return void + */ + public function next(): void { + next($this->nodes); + } + + /** + * @see \RecursiveIterator::rewind() + * @see \Iterator::rewind() + * + * @return void + */ + public function rewind(): void { + reset($this->nodes); + } + + /** + * @see \RecursiveIterator::valid() + * @see \Iterator::valid() + * + * @return bool + */ + public function valid(): bool { + return key($this->nodes) !== null; + } +} \ No newline at end of file diff --git a/include/thirdparty/dom/Comment.php b/include/thirdparty/dom/Comment.php new file mode 100644 index 0000000..10a178a --- /dev/null +++ b/include/thirdparty/dom/Comment.php @@ -0,0 +1,24 @@ +registerNodeClass('DOMText', 'DOMWrap\\Text'); + $this->registerNodeClass('DOMElement', 'DOMWrap\\Element'); + $this->registerNodeClass('DOMComment', 'DOMWrap\\Comment'); + $this->registerNodeClass('DOMDocument', 'DOMWrap\\Document'); + $this->registerNodeClass('DOMDocumentType', 'DOMWrap\\DocumentType'); + $this->registerNodeClass('DOMProcessingInstruction', 'DOMWrap\\ProcessingInstruction'); + } + + /** + * Set libxml options. + * + * Multiple values must use bitwise OR. 
+ * eg: LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD + * + * @link http://php.net/manual/en/libxml.constants.php + * + * @param int $libxmlOptions + */ + public function setLibxmlOptions(int $libxmlOptions): void { + $this->libxmlOptions = $libxmlOptions; + } + + /** + * {@inheritdoc} + */ + public function document(): ?\DOMDocument { + return $this; + } + + /** + * {@inheritdoc} + */ + public function collection(): NodeList { + return $this->newNodeList([$this]); + } + + /** + * {@inheritdoc} + */ + public function result(NodeList $nodeList): NodeList|\DOMNode|null { + if ($nodeList->count()) { + return $nodeList->first(); + } + + return null; + } + + /** + * {@inheritdoc} + */ + public function parent(string|NodeList|\DOMNode|callable|null $selector = null): Document|Element|NodeList|null { + return null; + } + + /** + * {@inheritdoc} + */ + public function parents(?string $selector = null): NodeList { + return $this->newNodeList(); + } + + /** + * {@inheritdoc} + */ + public function substituteWith(string|NodeList|\DOMNode|callable $input): self { + $this->manipulateNodesWithInput($input, function($node, $newNodes) { + foreach ($newNodes as $newNode) { + $node->replaceChild($newNode, $node); + } + }); + + return $this; + } + + /** + * {@inheritdoc} + */ + public function _clone(): void { + return; + } + + /** + * {@inheritdoc} + */ + public function getHtml(bool $isIncludeAll = false): string { + return $this->getOuterHtml($isIncludeAll); + } + + /** + * {@inheritdoc} + */ + public function setHtml(string|NodeList|\DOMNode|callable $input): self { + if (!is_string($input) || trim($input) == '') { + return $this; + } + + $internalErrors = libxml_use_internal_errors(true); + if (\PHP_VERSION_ID < 80000) { + $disableEntities = libxml_disable_entity_loader(true); + $this->composeXmlNode($input); + libxml_use_internal_errors($internalErrors); + libxml_disable_entity_loader($disableEntities); + } else { + $this->composeXmlNode($input); + 
libxml_use_internal_errors($internalErrors); + } + + return $this; + } + + /** + * @param string $html + * @param int $options + * + * @return bool + */ + public function loadHTML(string $html, int $options = 0): bool { + // Fix LibXML's crazy-ness RE root nodes + // While importing HTML using the LIBXML_HTML_NOIMPLIED option LibXML insists + // on having one root node. All subsequent nodes are appended to this first node. + // To counter this we will create a fake element, allow LibXML to 'do its thing' + // then undo it by taking the contents of the fake element, placing it back into + // the root and then remove our fake element. + if ($options & LIBXML_HTML_NOIMPLIED) { + $html = '' . $html; + } + + $html = 'getEncoding() ?? 'UTF-8') . '">' . $html; + + $result = parent::loadHTML($html, $options); + + // Do our re-shuffling of nodes. + if ($this->libxmlOptions & LIBXML_HTML_NOIMPLIED) { + $this->children()->first()->contents()->each(function($node){ + $this->appendWith($node); + }); + + $this->removeChild($this->children()->first()); + } + + return $result; + } + + /** + * @param \DOMNode $node + * + * @return string|bool + */ + public function saveHTML(?\DOMNode $node = null): string|false { + $target = $node ?: $this; + + // Undo any url encoding of attributes automatically applied by LibXML. + // See htmlAttrDumpOutput() in: + // https://github.com/GNOME/libxml2/blob/master/HTMLtree.c + $i = 0; + $search = []; + $replace = []; + $escapes = [ + ['attr' => 'src'], + ['attr' => 'href'], + ['attr' => 'action'], + ['attr' => 'name', 'tag' => 'a'], + ]; + + $nodes = $target->find('*[src],*[href],*[action],a[name]', 'descendant-or-self::'); + + foreach ($nodes as $node) { + foreach ($escapes as $escape) { + if ( + (!array_key_exists('tag', $escape) || strcasecmp($node->tagName, $escape['tag']) === 0) + && $node->hasAttribute($escape['attr']) + ) { + $value = $node->getAttribute($escape['attr']); + $newName = 'DOMWRAP--ATTR-' . $i . '--' . 
$escape['attr']; + + $node->setAttribute($newName, $value); + $node->removeAttribute($escape['attr']); + + // Determine if the attribute will be wrapped in single + // or double quotes and further encodings to apply. + // + // See xmlBufWriteQuotedString() in: + // https://github.com/GNOME/libxml2/blob/master/buf.c + $hasQuot = strstr($value, '"'); + $hasApos = strstr($value, "'"); + + if ($hasQuot && $hasApos) { + $value = str_replace('"', '"', $value); + } + + $char = '"'; + + if ($hasQuot && !$hasApos) { + $char = "'"; + } + + // See xmlEscapeEntities() in: + // https://github.com/GNOME/libxml2/blob/master/xmlsave.c + $searchValue = str_replace(['<', '>', '&'], ['<', '>', '&'], $value); + + $search[] = $newName. '=' . $char . $searchValue . $char; + $replace[] = $escape['attr']. '=' . $char . $value . $char; + + $i++; + } + } + } + + $html = parent::saveHTML($target); + + $html = str_replace($search, $replace, $html); + + return $html; + } + + /* + * @param $encoding string|null + */ + public function setEncoding(?string $encoding = null): void { + $this->documentEncoding = $encoding; + } + + /* + * @return string|null + */ + public function getEncoding(): ?string { + return $this->documentEncoding; + } + + /* + * @param $html string + * + * @return string|null + */ + private function getCharset(string $html): ?string { + $charset = null; + + if (preg_match('@]*?charset=["\']?([^"\'\s>]+)@im', $html, $matches)) { + $charset = mb_strtoupper($matches[1]); + } + + return $charset; + } + + /* + * @param $html string + */ + private function detectEncoding(string $html): void { + $charset = $this->getEncoding(); + + if (is_null($charset)) { + $charset = $this->getCharset($html); + } + + $detectedCharset = mb_detect_encoding($html, mb_detect_order(), true); + + if ($charset === null && $detectedCharset == 'UTF-8') { + $charset = $detectedCharset; + } + + $this->setEncoding($charset); + } + + /* + * @param $html string + * + * @return string + */ + private function 
convertToUtf8(string $html): string { + $charset = $this->getEncoding(); + + if ($charset !== null) { + $html = preg_replace('@(charset=["]?)([^"\s]+)([^"]*["]?)@im', '$1UTF-8$3', $html); + $mbHasCharset = in_array($charset, array_map('mb_strtoupper', mb_list_encodings())); + + if ($mbHasCharset) { + $html = mb_convert_encoding($html, 'UTF-8', $charset); + + // Fallback to iconv if available. + } elseif (extension_loaded('iconv')) { + $htmlIconv = iconv($charset, 'UTF-8', $html); + + if ($htmlIconv !== false) { + $html = $htmlIconv; + } else { + $charset = null; + } + } + } + + if ($charset === null) { + $html = htmlspecialchars_decode(mb_encode_numericentity(htmlentities($html, ENT_QUOTES, 'UTF-8'), [0x80, 0x10FFFF, 0, ~0], 'UTF-8')); + } + + return $html; + } + + /** + * @param $html string + */ + private function composeXmlNode(string $html): void { + $this->detectEncoding($html); + + $html = $this->convertToUtf8($html); + + $this->loadHTML($html, $this->libxmlOptions); + + // Remove processing instruction. + $this->contents()->each(function($node) { + if ($node instanceof ProcessingInstruction && $node->nodeName == 'xml') { + $node->destroy(); + } + }); + } +} diff --git a/include/thirdparty/dom/DocumentType.php b/include/thirdparty/dom/DocumentType.php new file mode 100644 index 0000000..3d6fe49 --- /dev/null +++ b/include/thirdparty/dom/DocumentType.php @@ -0,0 +1,24 @@ +document = $document; + } + + /** + * @param string $name + * @param array $arguments + * + * @return mixed + */ + public function __call(string $name, array $arguments) { + try { + $result = $this->__manipulationCall($name, $arguments); + } catch (\BadMethodCallException $e) { + if (!$this->first() || !method_exists($this->first(), $name)) { + throw new \BadMethodCallException("Call to undefined method " . get_class($this) . '::' . $name . 
"()"); + } + + $result = call_user_func_array([$this->first(), $name], $arguments); + } + + return $result; + } + + /** + * {@inheritdoc} + */ + public function collection(): NodeList { + return $this; + } + + /** + * {@inheritdoc} + */ + public function document(): ?\DOMDocument { + return $this->document; + } + + /** + * {@inheritdoc} + */ + public function result(NodeList $nodeList): NodeList|\DOMNode|null { + return $nodeList; + } + + /** + * @return NodeList This list, with its nodes reversed in place (array_reverse() returns a new array, so the result is assigned back). + */ + public function reverse(): NodeList { + $this->nodes = array_reverse($this->nodes); + + return $this; + } + + /** + * @return mixed + */ + public function first(): mixed { + if (!empty($this->nodes)) { + $this->rewind(); + return $this->current(); + } + + return null; + } + + /** + * @return mixed + */ + public function last(): mixed { + return $this->end(); + } + + /** + * @return mixed + */ + public function end(): mixed { + return !empty($this->nodes) ? end($this->nodes) : null; + } + + /** + * @param int $key + * + * @return mixed + */ + public function get(int $key): mixed { + if (isset($this->nodes[$key])) { + return $this->nodes[$key]; + } + + return null; + } + + /** + * @param int $key + * @param mixed $value + * + * @return self + */ + public function set(int $key, mixed $value): self { + $this->nodes[$key] = $value; + + return $this; + } + + /** + * @param callable $function + * + * @return self + */ + public function each(callable $function): self { + foreach ($this->nodes as $index => $node) { + $result = $function($node, $index); + + if ($result === false) { + break; + } + } + + return $this; + } + + /** + * @param callable $function + * + * @return NodeList + */ + public function map(callable $function): NodeList { + $nodes = $this->newNodeList(); + + foreach ($this->nodes as $node) { + $result = $function($node); + + if (!is_null($result) && $result !== false) { + $nodes[] = $result; + } + } + + return $nodes; + } + + /** + * @param callable $function + * @param mixed|null $initial + * + * @return
iterable + */ + public function reduce(callable $function, mixed $initial = null) { + return array_reduce($this->nodes, $function, $initial); + } + + /** + * @return array + */ + public function toArray(): iterable { + return $this->nodes; + } + + /** + * @param iterable $nodes + */ + public function fromArray(?iterable $nodes = null) { + $this->nodes = []; + + if (is_iterable($nodes)) { + foreach ($nodes as $node) { + $this->nodes[] = $node; + } + } + } + + /** + * @param NodeList|array $elements + * + * @return NodeList + */ + public function merge(NodeList|array $elements = []): NodeList { + if (!is_array($elements)) { + $elements = $elements->toArray(); + } + + return $this->newNodeList(array_merge($this->toArray(), $elements)); + } + + /** + * @param int $start + * @param int $end + * + * @return NodeList + */ + public function slice(int $start, ?int $end = null): NodeList { + $nodeList = array_slice($this->toArray(), $start, $end); + + return $this->newNodeList($nodeList); + } + + /** + * @param \DOMNode $node + * + * @return self + */ + public function push(\DOMNode $node): self { + $this->nodes[] = $node; + + return $this; + } + + /** + * @return \DOMNode + */ + public function pop(): \DOMNode { + return array_pop($this->nodes); + } + + /** + * @param \DOMNode $node + * + * @return self + */ + public function unshift(\DOMNode $node): self { + array_unshift($this->nodes, $node); + + return $this; + } + + /** + * @return \DOMNode + */ + public function shift(): \DOMNode { + return array_shift($this->nodes); + } + + /** + * @param \DOMNode $node + * + * @return bool + */ + public function exists(\DOMNode $node): bool { + return in_array($node, $this->nodes, true); + } + + /** + * @param \DOMNode $node + * + * @return self + */ + public function delete(\DOMNode $node): self { + $index = array_search($node, $this->nodes, true); + + if ($index !== false) { + unset($this->nodes[$index]); + } + + return $this; + } + + /** + * @return bool + */ + public function 
isRemoved(): bool { + return false; + } +} \ No newline at end of file diff --git a/include/thirdparty/dom/ProcessingInstruction.php b/include/thirdparty/dom/ProcessingInstruction.php new file mode 100644 index 0000000..cdb7e15 --- /dev/null +++ b/include/thirdparty/dom/ProcessingInstruction.php @@ -0,0 +1,24 @@ +
  • First
  • Second
  • Third
  • '; + +$doc = new Document(); +$doc->html($html); +$nodes = $doc->find('li'); + +// Returns '3' +var_dump($nodes->count()); + +// Append as a child node to each
  • +$nodes->appendWith('!'); + +// Returns:
    • First!
    • Second!
    • Third!
    +var_dump($doc->html()); +``` + +--- + +## Methods + +### Manipulation + +#### addClass + +``` +self addClass(string|callable $class) +``` + +##### Example + +```php +$doc = (new Document())->html('

    first paragraph

    second paragraph

    '); +$doc->find('p')->addClass('text-center'); +``` + +*Result:* + +``` html +

    first paragraph

    second paragraph

    +``` + +--- + +#### follow + +``` +self follow(string|NodeList|\DOMNode|callable $input) +``` + +Insert the argument as a sibling directly after each of the nodes operated on. + +##### Example + +``` php +$doc = (new Document())->html('
    • first
    • second
    '); +$doc->find('li')->first()->follow('
  • first-and-a-half
  • '); + +``` + +*Result:* + +``` html + +``` + +--- + +#### appendWith + +``` +self appendWith(string|NodeList|\DOMNode|callable $input) +``` + +##### Example + +``` php +$doc = (new Document())->html('
    The quick brown fox jumps over the lazy dog
    '); +$doc->find('div')->appendWith(' Appended!'); +``` + +*Result:* + +``` html +
    The quick brown fox jumps over the lazy dog Appended!
    +``` + +--- + +#### appendTo + +``` +self appendTo(string|NodeList|\DOMNode $selector) +``` + +##### Example + +``` php +$doc = (new Document())->html('
    The quick brown fox jumps over the lazy dog
    '); +$doc->create(' Appended!')->appendTo('div'); +``` + +*Result:* +``` html +
    The quick brown fox jumps over the lazy dog Appended!
    +``` + +--- + +#### attr + +``` +self|string attr(string $name[, mixed $value = null]) +``` + +##### Example #1 (Set) + +``` php +$doc = (new Document())->html('
    '); +$doc->attr('class', 'text-left'); +``` + +*Result:* + +``` html +
    +``` + +##### Example #2 (Get) + +``` php +$doc = (new Document())->html('
    '); +echo $doc->attr('class'); +``` + +*Result:* + +``` html +text-center +``` + +--- + +#### precede + +``` +self precede(string|NodeList|\DOMNode|callable $input) +``` + +Insert the argument as a sibling just before each of the nodes operated on. + +##### Example + +``` php +$doc = (new Document())->html(''); +$doc->find('li')->first()->precede('
  • zeroth
  • '); +``` + +*Result:* + +``` html + +``` + +--- + +#### clone + +``` +NodeList|\DOMNode clone() +``` + +##### Example + +``` php +$doc = (new Document())->html(''); +$doc->find('div')->clone()->appendTo('ul'); +``` + +*Result:* + +``` html + +``` + +--- + +#### destroy + +``` +self destroy([string $selector = null]) +``` + +##### Example + +``` php +$doc = (new Document())->html(''); +$doc->find('.first')->destroy(); +``` + +*Result:* +``` html + +``` + +--- + +#### detach + +``` +NodeList detach([string $selector = null]) +``` + +##### Example + +``` php +$doc = (new Document())->html(''); +$el = $doc->find('ul.first li')->detach(); +$doc->first('ul.second').append($el); +``` + +*Result:* + +``` html + +``` + +--- + +#### empty + +``` +self empty() +``` + +##### Example + +``` php +$doc = (new Document())->html('
    The quick brown fox jumps over the lazy dog
    '); +$doc->find('div')->empty(); +``` + +*Result:* + +``` html +
    +``` + +--- + +#### hasClass + +``` +bool hasClass(string $class) +``` + +##### Example + +``` php +$doc = (new Document())->html('
    '); +echo $doc->first('div')->hasClass('text-center'); +``` + +*Result:* + +``` html +true +``` + +--- + +#### html + +``` +string|self html([string|NodeList|\DOMNode|callable $input = null]) +``` + +##### Example #1 (Set) + +``` php +$doc = (new Document()); +$doc->html('
    '); +``` + +*Result:* + +``` html +
    +``` + +##### Example #2 (Get) + +``` php +$doc = (new Document())->html('
    '); +$doc->find('div')->appendWith('Example!'); +echo $doc->html(); +``` + +*Result:* + +``` html +
    Example!
    +``` + +--- + +#### prependWith + +``` +self prependWith(string|NodeList|\DOMNode|callable $input) +``` + +##### Example + +``` php +$doc = (new Document())->html('
    The quick brown fox jumps over the lazy dog
    '); +$doc->find('div')->prependWith('Prepended! '); +``` + +*Result:* + +``` html +
    Prepended! The quick brown fox jumps over the lazy dog
    +``` + +--- + +#### prependTo + +``` +self prependTo(string|NodeList|\DOMNode $selector) +``` + +##### Example + +``` php +$doc = (new Document())->html('
    The quick brown fox jumps over the lazy dog
    '); +$doc->create('Prepended! ')->prependTo('div'); +``` + +*Result:* +``` html +
    Prepended! The quick brown fox jumps over the lazy dog
    +``` + +--- + +#### removeAttr + +``` +self removeAttr(string $name) +``` + +##### Example + +``` php +$doc = (new Document())->html('
    '); +$doc->find('div')->removeAttr('class'); +``` + +*Result:* +``` html +
    +``` + +--- + +#### removeClass + +``` +self removeClass(string|callable $class) +``` + +##### Example + +``` php +$doc = (new Document())->html('
    '); +$doc->find('div')->removeClass('first'); +``` + +*Result:* +``` html +
    +``` + +--- + +#### substituteWith + +``` +self substituteWith(string|NodeList|\DOMNode|callable $input) +``` + +##### Example + +``` php +``` + +--- + +#### text + +``` +string|self text([string|NodeList|\DOMNode|callable $input = null]) +``` + +##### Example + +``` php +``` + +--- + +#### unwrap + +``` +self unwrap() +``` + +Unwrap each current node by removing its parent, replacing the parent +with its children (i.e. the current node and its siblings). + +Note that each node is operated on separately, so when you call +`unwrap()` on a `NodeList` containing two siblings, *two* parents will +be removed. + +##### Example + +``` php +$doc = (new Document())->html('
    '); +$doc->find('#first')->unwrap(); +``` + +*Result:* + +``` html +
    +
    +``` + +--- + +#### wrap + +``` +self wrap(string|NodeList|\DOMNode|callable $input) +``` + +Wrap the current node or nodes in the given structure. + +The wrapping structure can be nested, but should only contain one node +on each level (any extra siblings are removed). The outermost node +replaces the node operated on, while the node operated on is put into +the innermost node. + +If called on a `NodeList`, each of the nodes in the list will be separately +wrapped. When such a list contains multiple nodes, the argument to +wrap() cannot be a `NodeList` or `\DOMNode`, since those can be used +to wrap a node only once. A string or callable returning a string or a +unique `NodeList` or `\DOMNode` every time can be used in this case. + +When a callable is passed, it is called once for each node operated on, +passing that node and its index. The callable should return either a +string, or a unique `NodeList` or `\DOMNode` every time it is called. + +Note that this returns the original node like all other methods, not the +(new) node(s) wrapped around it. + +##### Example + +``` php +$doc = (new Document())->html('foobar'); +$doc->find('span')->wrap('

    '); +``` + +*Result:* + +``` html +

    foo

    +

    bar

    +``` + + +--- + +#### wrapAll + +``` +self wrapAll(string|NodeList|\DOMNode|callable $input) +``` + +Like [wrap()](#wrap), but when operating on multiple nodes, all of them +will be wrapped together in a single instance of the given structure, +rather than each of them individually. + +Note that the wrapping structure replaces the first node operated on, so +if the other nodes operated on are not siblings of the first, they will +be moved inside the document. + +##### Example + +``` php +$doc = (new Document())->html('foobar'); +$doc->find('span')->wrapAll('

    '); +``` + +*Result:* + +``` html +

    + foo + bar +

    +``` + +--- + +#### wrapInner + +``` +self wrapInner(string|NodeList|\DOMNode|callable $input) +``` + +Like [wrap()](#wrap), but rather than wrapping the nodes that are being +operated on, this wraps their contents. + +##### Example + +``` php +$doc = (new Document())->html('foobar'); +$doc->find('span')->wrapInner(''); +``` + +*Result:* + +``` html +foo +bar +``` + +--- + + +### Traversal + +#### add + +``` +NodeList add(string|NodeList|\DOMNode $input) +``` + +Add additional node(s) to the existing set. + +##### Example + +``` php +$nodes = $doc->find('a'); +$nodes->add($doc->find('p')); +``` + +--- + +#### children + +``` +NodeList children() +``` + +Return all children of each element node in the current set. + +##### Example + +``` php +$nodes = $doc->find('p'); +$childrenOfParagraphs = $nodes->children(); +``` + +--- + +#### closest + +``` +Element|NodeList|null closest(string|NodeList|\DOMNode|callable $input) +``` + +Return the first element matching the supplied input by traversing up through the ancestors of each node in the current set. + +##### Example + +``` php +$nodes = $doc->find('a'); +$closestAncestors = $nodes->closest('p'); +``` + +--- + +#### contents + +``` +NodeList contents() +``` + +Return all children of each node in the current set. + +##### Example + +``` php +$nodes = $doc->find('p'); +$contents = $nodes->contents(); +``` + +--- + +#### eq + +``` +\DOMNode|null eq(int $index) +``` + +Return node in the current set at the specified index. + +##### Example + +``` php +$nodes = $doc->find('a'); +$nodeAtIndexOne = $nodes->eq(1); +``` + +--- + +#### filter + +``` +NodeList filter(string|NodeList|\DOMNode|callable $input) +``` + +Return nodes in the current set that match the input. 
+ +##### Example + +``` php +$nodes = $doc->find('a'); +$exampleATags = $nodes->filter('[href*=https://example.org/]'); +``` + +--- + +#### find + +``` +NodeList find(string $selector[, string $prefix = 'descendant::']) +``` + +Return the descendants of the current set filtered by the selector and optional XPath axes. + +##### Example + +``` php +$nodes = $doc->find('a'); +``` + +--- + +#### first + +``` +mixed first() +``` + +Return the first node of the current set. + +##### Example + +``` php +$nodes = $doc->find('a'); +$firstNode = $nodes->first(); +``` + +--- + +#### has + +``` +NodeList has(string|NodeList|\DOMNode|callable $input) +``` + +Return nodes from the current set that have descendants matching the input. + +##### Example + +``` php +$nodes = $doc->find('a'); +$anchorTags = $nodes->has('span'); +``` + +--- + +#### is + +``` +bool is(string|NodeList|\DOMNode|callable $input) +``` + +Test if nodes from the current set match the input. + +##### Example + +``` php +$nodes = $doc->find('a'); +$isAnchor = $nodes->is('[anchor]'); +``` + +--- + +#### last + +``` +mixed last() +``` + +Return the last node of the current set. + +##### Example + +``` php +$nodes = $doc->find('a'); +$lastNode = $nodes->last(); +``` + +--- + +#### map + +``` +NodeList map(callable $function) +``` + +Apply a callback to nodes in the current set and return a new NodeList. + +##### Example + +``` php +$nodes = $doc->find('a'); +$nodeValues = $nodes->map(function($node) { + return $node->nodeValue; +}); +``` + +--- + +#### following + +``` +\DOMNode|null following([string|NodeList|\DOMNode|callable $selector = null]) +``` + +Return the sibling immediately following each element node in the current set. 
+ +*Optionally filtered by selector.* + +##### Example + +``` php +$nodes = $doc->find('a'); +$followingNodes = $nodes->following(); +``` + +--- + +#### followingAll + +``` +NodeList followingAll([string|NodeList|\DOMNode|callable $selector = null]) +``` + +Return all siblings following each element node in the current set. + +*Optionally filtered by selector.* + +##### Example + +``` php +$nodes = $doc->find('a'); +$followingAllNodes = $nodes->followingAll('[anchor]'); +``` + +--- + +#### followingUntil + +``` +NodeList followingUntil([[string|NodeList|\DOMNode|callable $input = null], string|NodeList|\DOMNode|callable $selector = null]) +``` + +Return all siblings following each element node in the current set up to but not including the node matched by $input. + +*Optionally filtered by input.*
    +*Optionally filtered by selector.* + +##### Example + +``` php +$nodes = $doc->find('a'); +$followingUntilNodes = $nodes->followingUntil('.submit'); +``` + +--- + +#### not + +``` +NodeList not(string|NodeList|\DOMNode|callable $input) +``` + +Return element nodes from the current set not matching the input. + +##### Example + +``` php +$nodes = $doc->find('a'); +$missingHrefAttribute = $nodes->not('[href]'); +``` + +--- + +#### parent + +``` +Element|NodeList|null parent([string|NodeList|\DOMNode|callable $selector = null]) +``` + +Return the immediate parent of each element node in the current set. + +*Optionally filtered by selector.* + +##### Example + +``` php +$nodes = $doc->find('a'); +$parentNodes = $nodes->parent(); +``` + +--- + +#### parents + +``` +NodeList parents([string $selector = null]) +``` + +Return the ancestors of each element node in the current set. + +*Optionally filtered by selector.* + +##### Example + +``` php +$nodes = $doc->find('a'); +$ancestorDivNodes = $nodes->parents('div'); +``` + +--- + +#### parentsUntil + +``` +NodeList parentsUntil([[string|NodeList|\DOMNode|callable $input, [string|NodeList|\DOMNode|callable $selector = null]) +``` + +Return the ancestors of each element node in the current set up to but not including the node matched by $selector. + +*Optionally filtered by input.*
    +*Optionally filtered by selector.* + +##### Example + +``` php +$nodes = $doc->find('a'); +$ancestorDivNodes = $nodes->parentsUntil('div'); +``` + +--- + +#### preceding + +``` +\DOMNode|null preceding([string|NodeList|\DOMNode|callable $selector = null]) +``` + +Return the sibling immediately preceding each element node in the current set. + +*Optionally filtered by selector.* + +##### Example + +``` php +$nodes = $doc->find('a'); +$precedingNodes = $nodes->preceding(); +``` + +--- + +#### precedingAll + +``` +NodeList precedingAll([string|NodeList|\DOMNode|callable $selector = null]) +``` + +Return all siblings preceding each element node in the current set. + +*Optionally filtered by selector.* + +##### Example + +``` php +$nodes = $doc->find('a'); +$precedingAllNodes = $nodes->precedingAll('[anchor]'); +``` + +--- +#### precedingUntil + +``` +NodeList precedingUntil([[string|NodeList|\DOMNode|callable $input = null], string|NodeList|\DOMNode|callable $selector = null]) +``` + +Return all siblings preceding each element node in the current set up to but not including the node matched by $input. + +*Optionally filtered by input.*
    +*Optionally filtered by selector.* + +##### Example + +``` php +$nodes = $doc->find('a'); +$precedingUntilNodes = $nodes->precedingUntil('.submit'); +``` + +--- + +#### siblings + +``` +NodeList siblings([string|NodeList|\DOMNode|callable $selector = null]) +``` + +Return siblings of each element node in the current set. + +*Optionally filtered by selector.* + +##### Example + +``` php +$nodes = $doc->find('p'); +$siblings = $nodes->siblings(); +``` + +--- + +#### slice + +``` +NodeList slice(int $start[, int $end]) +``` + +Return a subset of the current set based on the start and end indexes. + +##### Example + +``` php +$nodes = $doc->find('p'); +// Return nodes 1 through to 3 as a new NodeList +$slicedNodes = $nodes->slice(1, 3); +``` + +--- + +### Additional Methods + +#### count + +``` +int count() +``` + +##### Example + +``` php +$nodes = $doc->find('p'); + +echo $nodes->count(); +``` + +--- + +#### each + +``` +self each(callable $function) +``` + +##### Example + +``` php +$nodes = $doc->find('p'); + +$nodes->each(function($node){ + echo $node->nodeName . "\n"; +}); +``` + +## Licensing + +PHP DOM Wrapper is licensed by Andrew Scott under the BSD 3-Clause License; see the LICENSE file for more details.
diff --git a/include/thirdparty/dom/Text.php b/include/thirdparty/dom/Text.php new file mode 100644 index 0000000..931ce4c --- /dev/null +++ b/include/thirdparty/dom/Text.php @@ -0,0 +1,24 @@ +nodeType); + } +} \ No newline at end of file diff --git a/include/thirdparty/dom/Traits/ManipulationTrait.php b/include/thirdparty/dom/Traits/ManipulationTrait.php new file mode 100644 index 0000000..183203b --- /dev/null +++ b/include/thirdparty/dom/Traits/ManipulationTrait.php @@ -0,0 +1,748 @@ +getOuterHtml(true); + } + + /** + * @param string|NodeList|\DOMNode $input + * + * @return iterable + */ + protected function inputPrepareAsTraversable(string|NodeList|\DOMNode $input): iterable { + if ($input instanceof \DOMNode) { + // Handle raw \DOMNode elements and 'convert' them into their DOMWrap/* counterpart + if (!method_exists($input, 'inputPrepareAsTraversable')) { + $input = $this->document()->importNode($input, true); + } + + $nodes = [$input]; + } else if (is_string($input)) { + $nodes = $this->nodesFromHtml($input); + } else if (is_iterable($input)) { + $nodes = $input; + } else { + throw new \InvalidArgumentException(); + } + + return $nodes; + } + + /** + * @param string|NodeList|\DOMNode $input + * @param bool $cloneForManipulate + * + * @return NodeList + */ + protected function inputAsNodeList(string|NodeList|\DOMNode $input, bool $cloneForManipulate = true): NodeList { + $nodes = $this->inputPrepareAsTraversable($input); + + $newNodes = $this->newNodeList(); + + foreach ($nodes as $node) { + if ($node->document() !== $this->document()) { + $node = $this->document()->importNode($node, true); + } + + if ($cloneForManipulate && $node->parentNode !== null) { + $node = $node->cloneNode(true); + } + + $newNodes[] = $node; + } + + return $newNodes; + } + + /** + * @param string|NodeList|\DOMNode $input + * + * @return \DOMNode|null + */ + protected function inputAsFirstNode(string|NodeList|\DOMNode $input): ?\DOMNode { + $nodes = $this->inputAsNodeList($input); + + 
return $nodes->findXPath('self::*')->first(); + } + + /** + * @param string $html + * + * @return NodeList + */ + protected function nodesFromHtml(string $html): NodeList { + $class = get_class($this->document()); + $doc = new $class(); + $doc->setEncoding($this->document()->getEncoding()); + $nodes = $doc->html($html)->find('body')->contents(); + + return $nodes; + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * @param callable $callback + * + * @return self + */ + protected function manipulateNodesWithInput(string|NodeList|\DOMNode|callable $input, callable $callback): self { + $this->collection()->each(function($node, $index) use ($input, $callback) { + $html = $input; + + /*if ($input instanceof \DOMNode) { + if ($input->parentNode !== null) { + $html = $input->cloneNode(true); + } + } else*/if (is_callable($input)) { + $html = $input($node, $index); + } + + $newNodes = $this->inputAsNodeList($html); + + $callback($node, $newNodes); + }); + + return $this; + } + + /** + * @param string|null $selector + * + * @return NodeList + */ + public function detach(?string $selector = null): NodeList { + if (!is_null($selector)) { + $nodes = $this->find($selector, 'self::'); + } else { + $nodes = $this->collection(); + } + + $nodeList = $this->newNodeList(); + + $nodes->each(function($node) use($nodeList) { + if ($node->parent() instanceof \DOMNode) { + $nodeList[] = $node->parent()->removeChild($node); + } + }); + + $nodes->fromArray([]); + + return $nodeList; + } + + /** + * @param string|null $selector + * + * @return self + */ + public function destroy(?string $selector = null): self { + $this->detach($selector); + + return $this; + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * + * @return self + */ + public function substituteWith(string|NodeList|\DOMNode|callable $input): self { + $this->manipulateNodesWithInput($input, function($node, $newNodes) { + foreach ($newNodes as $newNode) { + $node->parent()->replaceChild($newNode, 
$node); + } + }); + + return $this; + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * + * @return string|self + */ + public function text(string|NodeList|\DOMNode|callable|null $input = null): string|self { + if (is_null($input)) { + return $this->getText(); + } else { + return $this->setText($input); + } + } + + /** + * @return string + */ + public function getText(): string { + return (string)$this->collection()->reduce(function($carry, $node) { + return $carry . $node->textContent; + }, ''); + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * + * @return self + */ + public function setText(string|NodeList|\DOMNode|callable $input): self { + if (is_string($input)) { + $input = new Text($input); + } + + $this->manipulateNodesWithInput($input, function($node, $newNodes) { + // Remove old contents from the current node. + $node->contents()->destroy(); + + // Add new contents in it's place. + $node->appendWith(new Text($newNodes->getText())); + }); + + return $this; + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * + * @return self + */ + public function precede(string|NodeList|\DOMNode|callable $input): self { + $this->manipulateNodesWithInput($input, function($node, $newNodes) { + foreach ($newNodes as $newNode) { + $node->parent()->insertBefore($newNode, $node); + } + }); + + return $this; + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * + * @return self + */ + public function follow(string|NodeList|\DOMNode|callable $input): self { + $this->manipulateNodesWithInput($input, function($node, $newNodes) { + foreach ($newNodes as $newNode) { + if (is_null($node->following())) { + $node->parent()->appendChild($newNode); + } else { + $node->parent()->insertBefore($newNode, $node->following()); + } + } + }); + + return $this; + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * + * @return self + */ + public function prependWith(string|NodeList|\DOMNode|callable $input): self { + 
$this->manipulateNodesWithInput($input, function($node, $newNodes) { + foreach ($newNodes as $newNode) { + $node->insertBefore($newNode, $node->contents()->first()); + } + }); + + return $this; + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * + * @return self + */ + public function appendWith(string|NodeList|\DOMNode|callable $input): self { + $this->manipulateNodesWithInput($input, function($node, $newNodes) { + foreach ($newNodes as $newNode) { + $node->appendChild($newNode); + } + }); + + return $this; + } + + /** + * @param string|NodeList|\DOMNode $selector + * + * @return self + */ + public function prependTo(string|NodeList|\DOMNode $selector): self { + if ($selector instanceof \DOMNode || $selector instanceof NodeList) { + $nodes = $this->inputAsNodeList($selector); + } else { + $nodes = $this->document()->find($selector); + } + + $nodes->prependWith($this); + + return $this; + } + + /** + * @param string|NodeList|\DOMNode $selector + * + * @return self + */ + public function appendTo(string|NodeList|\DOMNode $selector): self { + if ($selector instanceof \DOMNode || $selector instanceof NodeList) { + $nodes = $this->inputAsNodeList($selector); + } else { + $nodes = $this->document()->find($selector); + } + + $nodes->appendWith($this); + + return $this; + } + + /** + * @return self + */ + public function _empty(): self { + $this->collection()->each(function($node) { + $node->contents()->destroy(); + }); + + return $this; + } + + /** + * @return NodeList|\DOMNode + */ + public function _clone(): NodeList|\DOMNode { + $clonedNodes = $this->newNodeList(); + + $this->collection()->each(function($node) use($clonedNodes) { + $clonedNodes[] = $node->cloneNode(true); + }); + + return $this->result($clonedNodes); + } + + /** + * @param string $name + * + * @return self + */ + public function removeAttr(string $name): self { + $this->collection()->each(function($node) use($name) { + if ($node instanceof \DOMElement) { + 
$node->removeAttribute($name); + } + }); + + return $this; + } + + /** + * @param string $name Attribute name to look for. + * + * @return bool True if at least one element node in the collection has the attribute; non-element nodes (text, comments) are skipped, matching removeAttr/getAttr/setAttr. + */ + public function hasAttr(string $name): bool { + return (bool)$this->collection()->reduce(function($carry, $node) use ($name) { + if ($node instanceof \DOMElement && $node->hasAttribute($name)) { + return true; + } + + return $carry; + }, false); + } + + /** + * @internal + * + * @param string $name + * + * @return string + */ + public function getAttr(string $name): string { + $node = $this->collection()->first(); + + if (!($node instanceof \DOMElement)) { + return ''; + } + + return $node->getAttribute($name); + } + + /** + * @internal + * + * @param string $name + * @param mixed $value + * + * @return self + */ + public function setAttr(string $name, mixed $value): self { + $this->collection()->each(function($node) use($name, $value) { + if ($node instanceof \DOMElement) { + $node->setAttribute($name, (string)$value); + } + }); + + return $this; + } + + /** + * @param string $name + * @param mixed $value + * + * @return self|string + */ + public function attr(string $name, mixed $value = null): string|self { + if (is_null($value)) { + return $this->getAttr($name); + } else { + return $this->setAttr($name, $value); + } + } + + /** + * @internal + * + * @param string $name + * @param string|callable $value + * @param bool $addValue + */ + protected function _pushAttrValue(string $name, string|callable $value, bool $addValue = false): void { + $this->collection()->each(function($node, $index) use($name, $value, $addValue) { + if ($node instanceof \DOMElement) { + $attr = $node->getAttribute($name); + + if (is_callable($value)) { + $value = $value($node, $index, $attr); + } + + // Remove any existing instances of the value, or empty values.
+ $values = array_filter(explode(' ', $attr), function($_value) use($value) { + if (strcasecmp($_value, $value) == 0 || empty($_value)) { + return false; + } + + return true; + }); + + // If required add attr value to array + if ($addValue) { + $values[] = $value; + } + + // Set the attr if we either have values, or the attr already + // existed (we might be removing classes). + // + // Don't set the attr if it doesn't already exist. + if (!empty($values) || $node->hasAttribute($name)) { + $node->setAttribute($name, implode(' ', $values)); + } + } + }); + } + + /** + * @param string|callable $class + * + * @return self + */ + public function addClass(string|callable $class): self { + $this->_pushAttrValue('class', $class, true); + + return $this; + } + + /** + * @param string|callable $class + * + * @return self + */ + public function removeClass(string|callable $class): self { + $this->_pushAttrValue('class', $class); + + return $this; + } + + /** + * @param string $class Class name to look for (compared case-insensitively via strcasecmp). + * + * @return bool True if any node in the collection carries the class; the inner fold is seeded with the running $carry so an earlier match is not lost when a later node lacks the class. + */ + public function hasClass(string $class): bool { + return (bool)$this->collection()->reduce(function($carry, $node) use ($class) { + $attr = $node->getAttr('class'); + + return array_reduce(explode(' ', (string)$attr), function($carry, $item) use ($class) { + if (strcasecmp($item, $class) == 0) { + return true; + } + + return $carry; + }, $carry); + }, false); + } + + /** + * @param Element $node + * + * @return \SplStack + */ + protected function _getFirstChildWrapStack(Element $node): \SplStack { + $stack = new \SplStack; + + do { + // Push our current node onto the stack + $stack->push($node); + + // Get the first element child node + $node = $node->children()->first(); + } while ($node instanceof Element); + + // Get the top most node. + return $stack; + } + + /** + * @param Element $node + * + * @return \SplStack + */ + protected function _prepareWrapStack(Element $node): \SplStack { + // Generate a stack (root to leaf) of the wrapper.
+ // Includes only first element nodes / first element children. + $stackNodes = $this->_getFirstChildWrapStack($node); + + // Only using the first element, remove any siblings. + foreach ($stackNodes as $stackNode) { + $stackNode->siblings()->destroy(); + } + + return $stackNodes; + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * @param callable $callback + */ + protected function wrapWithInputByCallback(string|NodeList|\DOMNode|callable $input, callable $callback): void { + $this->collection()->each(function($node, $index) use ($input, $callback) { + $html = $input; + + if (is_callable($input)) { + $html = $input($node, $index); + } + + $inputNode = $this->inputAsFirstNode($html); + + if ($inputNode instanceof Element) { + // Pre-process wrapper into a stack of first element nodes. + $stackNodes = $this->_prepareWrapStack($inputNode); + + $callback($node, $stackNodes); + } + }); + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * + * @return self + */ + public function wrapInner(string|NodeList|\DOMNode|callable $input): self { + $this->wrapWithInputByCallback($input, function($node, $stackNodes) { + foreach ($node->contents() as $child) { + // Remove child from the current node + $oldChild = $child->detach()->first(); + + // Add it back as a child of the top (leaf) node on the stack + $stackNodes->top()->appendWith($oldChild); + } + + // Add the bottom (root) node on the stack + $node->appendWith($stackNodes->bottom()); + }); + + return $this; + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * + * @return self + */ + public function wrap(string|NodeList|\DOMNode|callable $input): self { + $this->wrapWithInputByCallback($input, function($node, $stackNodes) { + // Add the new bottom (root) node after the current node + $node->follow($stackNodes->bottom()); + + // Remove the current node + $oldNode = $node->detach()->first(); + + // Add the 'current node' back inside the new top (leaf) node. 
+ $stackNodes->top()->appendWith($oldNode); + }); + + return $this; + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * + * @return self + */ + public function wrapAll(string|NodeList|\DOMNode|callable $input): self { + if (!$this->collection()->count()) { + return $this; + } + + if (is_callable($input)) { + $input = $input($this->collection()->first()); + } + + $inputNode = $this->inputAsFirstNode($input); + + if (!($inputNode instanceof Element)) { + return $this; + } + + $stackNodes = $this->_prepareWrapStack($inputNode); + + // Add the new bottom (root) node before the first matched node + $this->collection()->first()->precede($stackNodes->bottom()); + + $this->collection()->each(function($node) use ($stackNodes) { + // Detach and add node back inside the new wrappers top (leaf) node. + $stackNodes->top()->appendWith($node->detach()); + }); + + return $this; + } + + /** + * @return self + */ + public function unwrap(): self { + $this->collection()->each(function($node) { + $parent = $node->parent(); + + // Replace parent node (the one we're unwrapping) with it's children. + $parent->contents()->each(function($childNode) use($parent) { + $oldChildNode = $childNode->detach()->first(); + + $parent->precede($oldChildNode); + }); + + $parent->destroy(); + }); + + return $this; + } + + /** + * @param bool $isIncludeAll + * + * @return string + */ + public function getOuterHtml(bool $isIncludeAll = false): string { + $nodes = $this->collection(); + + if (!$isIncludeAll) { + $nodes = $this->newNodeList([$nodes->first()]); + } + + return $nodes->reduce(function($carry, $node) { + return $carry . 
$this->document()->saveHTML($node); + }, ''); + } + + /** + * @param bool $isIncludeAll + * + * @return string + */ + public function getHtml(bool $isIncludeAll = false): string { + $nodes = $this->collection(); + + if (!$isIncludeAll) { + $nodes = $this->newNodeList([$nodes->first()]); + } + + return $nodes->contents()->reduce(function($carry, $node) { + return $carry . $this->document()->saveHTML($node); + }, ''); + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * + * @return self + */ + public function setHtml(string|NodeList|\DOMNode|callable $input): self { + $this->manipulateNodesWithInput($input, function($node, $newNodes) { + // Remove old contents from the current node. + $node->contents()->destroy(); + + // Add new contents in it's place. + $node->appendWith($newNodes); + }); + + return $this; + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * + * @return string|self + */ + public function html(string|NodeList|\DOMNode|callable|null $input = null): string|self { + if (is_null($input)) { + return $this->getHtml(); + } else { + return $this->setHtml($input); + } + } + + /** + * @param string|NodeList|\DOMNode $input + * + * @return NodeList + */ + public function create(string|NodeList|\DOMNode $input): NodeList { + return $this->inputAsNodeList($input); + } +} \ No newline at end of file diff --git a/include/thirdparty/dom/Traits/NodeTrait.php b/include/thirdparty/dom/Traits/NodeTrait.php new file mode 100644 index 0000000..0fb8be1 --- /dev/null +++ b/include/thirdparty/dom/Traits/NodeTrait.php @@ -0,0 +1,46 @@ +newNodeList([$this]); + } + + /** + * @return \DOMDocument + */ + public function document(): ?\DOMDocument { + if ($this->isRemoved()) { + return null; + } + + return $this->ownerDocument; + } + + /** + * @param NodeList $nodeList + * + * @return NodeList|\DOMNode|null + */ + public function result(NodeList $nodeList): NodeList|\DOMNode|null { + if ($nodeList->count()) { + return $nodeList->first(); + } + + return 
null; + } +} \ No newline at end of file diff --git a/include/thirdparty/dom/Traits/TraversalTrait.php b/include/thirdparty/dom/Traits/TraversalTrait.php new file mode 100644 index 0000000..8b05f4a --- /dev/null +++ b/include/thirdparty/dom/Traits/TraversalTrait.php @@ -0,0 +1,468 @@ +document(), $nodes); + } + + /** + * @param string $selector + * @param string $prefix + * + * @return NodeList + */ + public function find(string $selector, string $prefix = 'descendant::'): NodeList { + if (!self::$cssSelectorConverter) { + self::$cssSelectorConverter = new CssSelectorConverter(); + } + + return $this->findXPath(self::$cssSelectorConverter->toXPath($selector, $prefix)); + } + + /** + * @param string $xpath + * + * @return NodeList + */ + public function findXPath(string $xpath): NodeList { + $results = $this->newNodeList(); + + if ($this->isRemoved()) { + return $results; + } + + $domxpath = new \DOMXPath($this->document()); + + foreach ($this->collection() as $node) { + $results = $results->merge( + $node->newNodeList($domxpath->query($xpath, $node)) + ); + } + + return $results; + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * @param bool $matchType + * + * @return NodeList + */ + protected function getNodesMatchingInput(string|NodeList|\DOMNode|callable $input, bool $matchType = true): NodeList { + if ($input instanceof NodeList || $input instanceof \DOMNode) { + $inputNodes = $this->inputAsNodeList($input, false); + + $fn = function($node) use ($inputNodes) { + return $inputNodes->exists($node); + }; + + + } elseif (is_callable($input)) { + // Since we're at the behest of the input callable, the 'matched' + // return value is always true. + $matchType = true; + + $fn = $input; + + } elseif (is_string($input)) { + $fn = function($node) use ($input) { + return $node->find($input, 'self::')->count() != 0; + }; + + } else { + throw new \InvalidArgumentException('Unexpected input value of type "' . gettype($input) . 
'"'); + } + + // Build a list of matching nodes. + return $this->collection()->map(function($node) use ($fn, $matchType) { + if ($fn($node) !== $matchType) { + return null; + } + + return $node; + }); + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * + * @return bool + */ + public function is(string|NodeList|\DOMNode|callable $input): bool { + return $this->getNodesMatchingInput($input)->count() != 0; + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * + * @return NodeList + */ + public function not(string|NodeList|\DOMNode|callable $input): NodeList { + return $this->getNodesMatchingInput($input, false); + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * + * @return NodeList + */ + public function filter(string|NodeList|\DOMNode|callable $input): NodeList { + return $this->getNodesMatchingInput($input); + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * + * @return NodeList + */ + public function has(string|NodeList|\DOMNode|callable $input): NodeList { + if ($input instanceof NodeList || $input instanceof \DOMNode) { + $inputNodes = $this->inputAsNodeList($input, false); + + $fn = function($node) use ($inputNodes) { + $descendantNodes = $node->find('*', 'descendant::'); + + // Determine if we have a descendant match. + return $inputNodes->reduce(function($carry, $inputNode) use ($descendantNodes) { + // Match descendant nodes against input nodes. + if ($descendantNodes->exists($inputNode)) { + return true; + } + + return $carry; + }, false); + }; + + } elseif (is_string($input)) { + $fn = function($node) use ($input) { + return $node->find($input, 'descendant::')->count() != 0; + }; + + } elseif (is_callable($input)) { + $fn = $input; + + } else { + throw new \InvalidArgumentException('Unexpected input value of type "' . gettype($input) . 
'"'); + } + + return $this->getNodesMatchingInput($fn); + } + + /** + * @param string|NodeList|\DOMNode|callable $selector + * + * @return \DOMNode|null + */ + public function preceding(string|NodeList|\DOMNode|callable|null $selector = null): ?\DOMNode { + return $this->precedingUntil(null, $selector)->first(); + } + + /** + * @param string|NodeList|\DOMNode|callable $selector + * + * @return NodeList + */ + public function precedingAll(string|NodeList|\DOMNode|callable|null $selector = null): NodeList { + return $this->precedingUntil(null, $selector); + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * @param string|NodeList|\DOMNode|callable $selector + * + * @return NodeList + */ + public function precedingUntil(string|NodeList|\DOMNode|callable|null $input = null, string|NodeList|\DOMNode|callable|null $selector = null): NodeList { + return $this->_walkPathUntil('previousSibling', $input, $selector); + } + + /** + * @param string|NodeList|\DOMNode|callable $selector + * + * @return \DOMNode|null + */ + public function following(string|NodeList|\DOMNode|callable|null $selector = null): ?\DOMNode { + return $this->followingUntil(null, $selector)->first(); + } + + /** + * @param string|NodeList|\DOMNode|callable $selector + * + * @return NodeList + */ + public function followingAll(string|NodeList|\DOMNode|callable|null $selector = null): NodeList { + return $this->followingUntil(null, $selector); + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * @param string|NodeList|\DOMNode|callable $selector + * + * @return NodeList + */ + public function followingUntil(string|NodeList|\DOMNode|callable|null $input = null, string|NodeList|\DOMNode|callable|null $selector = null): NodeList { + return $this->_walkPathUntil('nextSibling', $input, $selector); + } + + /** + * @param string|NodeList|\DOMNode|callable $selector + * + * @return NodeList + */ + public function siblings(string|NodeList|\DOMNode|callable|null $selector = null): 
NodeList { + $results = $this->collection()->reduce(function($carry, $node) use ($selector) { + return $carry->merge( + $node->precedingAll($selector)->merge( + $node->followingAll($selector) + ) + ); + }, $this->newNodeList()); + + return $results; + } + + /** + * NodeList is only array like. Removing items using foreach() has undesired results. + * + * @return NodeList + */ + public function children(): NodeList { + $results = $this->collection()->reduce(function($carry, $node) { + return $carry->merge( + $node->findXPath('child::*') + ); + }, $this->newNodeList()); + + return $results; + } + + /** + * @param string|NodeList|\DOMNode|callable $selector + * + * @return Document|Element|NodeList|null + */ + public function parent(string|NodeList|\DOMNode|callable|null $selector = null): Document|Element|NodeList|null { + $results = $this->_walkPathUntil('parentNode', null, $selector, self::$MATCH_TYPE_FIRST); + + return $this->result($results); + } + + /** + * @param int $index + * + * @return \DOMNode|null + */ + public function eq(int $index): ?\DOMNode { + if ($index < 0) { + $index = $this->collection()->count() + $index; + } + + return $this->collection()->offsetGet($index); + } + + /** + * @param string|null $selector Optional CSS selector used to filter the ancestors; null returns every ancestor. Declared explicitly nullable because implicit nullability via a null default is deprecated in PHP 8.4. + * + * @return NodeList Ancestors of each node in the collection, as produced by parentsUntil(null, $selector). + */ + public function parents(?string $selector = null): NodeList { + return $this->parentsUntil(null, $selector); + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * @param string|NodeList|\DOMNode|callable $selector + * + * @return NodeList + */ + public function parentsUntil(string|NodeList|\DOMNode|callable|null $input = null, string|NodeList|\DOMNode|callable|null $selector = null): NodeList { + return $this->_walkPathUntil('parentNode', $input, $selector); + } + + /** + * @return \DOMNode + */ + public function intersect(): \DOMNode { + if ($this->collection()->count() < 2) { + return $this->collection()->first(); + } + + $nodeParents = []; + + // Build a multi-dimensional array of the collection nodes
parent elements + $this->collection()->each(function($node) use(&$nodeParents) { + $nodeParents[] = $node->parents()->unshift($node)->toArray(); + }); + + // Find the common parent + $diff = call_user_func_array('array_uintersect', array_merge($nodeParents, [function($a, $b) { + return strcmp(spl_object_hash($a), spl_object_hash($b)); + }])); + + return array_shift($diff); + } + + /** + * @param string|NodeList|\DOMNode|callable $input + * + * @return Document|Element|NodeList|null + */ + public function closest(string|NodeList|\DOMNode|callable|null $input): Document|Element|NodeList|null { + $results = $this->_walkPathUntil('parentNode', $input, null, self::$MATCH_TYPE_LAST); + + return $this->result($results); + } + + /** + * NodeList is only array like. Removing items using foreach() has undesired results. + * + * @return NodeList + */ + public function contents(): NodeList { + $results = $this->collection()->reduce(function($carry, $node) { + if ($node->isRemoved()) { + return $carry; + } + + return $carry->merge( + $node->newNodeList($node->childNodes) + ); + }, $this->newNodeList()); + + return $results; + } + + /** + * @param string|NodeList|\DOMNode $input + * + * @return NodeList + */ + public function add(string|NodeList|\DOMNode $input): NodeList { + $nodes = $this->inputAsNodeList($input); + + $results = $this->collection()->merge( + $nodes + ); + + return $results; + } + + /** @var int */ + private static $MATCH_TYPE_FIRST = 1; + + /** @var int */ + private static $MATCH_TYPE_LAST = 2; + + /** + * @param \DOMNode $baseNode + * @param string $property + * @param string|NodeList|\DOMNode|callable $input + * @param string|NodeList|\DOMNode|callable $selector + * @param int $matchType + * + * @return NodeList + */ + protected function _buildNodeListUntil(\DOMNode $baseNode, string $property, string|NodeList|\DOMNode|callable|null $input = null, string|NodeList|\DOMNode|callable|null $selector = null, ?int $matchType = null): NodeList { + $resultNodes = 
$this->newNodeList(); + + // Get our first node + $node = $baseNode->$property; + + // Keep looping until we are out of nodes. + // Allow either FIRST to reach \DOMDocument. Others that return multiple should ignore it. + while ($node instanceof \DOMNode && ($matchType === self::$MATCH_TYPE_FIRST || !($node instanceof \DOMDocument))) { + // Filter nodes if not matching last + if ($matchType != self::$MATCH_TYPE_LAST && (is_null($selector) || $node->is($selector))) { + $resultNodes[] = $node; + } + + // 'Until' check or first match only + if ($matchType == self::$MATCH_TYPE_FIRST || (!is_null($input) && $node->is($input))) { + // Set last match + if ($matchType == self::$MATCH_TYPE_LAST) { + $resultNodes[] = $node; + } + + break; + } + + // Find the next node + $node = $node->{$property}; + } + + return $resultNodes; + } + + /** + * @param iterable $nodeLists + * + * @return NodeList + */ + protected function _uniqueNodes(iterable $nodeLists): NodeList { + $resultNodes = $this->newNodeList(); + + // Loop through our array of NodeLists + foreach ($nodeLists as $nodeList) { + // Each node in the NodeList + foreach ($nodeList as $node) { + // We're only interested in unique nodes + if (!$resultNodes->exists($node)) { + $resultNodes[] = $node; + } + } + } + + // Sort resulting NodeList: outer-most => inner-most. 
+ return $resultNodes->reverse(); + } + + /** + * @param string $property + * @param string|NodeList|\DOMNode|callable $input + * @param string|NodeList|\DOMNode|callable $selector + * @param int $matchType + * + * @return NodeList + */ + protected function _walkPathUntil(string $property, string|NodeList|\DOMNode|callable|null $input = null, string|NodeList|\DOMNode|callable|null $selector = null, ?int $matchType = null): NodeList { + $nodeLists = []; + + $this->collection()->each(function($node) use($property, $input, $selector, $matchType, &$nodeLists) { + $nodeLists[] = $this->_buildNodeListUntil($node, $property, $input, $selector, $matchType); + }); + + return $this->_uniqueNodes($nodeLists); + } +} \ No newline at end of file