Skip to content

Commit

Permalink
DomQuery: uses PHP 8.4 HTML DOM
Browse files (browse the repository at this point in the history)
  • Loading branch information
dg committed Nov 23, 2024
1 parent c118637 commit b9c0ed6
Showing 1 changed file with 26 additions and 15 deletions.
41 changes: 26 additions & 15 deletions src/Framework/DomQuery.php
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,6 +9,8 @@

namespace Tester;

use Dom;


/**
* DomQuery simplifies querying (X)HTML documents.
Expand All @@ -20,24 +22,33 @@ class DomQuery extends \SimpleXMLElement
*/
public static function fromHtml(string $html): self
{
if (!str_contains($html, '<')) {
$html = '<body>' . $html;
}
$old = libxml_use_internal_errors(true);
libxml_clear_errors();

// parse these elements as void
$html = preg_replace('#<(keygen|source|track|wbr)(?=\s|>)((?:"[^"]*"|\'[^\']*\'|[^"\'>])*+)(?<!/)>#', '<$1$2 />', $html);
if (PHP_VERSION_ID < 80400) {
if (!str_contains($html, '<')) {
$html = '<body>' . $html;
}

// fix parsing of </ inside scripts
$html = preg_replace_callback(
'#(<script(?=\s|>)(?:"[^"]*"|\'[^\']*\'|[^"\'>])*+>)(.*?)(</script>)#s',
fn(array $m): string => $m[1] . str_replace('</', '<\/', $m[2]) . $m[3],
$html,
);
// parse these elements as void
$html = preg_replace('#<(keygen|source|track|wbr)(?=\s|>)((?:"[^"]*"|\'[^\']*\'|[^"\'>])*+)(?<!/)>#', '<$1$2 />', $html);

// fix parsing of </ inside scripts
$html = preg_replace_callback(
'#(<script(?=\s|>)(?:"[^"]*"|\'[^\']*\'|[^"\'>])*+>)(.*?)(</script>)#s',
fn(array $m): string => $m[1] . str_replace('</', '<\/', $m[2]) . $m[3],
$html,
);

$dom = new \DOMDocument;
$dom->loadHTML($html);
} else {
if (!preg_match('~<!DOCTYPE~i', $html)) {
$html = '<!DOCTYPE html>' . $html;
}
$dom = Dom\HTMLDocument::createFromString($html, Dom\HTML_NO_DEFAULT_NS);
}

$dom = new \DOMDocument;
$old = libxml_use_internal_errors(true);
libxml_clear_errors();
$dom->loadHTML($html);
$errors = libxml_get_errors();
libxml_use_internal_errors($old);

Expand Down

0 comments on commit b9c0ed6

Please sign in to comment.