mirror of
https://github.com/BookStackApp/BookStack.git
synced 2025-08-09 10:22:51 +03:00
Filtered scripts in custom HTML head for exports
Since it appeared to cause problems in some scenarios. Related to #2490
This commit is contained in:
71
app/Util/HtmlContentFilter.php
Normal file
71
app/Util/HtmlContentFilter.php
Normal file
@@ -0,0 +1,71 @@
|
||||
<?php namespace BookStack\Util;
|
||||
|
||||
use DOMDocument;
|
||||
use DOMNode;
|
||||
use DOMNodeList;
|
||||
use DOMXPath;
|
||||
|
||||
class HtmlContentFilter
|
||||
{
|
||||
/**
|
||||
* Remove all of the script elements from the given HTML.
|
||||
*/
|
||||
public static function removeScripts(string $html): string
|
||||
{
|
||||
if (empty($html)) {
|
||||
return $html;
|
||||
}
|
||||
|
||||
libxml_use_internal_errors(true);
|
||||
$doc = new DOMDocument();
|
||||
$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
|
||||
$xPath = new DOMXPath($doc);
|
||||
|
||||
// Remove standard script tags
|
||||
$scriptElems = $xPath->query('//script');
|
||||
static::removeNodes($scriptElems);
|
||||
|
||||
// Remove clickable links to JavaScript URI
|
||||
$badLinks = $xPath->query('//*[contains(@href, \'javascript:\')]');
|
||||
static::removeNodes($badLinks);
|
||||
|
||||
// Remove forms with calls to JavaScript URI
|
||||
$badForms = $xPath->query('//*[contains(@action, \'javascript:\')] | //*[contains(@formaction, \'javascript:\')]');
|
||||
static::removeNodes($badForms);
|
||||
|
||||
// Remove meta tag to prevent external redirects
|
||||
$metaTags = $xPath->query('//meta[contains(@content, \'url\')]');
|
||||
static::removeNodes($metaTags);
|
||||
|
||||
// Remove data or JavaScript iFrames
|
||||
$badIframes = $xPath->query('//*[contains(@src, \'data:\')] | //*[contains(@src, \'javascript:\')] | //*[@srcdoc]');
|
||||
static::removeNodes($badIframes);
|
||||
|
||||
// Remove 'on*' attributes
|
||||
$onAttributes = $xPath->query('//@*[starts-with(name(), \'on\')]');
|
||||
foreach ($onAttributes as $attr) {
|
||||
/** @var \DOMAttr $attr*/
|
||||
$attrName = $attr->nodeName;
|
||||
$attr->parentNode->removeAttribute($attrName);
|
||||
}
|
||||
|
||||
$html = '';
|
||||
$topElems = $doc->documentElement->childNodes->item(0)->childNodes;
|
||||
foreach ($topElems as $child) {
|
||||
$html .= $doc->saveHTML($child);
|
||||
}
|
||||
|
||||
return $html;
|
||||
}
|
||||
|
||||
/**
|
||||
* Removed all of the given DOMNodes.
|
||||
*/
|
||||
static protected function removeNodes(DOMNodeList $nodes): void
|
||||
{
|
||||
foreach ($nodes as $node) {
|
||||
$node->parentNode->removeChild($node);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Reference in New Issue
Block a user