<?php

declare(strict_types=1);

/**
 * @author Amasty Team
 * @copyright Copyright (c) Amasty (https://www.amasty.com)
 * @package GDPR Hyva Compatibility (System)
 */

namespace Amasty\GdprHyva\Model;

use Psr\Log\LoggerInterface;

/**
 * Escapes script URI schemes (`javascript:`, `data:`, `vbscript:`) and strips
 * prohibited tags from TinyMCE content.
 *
 * @see \Magento\Framework\Escaper
 */
class XssEscaper
{
    /**
     * Element names that are removed, together with their subtree, from the parsed content.
     */
    private const NOT_ALLOWED_TAGS = ['script', 'img', 'embed', 'iframe', 'video', 'source', 'object', 'audio'];

    /**
     * Conversion map for mb_encode_numericentity() / mb_decode_numericentity().
     * Covers every code point from U+0080 upwards, so ASCII markup is left untouched.
     */
    private const NUMERIC_ENTITY_CONVMAP = [0x80, 0x10FFFF, 0, 0x1FFFFF];

    /**
     * @var LoggerInterface
     */
    private LoggerInterface $logger;

    /**
     * Case-insensitive pattern matching `javascript`, `data` (each followed by `:`, `%3A` or a literal `\x3a`),
     * `vbscript:`, and the literal `\xNN`-escaped spellings of `javascript` and `data`.
     *
     * @var string
     */
    private static string $xssFiltrationPattern =
        '/((javascript(\\\\x3a|:|%3A))|(data(\\\\x3a|:|%3A))|(vbscript:))|'
        . '((\\\\x6A\\\\x61\\\\x76\\\\x61\\\\x73\\\\x63\\\\x72\\\\x69\\\\x70\\\\x74(\\\\x3a|:|%3A))|'
        . '(\\\\x64\\\\x61\\\\x74\\\\x61(\\\\x3a|:|%3A)))/i';

    public function __construct(
        LoggerInterface $logger
    ) {
        $this->logger = $logger;
    }

    /**
     * Filter script URI schemes out of the given HTML and strip the tags listed in NOT_ALLOWED_TAGS.
     *
     * The content is wrapped into a uniquely identified <body>, parsed with DOMDocument, cleaned and
     * serialized back; only the inner HTML of the wrapper is returned.
     * Note: element attributes (for example inline event handlers) are not inspected by this method.
     *
     * @param string|null $data Raw HTML coming from the editor.
     * @return string Filtered HTML, or an empty string when the input is null/empty, cannot be processed,
     *                or nothing is left after filtering.
     */
    public function escapeTinyMceHtml(?string $data): string
    {
        // Explicit comparison instead of empty(): the string '0' is valid content.
        if ($data === null || $data === '') {
            return '';
        }

        $prepared = $this->prepareUnescapedCharacters($data);
        if ($prepared === null) {
            // PCRE could not process the input (e.g. malformed UTF-8).
            return '';
        }

        $filtered = $this->escapeScriptIdentifiers($prepared);
        if ($filtered === '') {
            return '';
        }

        // Only non-ASCII characters are turned into numeric entities. The markup itself must stay intact,
        // otherwise DOMDocument sees plain text and there are no elements to remove.
        $encoded = mb_encode_numericentity($filtered, self::NUMERIC_ENTITY_CONVMAP, 'UTF-8');

        $domDocument = new \DOMDocument('1.0', 'UTF-8');
        $wrapperElementId = uniqid();

        try {
            $domDocument->loadHTML(
                '<html><body id="' . $wrapperElementId . '">' . $encoded . '</body></html>'
            );
        } catch (\Exception $e) {
            $this->logger->critical($e);
        }

        $this->removeNotAllowedTags($domDocument);

        $html = $domDocument->saveHTML();
        if ($html === false) {
            return '';
        }

        // Replacement for the deprecated mb_convert_encoding(..., 'UTF-8', 'HTML-ENTITIES').
        $result = mb_decode_numericentity(
            html_entity_decode($html, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'),
            self::NUMERIC_ENTITY_CONVMAP,
            'UTF-8'
        );

        $matched = preg_match(
            '/<body id="' . $wrapperElementId . '">(.+)<\/body><\/html>$/si',
            $result,
            $matches
        );

        return $matched === 1 ? $matches[1] : '';
    }

    /**
     * Strip control characters and replace `javascript:`, `vbscript:`, `data:` identifiers with `:`.
     *
     * The replacement is repeated until the pattern no longer matches, because a replacement
     * can join the surrounding text into a new match.
     *
     * @param string $data
     * @return string Filtered string, or an empty string when PCRE fails or nothing is left.
     */
    private function escapeScriptIdentifiers(string $data): string
    {
        // preg_replace() reports failure with null, not false.
        $filteredData = preg_replace('/[\x00-\x1F\x7F\xA0]/u', '', $data);
        if ($filteredData === null || $filteredData === '') {
            return '';
        }

        do {
            $filteredData = preg_replace(self::$xssFiltrationPattern, ':', $filteredData);
            if ($filteredData === null) {
                return '';
            }
        } while (preg_match(self::$xssFiltrationPattern, $filteredData) === 1);

        return $filteredData;
    }

    /**
     * Escape `&` as `&amp;` before the DOM round trip.
     *
     * Parsing decodes one entity level and the final decoding step removes another one,
     * so entities typed in the source would otherwise be turned into raw characters.
     *
     * @param string $data
     * @return string|null Null when PCRE fails (e.g. malformed UTF-8 input).
     */
    private function prepareUnescapedCharacters(string $data): ?string
    {
        $patterns = ['/\&/u'];
        $replacements = ['&amp;'];
        return \preg_replace($patterns, $replacements, $data);
    }

    /**
     * Remove every element listed in NOT_ALLOWED_TAGS, including its children, from the document.
     *
     * @param \DOMDocument $domDocument
     * @return void
     */
    private function removeNotAllowedTags(\DOMDocument $domDocument): void
    {
        $xpath = new \DOMXPath($domDocument);
        // The name tests are alternatives, so they are joined with "or";
        // with "and" the expression can never match a node.
        $nodes = $xpath->query(
            '//node()[name() = \''
            . implode('\' or name() = \'', self::NOT_ALLOWED_TAGS)
            . '\']'
        );

        if ($nodes === false) {
            return;
        }

        // Iterate over a snapshot so that removals cannot affect the traversal.
        foreach (iterator_to_array($nodes) as $node) {
            if ($node->parentNode !== null) {
                $node->parentNode->removeChild($node);
            }
        }
    }
}
