Skip to content

Commit a031ede

Browse files
committed
Fixed old deprecated encoding conversion on HTML doc load
1 parent 2724b28 commit a031ede

File tree

4 files changed: 16 additions (+16) and 23 deletions (-23).

4 files changed: 16 additions (+16) and 23 deletions (-23).

app/Entities/Tools/PageContent.php

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -449,8 +449,8 @@ protected function loadDocumentFromHtml(string $html): DOMDocument
449449
{
450450
libxml_use_internal_errors(true);
451451
$doc = new DOMDocument();
452-
$html = '<body>' . $html . '</body>';
453-
$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
452+
$html = '<?xml encoding="utf-8" ?><body>' . $html . '</body>';
453+
$doc->loadHTML($html);
454454

455455
return $doc;
456456
}

app/References/CrossLinkParser.php

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -54,10 +54,10 @@ protected function getLinksFromContent(string $html): array
5454
{
5555
$links = [];
5656

57-
$html = '<body>' . $html . '</body>';
57+
$html = '<?xml encoding="utf-8" ?><body>' . $html . '</body>';
5858
libxml_use_internal_errors(true);
5959
$doc = new DOMDocument();
60-
$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
60+
$doc->loadHTML($html);
6161

6262
$xPath = new DOMXPath($doc);
6363
$anchors = $xPath->query('//a[@href]');

app/Search/SearchIndex.php

Lines changed: 10 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -15,25 +15,18 @@ class SearchIndex
1515
{
1616
/**
1717
* A list of delimiter characters used to break-up parsed content into terms for indexing.
18-
*
19-
* @var string
2018
*/
21-
public static $delimiters = " \n\t.,!?:;()[]{}<>`'\"";
19+
public static string $delimiters = " \n\t.,!?:;()[]{}<>`'\"";
2220

23-
/**
24-
* @var EntityProvider
25-
*/
26-
protected $entityProvider;
27-
28-
public function __construct(EntityProvider $entityProvider)
29-
{
30-
$this->entityProvider = $entityProvider;
21+
public function __construct(
22+
protected EntityProvider $entityProvider
23+
) {
3124
}
3225

3326
/**
3427
* Index the given entity.
3528
*/
36-
public function indexEntity(Entity $entity)
29+
public function indexEntity(Entity $entity): void
3730
{
3831
$this->deleteEntityTerms($entity);
3932
$terms = $this->entityToTermDataArray($entity);
@@ -45,7 +38,7 @@ public function indexEntity(Entity $entity)
4538
*
4639
* @param Entity[] $entities
4740
*/
48-
public function indexEntities(array $entities)
41+
public function indexEntities(array $entities): void
4942
{
5043
$terms = [];
5144
foreach ($entities as $entity) {
@@ -69,7 +62,7 @@ public function indexEntities(array $entities)
6962
*
7063
* @param callable(Entity, int, int):void|null $progressCallback
7164
*/
72-
public function indexAllEntities(?callable $progressCallback = null)
65+
public function indexAllEntities(?callable $progressCallback = null): void
7366
{
7467
SearchTerm::query()->truncate();
7568

@@ -101,7 +94,7 @@ public function indexAllEntities(?callable $progressCallback = null)
10194
/**
10295
* Delete related Entity search terms.
10396
*/
104-
public function deleteEntityTerms(Entity $entity)
97+
public function deleteEntityTerms(Entity $entity): void
10598
{
10699
$entity->searchTerms()->delete();
107100
}
@@ -145,12 +138,12 @@ protected function generateTermScoreMapFromHtml(string $html): array
145138
'h6' => 1.5,
146139
];
147140

148-
$html = '<body>' . $html . '</body>';
141+
$html = '<?xml encoding="utf-8" ?><body>' . $html . '</body>';
149142
$html = str_ireplace(['<br>', '<br />', '<br/>'], "\n", $html);
150143

151144
libxml_use_internal_errors(true);
152145
$doc = new DOMDocument();
153-
$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
146+
$doc->loadHTML($html);
154147

155148
$topElems = $doc->documentElement->childNodes->item(0)->childNodes;
156149
/** @var DOMNode $child */

app/Util/HtmlContentFilter.php

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,10 +19,10 @@ public static function removeScripts(string $html): string
1919
return $html;
2020
}
2121

22-
$html = '<body>' . $html . '</body>';
22+
$html = '<?xml encoding="utf-8" ?><body>' . $html . '</body>';
2323
libxml_use_internal_errors(true);
2424
$doc = new DOMDocument();
25-
$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
25+
$doc->loadHTML($html);
2626
$xPath = new DOMXPath($doc);
2727

2828
// Remove standard script tags

0 commit comments

Comments
 (0)