Skip to content

Commit d9c4137

Browse files
committed
Added support for sibling combinators (`+` and `~`) to tag::find().
Updated htmldoc::collection() to store only unique nodes, as sibling `find()` operations can return the same node more than once.
1 parent 18b843f commit d9c4137

File tree

3 files changed

+46
-9
lines changed

3 files changed

+46
-9
lines changed

src/htmldoc.php

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,15 @@ public function text() : string {
490490
* @return void
491491
*/
492492
protected function collection(array $nodes) : void {
493-
$this->children = $nodes;
493+
494+
// only store unique nodes as some find operations can produce the same node multiple times
495+
$unique = [];
496+
foreach ($nodes AS $item) {
497+
if (!\in_array($item, $unique, true)) {
498+
$unique[] = $item;
499+
}
500+
}
501+
$this->children = $unique;
494502
}
495503

496504
/**

src/tokens/tag.php

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -641,7 +641,7 @@ public function find(array $selector, bool $searchChildren = true) : array {
641641
}
642642

643643
// pass rest of selector to level below
644-
if ($item['join'] && $i) {
644+
if (\in_array($item['join'], [' ', '>'], true) && $i) {
645645
$match = false;
646646
$childselector = \array_slice($selector, $i);
647647
foreach ($this->children AS $child) {
@@ -651,6 +651,23 @@ public function find(array $selector, bool $searchChildren = true) : array {
651651
}
652652
break;
653653

654+
// find siblings
655+
} elseif (\in_array($item['join'], ['+', '~'], true) && $i) {
656+
$match = false;
657+
$siblingselector = \array_slice($selector, $i);
658+
$search = false;
659+
foreach ($this->parent->children AS $sibling) {
660+
if (!$search && $sibling === $this) {
661+
$search = true;
662+
} elseif ($search && \get_class($sibling) === 'hexydec\\html\\tag') {
663+
$found = \array_merge($found, $sibling->find($siblingselector));
664+
if ($item['join'] === '+') {
665+
break;
666+
}
667+
}
668+
}
669+
break;
670+
654671
// match tag
655672
} elseif (!empty($item['tag']) && $item['tag'] !== '*') {
656673
if ($item['tag'] !== $this->tagName) {

tests/findHtmldocTest.php

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,23 @@ public function testCanFindElements() {
1919
$this->assertEquals($doc->length, 4, 'Can count elements');
2020
// var_dump($doc->find('title'));
2121
$tests = [
22+
23+
// basic selectors
2224
'title' => '<title>Find</title>',
2325
'.find' => '<div class="find"><h1 class="find__heading">Heading</h1><p class="find__paragraph" title="This is a paragraph">Paragraph</p><a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a></div>',
2426
'#first' => '<div id="first" class="first">First</div>',
27+
'.first, .find__heading, .find__paragraph' => '<div id="first" class="first">First</div><h1 class="find__heading">Heading</h1><p class="find__paragraph" title="This is a paragraph">Paragraph</p>',
28+
29+
// combination selectors
30+
'body .find__paragraph' => '<p class="find__paragraph" title="This is a paragraph">Paragraph</p>',
31+
'body > .find__paragraph' => null,
32+
'.find > .find__paragraph' => '<p class="find__paragraph" title="This is a paragraph">Paragraph</p>',
33+
'.find__paragraph + a' => '<a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a>',
34+
'div[data-attr] ~ div' => '<div data-attr="">attr</div><div data-attr="attr">attr</div><div data-attr="attr-value1">attr</div><div data-attr="attr-value2">attr</div><div data-word="one two three four">attr</div>',
35+
'.find h1 ~ a' => '<a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a>',
36+
'.attributes div ~ div' => '<div data-attr="">attr</div><div data-attr="attr">attr</div><div data-attr="attr-value1">attr</div><div data-attr="attr-value2">attr</div><div data-word="one two three four">attr</div>',
37+
38+
// attribute selectors
2539
'#first[class]' => '<div id="first" class="first">First</div>',
2640
'[class=first]' => '<div id="first" class="first">First</div>',
2741
'[class^=find]' => '<div class="find"><h1 class="find__heading">Heading</h1><p class="find__paragraph" title="This is a paragraph">Paragraph</p><a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a></div><h1 class="find__heading">Heading</h1><p class="find__paragraph" title="This is a paragraph">Paragraph</p><a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a>',
@@ -34,18 +48,16 @@ public function testCanFindElements() {
3448
'a[href$="://github.com/hexydec/htmldoc"]' => null,
3549
'a[href$="://github.com/Hexydec/Htmldoc/"]' => null,
3650
'a[href$="://github.com/Hexydec/Htmldoc/" i]' => '<a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a>',
51+
'[data-attr]' => '<div data-attr>attr</div><div data-attr="">attr</div><div data-attr="attr">attr</div><div data-attr="attr-value1">attr</div><div data-attr="attr-value2">attr</div>',
52+
'[data-attr|=attr]' => '<div data-attr="attr">attr</div><div data-attr="attr-value1">attr</div><div data-attr="attr-value2">attr</div>',
53+
'[data-word~=three]' => '<div data-word="one two three four">attr</div>',
54+
55+
// pseudo selectors
3756
'.positions div:first-child' => '<div id="first" class="first">First</div>',
3857
'.positions div:last-child' => '<div class="last">Last</div>',
39-
'.first, .find__heading, .find__paragraph' => '<div id="first" class="first">First</div><h1 class="find__heading">Heading</h1><p class="find__paragraph" title="This is a paragraph">Paragraph</p>',
40-
'body .find__paragraph' => '<p class="find__paragraph" title="This is a paragraph">Paragraph</p>',
41-
'body > .find__paragraph' => null,
42-
'.find > .find__paragraph' => '<p class="find__paragraph" title="This is a paragraph">Paragraph</p>',
4358
'title:not([class])' => '<title>Find</title>',
4459
'.positions div:not(.find)' => '<div id="first" class="first">First</div><div class="last">Last</div>',
4560
'body section:not(:first-child) div:last-child' => '<div data-word="one two three four">attr</div>',
46-
'[data-attr]' => '<div data-attr>attr</div><div data-attr="">attr</div><div data-attr="attr">attr</div><div data-attr="attr-value1">attr</div><div data-attr="attr-value2">attr</div>',
47-
'[data-attr|=attr]' => '<div data-attr="attr">attr</div><div data-attr="attr-value1">attr</div><div data-attr="attr-value2">attr</div>',
48-
'[data-word~=three]' => '<div data-word="one two three four">attr</div>'
4961
];
5062
foreach ($tests AS $key => $item) {
5163
$this->assertEquals($item, $doc->find($key)->html());

0 commit comments

Comments
 (0)