Skip to content

Commit e1faf88

Browse files
committed
Updated attribute selectors to fix bugs in how attributes are handled when they are set with no value or without an equals sign.
Fixed issue in $= comparison attribute selector where it didn't search for the last occurrence. Added new comparison operators in attribute selectors - |= and ~=. Updated and added tests.
1 parent 0daa31b commit e1faf88

File tree

4 files changed

+84
-56
lines changed

4 files changed

+84
-56
lines changed

src/helpers/selector.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ class selector {
1010
*/
1111
protected static array $tokens = [
1212
'quotes' => '(?<!\\\\)"(?:[^"\\\\]++|\\\\.)*+"',
13+
'comparison' => '[\\^*$<>|~]?=', // comparison operators for media queries or attribute selectors
1314
'join' => '\\s*[>+~]\\s*',
14-
'comparison' => '[\\^*$<>]?=', // comparison operators for media queries or attribute selectors
1515
'squareopen' => '\\[',
1616
'squareclose' => '\\]',
1717
'bracketopen' => '\\(',
@@ -20,7 +20,7 @@ class selector {
2020
'pseudo' => ':[A-Za-z-]++',
2121
'id' => '#[^ +>\.#{\\[,]++',
2222
'class' => '\.[^ +>\.#{\\[\\(\\),]++',
23-
'string' => '\\*|[^\\[\\]{}\\(\\):;,>+=~\\^$!" #\\.*]++',
23+
'string' => '\\*|[^\\[\\]{}\\(\\):;,>+=~|\\^$!" #\\.*]++',
2424
'whitespace' => '\s++',
2525
];
2626

src/tokens/tag.php

Lines changed: 49 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -677,64 +677,78 @@ public function find(array $selector, bool $searchChildren = true) : array {
677677
} elseif (!empty($item['attribute'])) {
678678

679679
// check if attribute exists
680-
if (empty($this->attributes[$item['attribute']])) {
680+
if (!array_key_exists($item['attribute'], $this->attributes)) {
681681
$match = false;
682682
break;
683683
} elseif (!empty($item['value'])) {
684-
$current = $this->attributes[$item['attribute']];
685-
switch ($item['comparison']) {
686684

687-
// exact match
688-
case '=':
689-
if ($item['sensitive']) {
690-
if ($current !== $item['value']) {
685+
// if current value is null, it won't match
686+
if (($current = $this->attributes[$item['attribute']]) === null) {
687+
$match = false;
688+
break;
689+
690+
// compare
691+
} else {
692+
switch ($item['comparison']) {
693+
694+
// exact match
695+
case '=':
696+
if ($item['sensitive']) {
697+
if ($current !== $item['value']) {
698+
$match = false;
699+
break;
700+
}
701+
} elseif (\mb_strtolower($current) !== \mb_strtolower($item['value'])) {
691702
$match = false;
692703
break;
693704
}
694-
} elseif (\mb_strtolower($current) !== \mb_strtolower($item['value'])) {
695-
$match = false;
696705
break;
697-
}
698-
break;
699706

700-
// match start
701-
case '^=':
702-
if ($item['sensitive']) {
703-
if (\mb_strpos($current, $item['value']) !== 0) {
707+
// match start
708+
case '^=':
709+
$pos = $item['sensitive'] ? \mb_strpos($current, $item['value']) : \mb_stripos($current, $item['value']);
710+
if ($pos !== 0) {
704711
$match = false;
705712
break;
706713
}
707-
} elseif (\mb_stripos($current, $item['value']) !== 0) {
708-
$match = false;
709714
break;
710-
}
711-
break;
712715

713-
// match within
714-
case '*=':
715-
if ($item['sensitive']) {
716-
if (\mb_strpos($current, $item['value']) === false) {
716+
// match word
717+
case '~=':
718+
$current =' '.$current.' ';
719+
$item['value'] = ' '.$item['value'].' ';
720+
721+
// match within
722+
case '*=':
723+
$pos = $item['sensitive'] ? \mb_strpos($current, $item['value']) : \mb_stripos($current, $item['value']);
724+
if ($pos === false) {
717725
$match = false;
718726
break;
719727
}
720-
} elseif (\mb_stripos($current, $item['value']) === false) {
721-
$match = false;
722728
break;
723-
}
724-
break;
725729

726-
// match end
727-
case '$=':
728-
if ($item['sensitive']) {
729-
if (\mb_strpos($current, $item['value']) !== \mb_strlen($current) - \mb_strlen($item['value'])) {
730+
// match end
731+
case '$=':
732+
$pos = $item['sensitive'] ? \mb_strrpos($current, $item['value']) : \mb_strripos($current, $item['value']);
733+
if ($pos !== \mb_strlen($current) - \mb_strlen($item['value'])) {
730734
$match = false;
731735
break;
732736
}
733-
} elseif (\mb_stripos($current, $item['value']) !== \mb_strlen($current) - \mb_strlen($item['value'])) {
734-
$match = false;
735737
break;
736-
}
737-
break;
738+
739+
// match subcode
740+
case '|=':
741+
if ($item['sensitive']) {
742+
if ($current !== $item['value'] && \mb_strpos($current, $item['value'].'-') !== 0) {
743+
$match = false;
744+
break;
745+
}
746+
} elseif (\mb_strtolower($current) !== \mb_strtolower($item['value']) && \mb_stripos($current, $item['value'].'-') !== 0) {
747+
$match = false;
748+
break;
749+
}
750+
break;
751+
}
738752
}
739753
}
740754

tests/findHtmldocTest.php

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,18 +29,22 @@ public function testCanFindElements() {
2929
'[class$=heading]' => '<h1 class="find__heading">Heading</h1>',
3030
'h1[class$=heading]' => '<h1 class="find__heading">Heading</h1>',
3131
'html h1[class$=heading]' => '<h1 class="find__heading">Heading</h1>',
32+
'a[href]' => '<a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a>',
3233
'a[href$="://github.com/hexydec/htmldoc/"]' => '<a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a>',
3334
'a[href$="://github.com/hexydec/htmldoc"]' => null,
34-
'a[href$="://github.com/Hexydec/Htmldoc"]' => null,
35+
'a[href$="://github.com/Hexydec/Htmldoc/"]' => null,
3536
'a[href$="://github.com/Hexydec/Htmldoc/" i]' => '<a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a>',
36-
'div:first-child' => '<div id="first" class="first">First</div>',
37-
// 'div:last-child' => '<div class="last">Last</div>',
37+
'.positions div:first-child' => '<div id="first" class="first">First</div>',
38+
'.positions div:last-child' => '<div class="last">Last</div>',
3839
'.first, .find__heading, .find__paragraph' => '<div id="first" class="first">First</div><h1 class="find__heading">Heading</h1><p class="find__paragraph" title="This is a paragraph">Paragraph</p>',
3940
'body .find__paragraph' => '<p class="find__paragraph" title="This is a paragraph">Paragraph</p>',
4041
'body > .find__paragraph' => null,
4142
'.find > .find__paragraph' => '<p class="find__paragraph" title="This is a paragraph">Paragraph</p>',
4243
'title:not([class])' => '<title>Find</title>',
43-
'div:not(.find)' => '<div id="first" class="first">First</div><div class="last">Last</div>',
44+
'.positions div:not(.find)' => '<div id="first" class="first">First</div><div class="last">Last</div>',
45+
'[data-attr]' => '<div data-attr>attr</div><div data-attr="">attr</div><div data-attr="attr">attr</div><div data-attr="attr-value1">attr</div><div data-attr="attr-value2">attr</div>',
46+
'[data-attr|=attr]' => '<div data-attr="attr">attr</div><div data-attr="attr-value1">attr</div><div data-attr="attr-value2">attr</div>',
47+
'[data-word~=three]' => '<div data-word="one two three four">attr</div>'
4448
];
4549
foreach ($tests AS $key => $item) {
4650
$this->assertEquals($item, $doc->find($key)->html());
@@ -62,18 +66,18 @@ public function testCanTraverseElements() {
6266
'close' => false // don't write close tags where possible
6367
));
6468

65-
$this->assertEquals('<div id="first" class="first">First</div>', $doc->find('body > *')->first()->html(), 'Can find first element');
66-
$this->assertEquals('<div class="last">Last</div>', $doc->find('body > *')->last()->html(), 'Can find last element');
67-
$this->assertEquals('<div class="find"><h1 class="find__heading">Heading</h1><p class="find__paragraph" title="This is a paragraph">Paragraph</p><a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a></div>', $doc->find('body > *')->eq(1)->html(), 'Can specific element');
68-
$this->assertEquals('<div class="find"><h1 class="find__heading">Heading</h1><p class="find__paragraph" title="This is a paragraph">Paragraph</p><a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a></div>', $doc->find('body > *')->eq(-2)->html(), 'Can specific element');
69+
$this->assertEquals('<div id="first" class="first">First</div>', $doc->find('.positions > *')->first()->html(), 'Can find first element');
70+
$this->assertEquals('<div class="last">Last</div>', $doc->find('.positions > *')->last()->html(), 'Can find last element');
71+
$this->assertEquals('<div class="find"><h1 class="find__heading">Heading</h1><p class="find__paragraph" title="This is a paragraph">Paragraph</p><a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a></div>', $doc->find('.positions > *')->eq(1)->html(), 'Can specific element');
72+
$this->assertEquals('<div class="find"><h1 class="find__heading">Heading</h1><p class="find__paragraph" title="This is a paragraph">Paragraph</p><a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a></div>', $doc->find('.positions > *')->eq(-2)->html(), 'Can specific element');
6973
$this->assertEquals('<div class="find"><h1 class="find__heading">Heading</h1><p class="find__paragraph" title="This is a paragraph">Paragraph</p><a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a></div>', $doc->find('.find')->children()->html(), 'Can specific element');
7074

71-
$this->assertEquals(3, count($doc->find('body > *')->get()));
72-
$this->assertEquals('<div class="last">Last</div>', $doc->find('body > *')->get(2)->html());
73-
$this->assertEquals('<div class="last">Last</div>', $doc->find('body > *')->get(-1)->html());
75+
$this->assertEquals(3, count($doc->find('.positions > *')->get()));
76+
$this->assertEquals('<div class="last">Last</div>', $doc->find('.positions > *')->get(2)->html());
77+
$this->assertEquals('<div class="last">Last</div>', $doc->find('.positions > *')->get(-1)->html());
7478

7579
$cls = ['first', 'find', 'last'];
76-
$divs = $doc->find('body > *');
80+
$divs = $doc->find('.positions > *');
7781
$this->assertTrue(isset($divs[0]), true);
7882
$this->assertEquals($cls[0], $divs[0]->attr('class'));
7983
foreach ($divs AS $key => $item) {

tests/templates/find.html

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,22 @@
44
<title>Find</title>
55
</head>
66
<body>
7-
<div id="first" class="first">First</div>
8-
<div class="find">
9-
<h1 class="find__heading">Heading</h1>
10-
<p class="find__paragraph" title="This is a paragraph">Paragraph</p>
11-
<a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a>
12-
</div>
13-
<div class="last">Last</div>
7+
<section class="positions">
8+
<div id="first" class="first">First</div>
9+
<div class="find">
10+
<h1 class="find__heading">Heading</h1>
11+
<p class="find__paragraph" title="This is a paragraph">Paragraph</p>
12+
<a class="find__anchor" href="https://github.com/hexydec/htmldoc/">Anchor</a>
13+
</div>
14+
<div class="last">Last</div>
15+
</section>
16+
<section class="attributes">
17+
<div data-attr>attr</div>
18+
<div data-attr="">attr</div>
19+
<div data-attr="attr">attr</div>
20+
<div data-attr="attr-value1">attr</div>
21+
<div data-attr="attr-value2">attr</div>
22+
<div data-word="one two three four">attr</div>
23+
</section>
1424
</body>
1525
</html>

0 commit comments

Comments
 (0)