Skip to content

Commit d249a33

Browse files
committed
Started reworking the CSS minifier to modularise it more.
Moved the CSSdoc classes into a new namespace. Fixed modularity issue with how charsets are handled when encoding is detected in htmldoc::getCharsetFromHtml().
1 parent aed2086 commit d249a33

File tree

7 files changed

+845
-5
lines changed

7 files changed

+845
-5
lines changed

src/autoload.php

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,12 @@
1212
'hexydec\\html\\style' => $dir.'/tokens/style.php',
1313
'hexydec\\html\\tag' => $dir.'/tokens/tag.php',
1414
'hexydec\\html\\text' => $dir.'/tokens/text.php',
15-
'hexydec\\html\\cssmin' => $dir.'/cssmin.php'
15+
'hexydec\\html\\cssmin' => $dir.'/cssmin.php',
16+
'hexydec\\css\\cssdoc' => __DIR__.'/cssdoc/cssdoc.php',
17+
'hexydec\\css\\mediaquery' => __DIR__.'/cssdoc/tokens/mediaquery.php',
18+
'hexydec\\css\\rule' => __DIR__.'/cssdoc/tokens/rule.php',
19+
'hexydec\\css\\selector' => __DIR__.'/cssdoc/tokens/selector.php',
20+
'hexydec\\css\\property' => __DIR__.'/cssdoc/tokens/property.php'
1621
];
1722
if (isset($classes[$class])) {
1823
return require($classes[$class]);

src/cssdoc/cssdoc.php

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
<?php
2+
declare(strict_types = 1);
3+
namespace hexydec\css;
4+
5+
/**
 * Represents a CSS document: loads CSS from a string or stream, tokenises it,
 * hands the tokens to a root mediaquery object, and compiles the result back to a string.
 */
class cssdoc {

	/**
	 * @var array $tokens Regexp components keyed by their corresponding codename for tokenising CSS
	 */
	protected static $tokens = [
		'whitespace' => '\s++',
		'comment' => '\\/\\*[\d\D]*?\\*\\/',
		'quotes' => '(?<!\\\\)("(?:[^"\\\\]++|\\\\.)*+"|\'(?:[^\'\\\\]++|\\\\.)*+\')',
		'join' => '[>+~]',
		'comparison' => '[\^*$<>]?=', // comparison operators for media queries or attribute selectors
		'curlyopen' => '{',
		'curlyclose' => '}',
		'squareopen' => '\[',
		'squareclose' => '\]',
		'bracketopen' => '\(',
		'bracketclose' => '\)',
		'comma' => ',',
		'colon' => ':',
		'semicolon' => ';',
		'string' => '!?[^\[\]{}\(\):;,>+=~\^$!"\/ \n\r\t]++'
	];

	/**
	 * @var array $config Object configuration array
	 */
	protected $config = [
		'removesemicolon' => true,
		'removezerounits' => true,
		'removeleadingzero' => true,
		'convertquotes' => true,
		'removequotes' => true,
		'shortenhex' => true,
		'lowerhex' => true,
		'email' => false,
		'maxline' => false,
		'output' => 'minify'
	];

	/**
	 * @var array $output Default compilation options, merged with the options passed to compile()
	 */
	protected $output = [
		'output' => 'beautify'
	];

	/**
	 * @var mediaquery|null $document The root object produced by parse(), or null when nothing has been loaded
	 */
	protected $document;

	/**
	 * @var array $children The list exposed through the 'length' property and toArray(); it is reset by load()
	 */
	protected $children = [];

	/**
	 * Constructs the object
	 *
	 * @param array $config An array of configuration parameters, merged over the default config with array_merge() (top-level keys only)
	 */
	public function __construct(array $config = []) {
		$this->config = array_merge($this->config, $config);
	}

	/**
	 * Provides read access to the virtual 'length' and 'output' properties
	 *
	 * @param string $var The name of the property to retrieve, currently 'length' or 'output'
	 * @return mixed The number of items in $children for 'length', the output config for 'output', or null if the property doesn't exist
	 */
	public function __get(string $var) {
		if ($var === 'length') {
			return count($this->children);
		} elseif ($var === 'output') {
			return $this->output;
		}
		return null;
	}

	/**
	 * Retrieves the children of the document as an array
	 *
	 * @return array An array of child nodes
	 */
	public function toArray() : array {
		return $this->children;
	}

	/**
	 * Open a CSS file from a URL
	 *
	 * @param string $url The address of the CSS file to retrieve
	 * @param resource|null $context An optional stream context resource; it is left untyped because PHP has no 'resource' type declaration
	 * @param string|null &$error A reference to any user error that is generated
	 * @return mixed The loaded CSS, or false on error
	 */
	public function open(string $url, $context = null, ?string &$error = null) {

		// only pass the context through when one was supplied, a null context is not accepted by every PHP version
		$handle = $context === null ? @fopen($url, 'rb') : @fopen($url, 'rb', false, $context);
		if ($handle === false) {
			$error = 'Could not open file "'.$url.'"';
			return false;
		}

		// read the contents and metadata, then release the handle whatever the outcome
		$css = stream_get_contents($handle);
		$meta = stream_get_meta_data($handle);
		fclose($handle);
		if ($css === false) {
			$error = 'Could not read file "'.$url.'"';
			return false;
		}

		// find charset in headers, using a temporary so a header without charset= cannot leave false in $charset
		$charset = null;
		foreach ($meta['wrapper_data'] ?? [] AS $item) {
			if (is_string($item) && mb_stripos($item, 'Content-Type:') === 0 && ($value = mb_stristr($item, 'charset=')) !== false) {
				$charset = mb_substr($value, 8);
				break;
			}
		}

		// load css
		return $this->load($css, $charset, $error) ? $css : false;
	}

	/**
	 * Parse a CSS string into the object
	 *
	 * @param string $css A string containing the CSS to parse
	 * @param string|null $charset The charset of the document, or null to read it from the CSS or detect it
	 * @param string|null &$error A reference to any user error that is generated
	 * @return bool Whether the input CSS was parsed
	 */
	public function load(string $css, ?string $charset = null, &$error = null) : bool {

		// convert to the internal encoding when a charset is supplied, declared or detected
		if ($charset || ($charset = $this->getCharsetFromCss($css)) !== null) {
			$css = mb_convert_encoding($css, mb_internal_encoding(), $charset);
		}

		// reset the document so a failed load never leaves a previous document behind
		$this->children = [];
		$this->document = null;

		// tokenise the input CSS
		if (($tokens = \hexydec\html\tokenise::tokenise($css, self::$tokens)) === false) {
			$error = 'Could not tokenise input';

		// parse the document
		} elseif (!$this->parse($tokens)) {
			$error = 'Input is not valid';

		// success
		} else {
			return true;
		}
		return false;
	}

	/**
	 * Reads the charset from a leading @charset rule, or detects the charset from the CSS content
	 *
	 * @param string $css A string containing the CSS to inspect
	 * @return string|null The declared or detected charset, or null if it could not be determined
	 */
	protected function getCharsetFromCss(string $css) : ?string {
		$prefix = '@charset "';
		$start = mb_strlen($prefix);

		// the third argument of mb_substr() is a length, so subtract the start offset from the position of the closing quote
		if (mb_strpos($css, $prefix) === 0 && ($end = mb_strpos($css, '";', $start)) !== false) {
			return mb_substr($css, $start, $end - $start);
		} elseif (($charset = mb_detect_encoding($css)) !== false) {
			return $charset;
		}
		return null;
	}

	/**
	 * Parses an array of tokens into a CSS document
	 *
	 * @param array &$tokens An array of tokens generated by tokenise()
	 * @return bool Whether the parser was able to capture any objects
	 */
	protected function parse(array &$tokens) : bool {
		$this->document = new mediaquery($this);
		return $this->document->parse($tokens);
	}

	/**
	 * Minifies the internal representation of the document
	 *
	 * Currently a placeholder: no minification is performed.
	 *
	 * @param array $minify An array of minification options controlling which operations are performed
	 * @return void
	 */
	public function minify(array $minify = []) : void {
	}

	/**
	 * Compile the document to a string
	 *
	 * @param array $options An array of compilation options, merged over the default output options
	 * @return string The compiled CSS, or an empty string when no document has been loaded
	 */
	public function compile(array $options = []) : string {
		if ($this->document === null) {
			return '';
		}
		$options = array_merge($this->output, $options);
		return $this->document->compile($options);
	}
}

src/cssdoc/tokens/mediaquery.php

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
<?php
2+
declare(strict_types = 1);
3+
namespace hexydec\css;
4+
5+
/**
 * A container of parsed CSS rules: either the root of a cssdoc document or a nested @media block.
 */
class mediaquery {

	/**
	 * @var cssdoc The cssdoc object this container was created for
	 */
	protected $root;

	/**
	 * @var array The child objects collected by parse(), in source order
	 */
	protected $rules = [];

	/**
	 * Constructs the mediaquery object
	 *
	 * @param cssdoc $root The cssdoc object this container belongs to
	 */
	public function __construct(cssdoc $root) {
		$this->root = $root;
	}

	/**
	 * Consumes tokens from the supplied array and builds the list of child rules
	 *
	 * Only 'string' tokens start a child: '@media' creates a nested mediaquery, anything
	 * else creates a rule. All other token types are stepped over.
	 *
	 * NOTE(review): the array pointer is advanced with next() before the first read, so the
	 * element under the pointer on entry is never examined here - confirm the caller relies on this.
	 *
	 * @param array &$tokens An array of tokens generated by tokenise(), read through its internal pointer
	 * @return bool Whether at least one child object was captured
	 */
	public function parse(array &$tokens) : bool {
		for ($current = next($tokens); $current !== false; $current = next($tokens)) {

			// anything that is not a string token cannot start a child
			if ($current['type'] != 'string') {
				continue;
			}

			// nested media query
			if ($current['value'] == '@media') {
				$child = new self($this->root);

			// step back so the rule reads this token itself
			} else {
				prev($tokens);
				$child = new rule($this);
			}
			$child->parse($tokens);
			$this->rules[] = $child;
		}
		return (bool) $this->rules;
	}

	/**
	 * Minifies the internal representation of the rules
	 *
	 * Currently a placeholder: no minification is performed.
	 *
	 * @param array $minify An array of minification options controlling which operations are performed
	 * @return void
	 */
	public function minify(array $minify) : void {
	}

	/**
	 * Compile the child rules to a string
	 *
	 * @param array $options An array of compilation options; rules are separated by a blank line unless 'output' is 'minify'
	 * @return string The compiled output of every child, joined in order
	 */
	public function compile(array $options) : string {
		$separator = $options['output'] != 'minify' ? "\n\n" : '';

		// compile each child in order, then glue the pieces together
		$compiled = [];
		foreach ($this->rules AS $child) {
			$compiled[] = $child->compile($options);
		}
		return implode($separator, $compiled);
	}
}

0 commit comments

Comments
 (0)