Skip to content

Commit 1e5657b

Browse files
committed
Merge pull request MyIntervals#69 from slusarz/performance_take2
Performance improvements by caching some invariants
2 parents 8457832 + 6077052 commit 1e5657b

File tree

1 file changed

+85
-65
lines changed

1 file changed

+85
-65
lines changed

lib/Sabberworm/CSS/Parser.php

Lines changed: 85 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ class Parser {
3131
private $oParserSettings;
3232
private $sCharset;
3333
private $iLength;
34+
private $peekCache = null;
35+
private $blockRules;
36+
private $sizeUnits;
3437

3538
public function __construct($sText, Settings $oParserSettings = null) {
3639
$this->sText = $sText;
@@ -39,6 +42,17 @@ public function __construct($sText, Settings $oParserSettings = null) {
3942
$oParserSettings = Settings::create();
4043
}
4144
$this->oParserSettings = $oParserSettings;
45+
$this->blockRules = explode('/', AtRule::BLOCK_RULES);
46+
47+
foreach (explode('/', Size::ABSOLUTE_SIZE_UNITS.'/'.Size::RELATIVE_SIZE_UNITS.'/'.Size::NON_SIZE_UNITS) as $val) {
48+
$size = strlen($val);
49+
if (isset($this->sizeUnits[$size])) {
50+
$this->sizeUnits[$size][] = $val;
51+
} else {
52+
$this->sizeUnits[$size] = array($val);
53+
}
54+
}
55+
ksort($this->sizeUnits, SORT_NUMERIC);
4256
}
4357

4458
public function setCharset($sCharset) {
@@ -102,11 +116,10 @@ private function parseAtRule() {
102116
$this->consume(';');
103117
$this->setCharset($sCharset->getString());
104118
return new Charset($sCharset);
105-
} else if (self::identifierIs($sIdentifier, 'keyframes')) {
119+
} else if ($this->identifierIs($sIdentifier, 'keyframes')) {
106120
$oResult = new KeyFrame();
107121
$oResult->setVendorKeyFrame($sIdentifier);
108-
$oResult->setAnimationName(trim($this->consumeUntil('{')));
109-
$this->consume('{');
122+
$oResult->setAnimationName(trim($this->consumeUntil('{', false, true)));
110123
$this->consumeWhiteSpace();
111124
$this->parseList($oResult);
112125
return $oResult;
@@ -127,12 +140,11 @@ private function parseAtRule() {
127140
return new CSSNamespace($mUrl, $sPrefix);
128141
} else {
129142
//Unknown other at rule (font-face or such)
130-
$sArgs = $this->consumeUntil('{');
131-
$this->consume('{');
143+
$sArgs = $this->consumeUntil('{', false, true);
132144
$this->consumeWhiteSpace();
133145
$bUseRuleSet = true;
134-
foreach(explode('/', AtRule::BLOCK_RULES) as $sBlockRuleName) {
135-
if(self::identifierIs($sIdentifier, $sBlockRuleName)) {
146+
foreach($this->blockRules as $sBlockRuleName) {
147+
if($this->identifierIs($sIdentifier, $sBlockRuleName)) {
136148
$bUseRuleSet = false;
137149
break;
138150
}
@@ -206,7 +218,6 @@ private function parseCharacter($bIsForIdentifier) {
206218
if ($this->comes('\n') || $this->comes('\r')) {
207219
return '';
208220
}
209-
$aMatches;
210221
if (preg_match('/[0-9a-fA-F]/Su', $this->peek()) === 0) {
211222
return $this->consume(1);
212223
}
@@ -223,31 +234,32 @@ private function parseCharacter($bIsForIdentifier) {
223234
}
224235
$iUnicode = intval($sUnicode, 16);
225236
$sUtf32 = "";
226-
for ($i = 0; $i < 4; $i++) {
237+
for ($i = 0; $i < 4; ++$i) {
227238
$sUtf32 .= chr($iUnicode & 0xff);
228239
$iUnicode = $iUnicode >> 8;
229240
}
230241
return iconv('utf-32le', $this->sCharset, $sUtf32);
231242
}
232243
if ($bIsForIdentifier) {
233-
if (preg_match('/[a-zA-Z0-9]|-|_/u', $this->peek()) === 1) {
234-
return $this->consume(1);
235-
} else if (ord($this->peek()) > 0xa1) {
244+
$peek = ord($this->peek());
245+
// Ranges: a-z A-Z 0-9 - _
246+
if (($peek >= 97 && $peek <= 122) ||
247+
($peek >= 65 && $peek <= 90) ||
248+
($peek >= 48 && $peek <= 57) ||
249+
($peek === 45) ||
250+
($peek === 95) ||
251+
($peek > 0xa1)) {
236252
return $this->consume(1);
237-
} else {
238-
return null;
239253
}
240254
} else {
241255
return $this->consume(1);
242256
}
243-
// Does not reach here
244257
return null;
245258
}
246259

247260
private function parseSelector() {
248261
$oResult = new DeclarationBlock();
249-
$oResult->setSelector($this->consumeUntil('{'));
250-
$this->consume('{');
262+
$oResult->setSelector($this->consumeUntil('{', false, true));
251263
$this->consumeWhiteSpace();
252264
$this->parseRuleSet($oResult);
253265
return $oResult;
@@ -268,7 +280,8 @@ private function parseRuleSet($oRuleSet) {
268280
$sConsume = $this->consumeUntil(array("\n", ";", '}'), true);
269281
// We need to “unfind” the matches to the end of the ruleSet as this will be matched later
270282
if($this->streql($this->substr($sConsume, $this->strlen($sConsume)-1, 1), '}')) {
271-
$this->iCurrentPosition--;
283+
--$this->iCurrentPosition;
284+
$this->peekCache = null;
272285
} else {
273286
$this->consumeWhiteSpace();
274287
while ($this->comes(';')) {
@@ -341,11 +354,10 @@ private function parseValue($aListDelimiters) {
341354
$iStartPosition = null;
342355
while (($iStartPosition = array_search($sDelimiter, $aStack, true)) !== false) {
343356
$iLength = 2; //Number of elements to be joined
344-
for ($i = $iStartPosition + 2; $i < count($aStack); $i+=2) {
357+
for ($i = $iStartPosition + 2; $i < count($aStack); $i+=2, ++$iLength) {
345358
if ($sDelimiter !== $aStack[$i]) {
346359
break;
347360
}
348-
$iLength++;
349361
}
350362
$oList = new RuleValueList($sDelimiter);
351363
for ($i = $iStartPosition - 1; $i - $iStartPosition + 1 < $iLength * 2; $i+=2) {
@@ -369,9 +381,9 @@ private function parsePrimitiveValue() {
369381
$this->consumeWhiteSpace();
370382
if (is_numeric($this->peek()) || ($this->comes('-.') && is_numeric($this->peek(1, 2))) || (($this->comes('-') || $this->comes('.')) && is_numeric($this->peek(1, 1)))) {
371383
$oValue = $this->parseNumericValue();
372-
} else if ($this->comes('#') || $this->comes('rgb') || $this->comes('hsl')) {
384+
} else if ($this->comes('#') || $this->comes('rgb', true) || $this->comes('hsl', true)) {
373385
$oValue = $this->parseColorValue();
374-
} else if ($this->comes('url')) {
386+
} else if ($this->comes('url', true)) {
375387
$oValue = $this->parseURLValue();
376388
} else if ($this->comes("'") || $this->comes('"')) {
377389
$oValue = $this->parseStringValue();
@@ -394,16 +406,16 @@ private function parseNumericValue($bForColor = false) {
394406
$sSize .= $this->consume(1);
395407
}
396408
}
397-
$fSize = floatval($sSize);
409+
398410
$sUnit = null;
399-
foreach(explode('/', Size::ABSOLUTE_SIZE_UNITS.'/'.Size::RELATIVE_SIZE_UNITS.'/'.Size::NON_SIZE_UNITS) as $sDefinedUnit) {
400-
if ($this->comes($sDefinedUnit, 0, true)) {
401-
$sUnit = $sDefinedUnit;
402-
$this->consume($sDefinedUnit);
411+
foreach ($this->sizeUnits as $len => $val) {
412+
if (($pos = array_search($this->peek($len), $val)) !== false) {
413+
$sUnit = $val[$pos];
414+
$this->consume($len);
403415
break;
404416
}
405417
}
406-
return new Size($fSize, $sUnit, $bForColor);
418+
return new Size(floatval($sSize), $sUnit, $bForColor);
407419
}
408420

409421
private function parseColorValue() {
@@ -420,7 +432,7 @@ private function parseColorValue() {
420432
$this->consumeWhiteSpace();
421433
$this->consume('(');
422434
$iLength = $this->strlen($sColorMode);
423-
for ($i = 0; $i < $iLength; $i++) {
435+
for ($i = 0; $i < $iLength; ++$i) {
424436
$this->consumeWhiteSpace();
425437
$aColor[$sColorMode[$i]] = $this->parseNumericValue(true);
426438
$this->consumeWhiteSpace();
@@ -434,7 +446,7 @@ private function parseColorValue() {
434446
}
435447

436448
private function parseURLValue() {
437-
$bUseUrl = $this->comes('url');
449+
$bUseUrl = $this->comes('url', true);
438450
if ($bUseUrl) {
439451
$this->consume('url');
440452
$this->consumeWhiteSpace();
@@ -448,38 +460,37 @@ private function parseURLValue() {
448460
}
449461
return $oResult;
450462
}
451-
463+
452464
/**
453465
* Tests an identifier for a given value. Since identifiers are all keywords, they can be vendor-prefixed. We need to check for these versions too.
454466
*/
455-
private static function identifierIs($sIdentifier, $sMatch, $bCaseInsensitive = true) {
456-
return preg_match("/^(-\\w+-)?$sMatch$/".($bCaseInsensitive ? 'i' : ''), $sIdentifier) === 1;
467+
private function identifierIs($sIdentifier, $sMatch) {
468+
return (strcasecmp($sIdentifier, $sMatch) === 0)
469+
?: preg_match("/^(-\\w+-)?$sMatch$/i", $sIdentifier) === 1;
457470
}
458471

459-
private function comes($sString, $iOffset = 0, $bCaseInsensitive = true) {
460-
if ($this->isEnd()) {
461-
return false;
462-
}
463-
$sPeek = $this->peek($sString, $iOffset);
464-
return $this->streql($sPeek, $sString, $bCaseInsensitive);
472+
private function comes($sString, $alpha = false) {
473+
$sPeek = $this->peek($alpha ? $this->strlen($sString) : strlen($sString));
474+
return ($sPeek == '')
475+
? false
476+
: $this->streql($sPeek, $sString, $alpha);
465477
}
466478

467479
private function peek($iLength = 1, $iOffset = 0) {
468-
if ($this->isEnd()) {
469-
return '';
480+
if (($peek = (!$iOffset && ($iLength === 1))) &&
481+
!is_null($this->peekCache)) {
482+
return $this->peekCache;
470483
}
471-
if (is_string($iLength)) {
472-
$iLength = $this->strlen($iLength);
473-
}
474-
if (is_string($iOffset)) {
475-
$iOffset = $this->strlen($iOffset);
476-
}
477-
$iOffset = $this->iCurrentPosition + $iOffset;
484+
$iOffset += $this->iCurrentPosition;
478485
if ($iOffset >= $this->iLength) {
479486
return '';
480487
}
481488
$iLength = min($iLength, $this->iLength-$iOffset);
482-
return $this->substr($this->sText, $iOffset, $iLength);
489+
$out = $this->substr($this->sText, $iOffset, $iLength);
490+
if ($peek) {
491+
$this->peekCache = $out;
492+
}
493+
return $out;
483494
}
484495

485496
private function consume($mValue = 1) {
@@ -489,13 +500,15 @@ private function consume($mValue = 1) {
489500
throw new UnexpectedTokenException($mValue, $this->peek(max($iLength, 5)));
490501
}
491502
$this->iCurrentPosition += $this->strlen($mValue);
503+
$this->peekCache = null;
492504
return $mValue;
493505
} else {
494506
if ($this->iCurrentPosition + $mValue > $this->iLength) {
495507
throw new UnexpectedTokenException($mValue, $this->peek(5), 'count');
496508
}
497509
$sResult = $this->substr($this->sText, $this->iCurrentPosition, $mValue);
498510
$this->iCurrentPosition += $mValue;
511+
$this->peekCache = null;
499512
return $sResult;
500513
}
501514
}
@@ -518,9 +531,13 @@ private function consumeWhiteSpace() {
518531

519532
private function consumeComment() {
520533
if ($this->comes('/*')) {
521-
$this->consumeUntil('*/');
522-
$this->consume('*/');
523-
return true;
534+
$this->consume(2);
535+
while ($this->consumeUntil('*', false, true)) {
536+
if ($this->comes('/')) {
537+
$this->consume(1);
538+
return true;
539+
}
540+
}
524541
}
525542
return false;
526543
}
@@ -529,22 +546,25 @@ private function isEnd() {
529546
return $this->iCurrentPosition >= $this->iLength;
530547
}
531548

532-
private function consumeUntil($aEnd, $bIncludeEnd = false) {
549+
private function consumeUntil($aEnd, $bIncludeEnd = false, $consumeEnd = false) {
533550
$aEnd = is_array($aEnd) ? $aEnd : array($aEnd);
534-
$iEndPos = null;
535-
foreach ($aEnd as $sEnd) {
536-
$iCurrentEndPos = $this->strpos($this->sText, $sEnd, $this->iCurrentPosition);
537-
if($iCurrentEndPos === false) {
538-
continue;
539-
}
540-
if($iEndPos === null || $iCurrentEndPos < $iEndPos) {
541-
$iEndPos = $iCurrentEndPos + ($bIncludeEnd ? $this->strlen($sEnd) : 0);
551+
$out = '';
552+
$start = $this->iCurrentPosition;
553+
554+
while (($char = $this->consume(1)) !== '') {
555+
if (in_array($char, $aEnd)) {
556+
if ($bIncludeEnd) {
557+
$out .= $char;
558+
} elseif (!$consumeEnd) {
559+
$this->iCurrentPosition -= $this->strlen($char);
560+
}
561+
return $out;
542562
}
563+
$out .= $char;
543564
}
544-
if ($iEndPos === null) {
545-
throw new UnexpectedTokenException('One of ("'.implode('","', $aEnd).'")', $this->peek(5), 'search');
546-
}
547-
return $this->consume($iEndPos - $this->iCurrentPosition);
565+
566+
$this->iCurrentPosition = $start;
567+
throw new UnexpectedTokenException('One of ("'.implode('","', $aEnd).'")', $this->peek(5), 'search');
548568
}
549569

550570
private function inputLeft() {

0 commit comments

Comments
 (0)