From 9ed24ad97b6586029fa23af7daaebbbe3eaf6251 Mon Sep 17 00:00:00 2001 From: Raphael Schweikert Date: Fri, 19 Oct 2018 09:28:07 +0200 Subject: [PATCH 1/3] Refactor parsing logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Extract parser state into its own instance • Every type should be able to parse its own structure --- lib/Sabberworm/CSS/CSSList/CSSList.php | 149 +++- lib/Sabberworm/CSS/CSSList/Document.php | 8 + lib/Sabberworm/CSS/Parser.php | 788 +----------------- lib/Sabberworm/CSS/Parsing/ParserState.php | 310 +++++++ lib/Sabberworm/CSS/Property/AtRule.php | 5 +- lib/Sabberworm/CSS/Rule/Rule.php | 41 +- .../CSS/RuleSet/DeclarationBlock.php | 13 +- lib/Sabberworm/CSS/RuleSet/RuleSet.php | 41 +- lib/Sabberworm/CSS/Value/CSSString.php | 34 + lib/Sabberworm/CSS/Value/CalcFunction.php | 54 ++ lib/Sabberworm/CSS/Value/Color.php | 54 ++ lib/Sabberworm/CSS/Value/LineName.php | 23 + lib/Sabberworm/CSS/Value/Size.php | 48 ++ lib/Sabberworm/CSS/Value/URL.php | 18 + lib/Sabberworm/CSS/Value/Value.php | 118 ++- 15 files changed, 902 insertions(+), 802 deletions(-) create mode 100644 lib/Sabberworm/CSS/Parsing/ParserState.php diff --git a/lib/Sabberworm/CSS/CSSList/CSSList.php b/lib/Sabberworm/CSS/CSSList/CSSList.php index bc90460b..20b1b6a0 100644 --- a/lib/Sabberworm/CSS/CSSList/CSSList.php +++ b/lib/Sabberworm/CSS/CSSList/CSSList.php @@ -2,11 +2,22 @@ namespace Sabberworm\CSS\CSSList; +use Sabberworm\CSS\Comment\Commentable; +use Sabberworm\CSS\Parsing\ParserState; +use Sabberworm\CSS\Parsing\SourceException; +use Sabberworm\CSS\Parsing\UnexpectedTokenException; +use Sabberworm\CSS\Property\AtRule; +use Sabberworm\CSS\Property\Charset; +use Sabberworm\CSS\Property\CSSNamespace; +use Sabberworm\CSS\Property\Import; +use Sabberworm\CSS\Property\Selector; use Sabberworm\CSS\Renderable; +use Sabberworm\CSS\RuleSet\AtRuleSet; use Sabberworm\CSS\RuleSet\DeclarationBlock; use Sabberworm\CSS\RuleSet\RuleSet; -use 
Sabberworm\CSS\Property\Selector; -use Sabberworm\CSS\Comment\Commentable; +use Sabberworm\CSS\Value\CSSString; +use Sabberworm\CSS\Value\URL; +use Sabberworm\CSS\Value\Value; /** * A CSSList is the most generic container available. Its contents include RuleSet as well as other CSSList objects. @@ -24,6 +35,140 @@ public function __construct($iLineNo = 0) { $this->iLineNo = $iLineNo; } + public static function parseList(ParserState $oParserState, CSSList $oList) { + $bIsRoot = $oList instanceof Document; + if(is_string($oParserState)) { + $oParserState = new ParserState($oParserState); + } + $bLenientParsing = $oParserState->getSettings()->bLenientParsing; + while(!$oParserState->isEnd()) { + $comments = $oParserState->consumeWhiteSpace(); + $oListItem = null; + if($bLenientParsing) { + try { + $oListItem = self::parseListItem($oParserState, $oList); + } catch (UnexpectedTokenException $e) { + $oListItem = false; + } + } else { + $oListItem = self::parseListItem($oParserState, $oList); + } + if($oListItem === null) { + // List parsing finished + return; + } + if($oListItem) { + $oListItem->setComments($comments); + $oList->append($oListItem); + } + $oParserState->consumeWhiteSpace(); + } + if(!$bIsRoot && !$bLenientParsing) { + throw new SourceException("Unexpected end of document", $oParserState->currentLine()); + } + } + + private static function parseListItem(ParserState $oParserState, CSSList $oList) { + $bIsRoot = $oList instanceof Document; + if ($oParserState->comes('@')) { + $oAtRule = self::parseAtRule($oParserState); + if($oAtRule instanceof Charset) { + if(!$bIsRoot) { + throw new UnexpectedTokenException('@charset may only occur in root document', '', 'custom', $oParserState->currentLine()); + } + if(count($oList->getContents()) > 0) { + throw new UnexpectedTokenException('@charset must be the first parseable token in a document', '', 'custom', $oParserState->currentLine()); + } + $oParserState->setCharset($oAtRule->getCharset()->getString()); + } + 
return $oAtRule; + } else if ($oParserState->comes('}')) { + $oParserState->consume('}'); + if ($bIsRoot) { + if ($oParserState->getSettings()->bLenientParsing) { + while ($oParserState->comes('}')) $oParserState->consume('}'); + return DeclarationBlock::parse($oParserState); + } else { + throw new SourceException("Unopened {", $oParserState->currentLine()); + } + } else { + return null; + } + } else { + return DeclarationBlock::parse($oParserState); + } + } + + private static function parseAtRule(ParserState $oParserState) { + $oParserState->consume('@'); + $sIdentifier = $oParserState->parseIdentifier(); + $iIdentifierLineNum = $oParserState->currentLine(); + $oParserState->consumeWhiteSpace(); + if ($sIdentifier === 'import') { + $oLocation = URL::parse($oParserState); + $oParserState->consumeWhiteSpace(); + $sMediaQuery = null; + if (!$oParserState->comes(';')) { + $sMediaQuery = $oParserState->consumeUntil(';'); + } + $oParserState->consume(';'); + return new Import($oLocation, $sMediaQuery, $iIdentifierLineNum); + } else if ($sIdentifier === 'charset') { + $sCharset = CSSString::parse($oParserState); + $oParserState->consumeWhiteSpace(); + $oParserState->consume(';'); + return new Charset($sCharset, $iIdentifierLineNum); + } else if (self::identifierIs($sIdentifier, 'keyframes')) { + $oResult = new KeyFrame($iIdentifierLineNum); + $oResult->setVendorKeyFrame($sIdentifier); + $oResult->setAnimationName(trim($oParserState->consumeUntil('{', false, true))); + CSSList::parseList($oParserState, $oResult); + return $oResult; + } else if ($sIdentifier === 'namespace') { + $sPrefix = null; + $mUrl = Value::parsePrimitiveValue($oParserState); + if (!$oParserState->comes(';')) { + $sPrefix = $mUrl; + $mUrl = Value::parsePrimitiveValue($oParserState); + } + $oParserState->consume(';'); + if ($sPrefix !== null && !is_string($sPrefix)) { + throw new UnexpectedTokenException('Wrong namespace prefix', $sPrefix, 'custom', $iIdentifierLineNum); + } + if (!($mUrl instanceof 
CSSString || $mUrl instanceof URL)) { + throw new UnexpectedTokenException('Wrong namespace url of invalid type', $mUrl, 'custom', $iIdentifierLineNum); + } + return new CSSNamespace($mUrl, $sPrefix, $iIdentifierLineNum); + } else { + //Unknown other at rule (font-face or such) + $sArgs = trim($oParserState->consumeUntil('{', false, true)); + $bUseRuleSet = true; + foreach(explode('/', AtRule::BLOCK_RULES) as $sBlockRuleName) { + if(self::identifierIs($sIdentifier, $sBlockRuleName)) { + $bUseRuleSet = false; + break; + } + } + if($bUseRuleSet) { + $oAtRule = new AtRuleSet($sIdentifier, $sArgs, $iIdentifierLineNum); + RuleSet::parseRuleSet($oParserState, $oAtRule); + } else { + $oAtRule = new AtRuleBlockList($sIdentifier, $sArgs, $iIdentifierLineNum); + CSSList::parseList($oParserState, $oAtRule); + } + return $oAtRule; + } + } + + /** + * Tests an identifier for a given value. Since identifiers are all keywords, they can be vendor-prefixed. We need to check for these versions too. + */ + private static function identifierIs($sIdentifier, $sMatch) { + return (strcasecmp($sIdentifier, $sMatch) === 0) + ?: preg_match("/^(-\\w+-)?$sMatch$/i", $sIdentifier) === 1; + } + + /** * @return int */ diff --git a/lib/Sabberworm/CSS/CSSList/Document.php b/lib/Sabberworm/CSS/CSSList/Document.php index bd4a23ee..873df755 100644 --- a/lib/Sabberworm/CSS/CSSList/Document.php +++ b/lib/Sabberworm/CSS/CSSList/Document.php @@ -2,6 +2,8 @@ namespace Sabberworm\CSS\CSSList; +use Sabberworm\CSS\Parsing\ParserState; + /** * The root CSSList of a parsed file. Contains all top-level css contents, mostly declaration blocks, but also any @-rules encountered. 
*/ @@ -14,6 +16,12 @@ public function __construct($iLineNo = 0) { parent::__construct($iLineNo); } + public static function parse(ParserState $oParserState) { + $oDocument = new Document($oParserState->currentLine()); + CSSList::parseList($oParserState, $oDocument); + return $oDocument; + } + /** * Gets all DeclarationBlock objects recursively. */ diff --git a/lib/Sabberworm/CSS/Parser.php b/lib/Sabberworm/CSS/Parser.php index adc0d390..a50a3592 100644 --- a/lib/Sabberworm/CSS/Parser.php +++ b/lib/Sabberworm/CSS/Parser.php @@ -2,45 +2,18 @@ namespace Sabberworm\CSS; -use Sabberworm\CSS\CSSList\CSSList; use Sabberworm\CSS\CSSList\Document; -use Sabberworm\CSS\CSSList\KeyFrame; -use Sabberworm\CSS\Parsing\SourceException; -use Sabberworm\CSS\Property\AtRule; -use Sabberworm\CSS\Property\Import; -use Sabberworm\CSS\Property\Charset; -use Sabberworm\CSS\Property\CSSNamespace; -use Sabberworm\CSS\RuleSet\AtRuleSet; -use Sabberworm\CSS\CSSList\AtRuleBlockList; -use Sabberworm\CSS\RuleSet\DeclarationBlock; -use Sabberworm\CSS\Value\CSSFunction; -use Sabberworm\CSS\Value\CalcFunction; -use Sabberworm\CSS\Value\RuleValueList; -use Sabberworm\CSS\Value\CalcRuleValueList; -use Sabberworm\CSS\Value\Size; -use Sabberworm\CSS\Value\Color; -use Sabberworm\CSS\Value\URL; -use Sabberworm\CSS\Value\CSSString; -use Sabberworm\CSS\Value\LineName; -use Sabberworm\CSS\Rule\Rule; -use Sabberworm\CSS\Parsing\UnexpectedTokenException; -use Sabberworm\CSS\Comment\Comment; +use Sabberworm\CSS\Parsing\ParserState; /** * Parser class parses CSS from text into a data structure. */ class Parser { - private $sText; - private $aText; - private $iCurrentPosition; - private $oParserSettings; - private $sCharset; - private $iLength; - private $blockRules; - private $aSizeUnits; private $iLineNo; + private $oParserState; + /** * Parser constructor. 
* Note that that iLineNo starts from 1 and not 0 @@ -51,769 +24,24 @@ class Parser { */ public function __construct($sText, Settings $oParserSettings = null, $iLineNo = 1) { $this->sText = $sText; - $this->iCurrentPosition = 0; - $this->iLineNo = $iLineNo; if ($oParserSettings === null) { $oParserSettings = Settings::create(); } $this->oParserSettings = $oParserSettings; - $this->blockRules = explode('/', AtRule::BLOCK_RULES); - - foreach (explode('/', Size::ABSOLUTE_SIZE_UNITS.'/'.Size::RELATIVE_SIZE_UNITS.'/'.Size::NON_SIZE_UNITS) as $val) { - $iSize = strlen($val); - if(!isset($this->aSizeUnits[$iSize])) { - $this->aSizeUnits[$iSize] = array(); - } - $this->aSizeUnits[$iSize][strtolower($val)] = $val; - } - ksort($this->aSizeUnits, SORT_NUMERIC); + $this->iLineNo = $iLineNo; } public function setCharset($sCharset) { - $this->sCharset = $sCharset; - $this->aText = $this->strsplit($this->sText); - $this->iLength = count($this->aText); + $this->oParserState->setCharset($sCharset); } public function getCharset() { - return $this->sCharset; + return $this->oParserState->getCharset(); } public function parse() { - $this->setCharset($this->oParserSettings->sDefaultCharset); - $oResult = new Document($this->iLineNo); - $this->parseDocument($oResult); - return $oResult; - } - - private function parseDocument(Document $oDocument) { - $this->parseList($oDocument, true); - } - - private function parseList(CSSList $oList, $bIsRoot = false) { - while (!$this->isEnd()) { - $comments = $this->consumeWhiteSpace(); - $oListItem = null; - if($this->oParserSettings->bLenientParsing) { - try { - $oListItem = $this->parseListItem($oList, $bIsRoot); - } catch (UnexpectedTokenException $e) { - $oListItem = false; - } - } else { - $oListItem = $this->parseListItem($oList, $bIsRoot); - } - if($oListItem === null) { - // List parsing finished - return; - } - if($oListItem) { - $oListItem->setComments($comments); - $oList->append($oListItem); - } - $this->consumeWhiteSpace(); - } - if 
(!$bIsRoot && !$this->oParserSettings->bLenientParsing) { - throw new SourceException("Unexpected end of document", $this->iLineNo); - } - } - - private function parseListItem(CSSList $oList, $bIsRoot = false) { - if ($this->comes('@')) { - $oAtRule = $this->parseAtRule(); - if($oAtRule instanceof Charset) { - if(!$bIsRoot) { - throw new UnexpectedTokenException('@charset may only occur in root document', '', 'custom', $this->iLineNo); - } - if(count($oList->getContents()) > 0) { - throw new UnexpectedTokenException('@charset must be the first parseable token in a document', '', 'custom', $this->iLineNo); - } - $this->setCharset($oAtRule->getCharset()->getString()); - } - return $oAtRule; - } else if ($this->comes('}')) { - $this->consume('}'); - if ($bIsRoot) { - if ($this->oParserSettings->bLenientParsing) { - while ($this->comes('}')) $this->consume('}'); - return $this->parseSelector(); - } else { - throw new SourceException("Unopened {", $this->iLineNo); - } - } else { - return null; - } - } else { - return $this->parseSelector(); - } - } - - private function parseAtRule() { - $this->consume('@'); - $sIdentifier = $this->parseIdentifier(false); - $iIdentifierLineNum = $this->iLineNo; - $this->consumeWhiteSpace(); - if ($sIdentifier === 'import') { - $oLocation = $this->parseURLValue(); - $this->consumeWhiteSpace(); - $sMediaQuery = null; - if (!$this->comes(';')) { - $sMediaQuery = $this->consumeUntil(';'); - } - $this->consume(';'); - return new Import($oLocation, $sMediaQuery, $iIdentifierLineNum); - } else if ($sIdentifier === 'charset') { - $sCharset = $this->parseStringValue(); - $this->consumeWhiteSpace(); - $this->consume(';'); - return new Charset($sCharset, $iIdentifierLineNum); - } else if ($this->identifierIs($sIdentifier, 'keyframes')) { - $oResult = new KeyFrame($iIdentifierLineNum); - $oResult->setVendorKeyFrame($sIdentifier); - $oResult->setAnimationName(trim($this->consumeUntil('{', false, true))); - $this->parseList($oResult); - return 
$oResult; - } else if ($sIdentifier === 'namespace') { - $sPrefix = null; - $mUrl = $this->parsePrimitiveValue(); - if (!$this->comes(';')) { - $sPrefix = $mUrl; - $mUrl = $this->parsePrimitiveValue(); - } - $this->consume(';'); - if ($sPrefix !== null && !is_string($sPrefix)) { - throw new UnexpectedTokenException('Wrong namespace prefix', $sPrefix, 'custom', $iIdentifierLineNum); - } - if (!($mUrl instanceof CSSString || $mUrl instanceof URL)) { - throw new UnexpectedTokenException('Wrong namespace url of invalid type', $mUrl, 'custom', $iIdentifierLineNum); - } - return new CSSNamespace($mUrl, $sPrefix, $iIdentifierLineNum); - } else { - //Unknown other at rule (font-face or such) - $sArgs = trim($this->consumeUntil('{', false, true)); - $bUseRuleSet = true; - foreach($this->blockRules as $sBlockRuleName) { - if($this->identifierIs($sIdentifier, $sBlockRuleName)) { - $bUseRuleSet = false; - break; - } - } - if($bUseRuleSet) { - $oAtRule = new AtRuleSet($sIdentifier, $sArgs, $iIdentifierLineNum); - $this->parseRuleSet($oAtRule); - } else { - $oAtRule = new AtRuleBlockList($sIdentifier, $sArgs, $iIdentifierLineNum); - $this->parseList($oAtRule); - } - return $oAtRule; - } - } - - private function parseIdentifier($bAllowFunctions = true, $bIgnoreCase = true) { - $sResult = $this->parseCharacter(true); - if ($sResult === null) { - throw new UnexpectedTokenException($sResult, $this->peek(5), 'identifier', $this->iLineNo); - } - $sCharacter = null; - while (($sCharacter = $this->parseCharacter(true)) !== null) { - $sResult .= $sCharacter; - } - if ($bIgnoreCase) { - $sResult = $this->strtolower($sResult); - } - if ($bAllowFunctions && $this->comes('(')) { - $this->consume('('); - $aArguments = $this->parseValue(array('=', ' ', ',')); - $sResult = new CSSFunction($sResult, $aArguments, ',', $this->iLineNo); - $this->consume(')'); - } - return $sResult; - } - - private function parseStringValue() { - $sBegin = $this->peek(); - $sQuote = null; - if ($sBegin === "'") { - 
$sQuote = "'"; - } else if ($sBegin === '"') { - $sQuote = '"'; - } - if ($sQuote !== null) { - $this->consume($sQuote); - } - $sResult = ""; - $sContent = null; - if ($sQuote === null) { - //Unquoted strings end in whitespace or with braces, brackets, parentheses - while (!preg_match('/[\\s{}()<>\\[\\]]/isu', $this->peek())) { - $sResult .= $this->parseCharacter(false); - } - } else { - while (!$this->comes($sQuote)) { - $sContent = $this->parseCharacter(false); - if ($sContent === null) { - throw new SourceException("Non-well-formed quoted string {$this->peek(3)}", $this->iLineNo); - } - $sResult .= $sContent; - } - $this->consume($sQuote); - } - return new CSSString($sResult, $this->iLineNo); - } - - private function parseCharacter($bIsForIdentifier) { - if ($this->peek() === '\\') { - if ($bIsForIdentifier && $this->oParserSettings->bLenientParsing && ($this->comes('\0') || $this->comes('\9'))) { - // Non-strings can contain \0 or \9 which is an IE hack supported in lenient parsing. 
- return null; - } - $this->consume('\\'); - if ($this->comes('\n') || $this->comes('\r')) { - return ''; - } - if (preg_match('/[0-9a-fA-F]/Su', $this->peek()) === 0) { - return $this->consume(1); - } - $sUnicode = $this->consumeExpression('/^[0-9a-fA-F]{1,6}/u', 6); - if ($this->strlen($sUnicode) < 6) { - //Consume whitespace after incomplete unicode escape - if (preg_match('/\\s/isSu', $this->peek())) { - if ($this->comes('\r\n')) { - $this->consume(2); - } else { - $this->consume(1); - } - } - } - $iUnicode = intval($sUnicode, 16); - $sUtf32 = ""; - for ($i = 0; $i < 4; ++$i) { - $sUtf32 .= chr($iUnicode & 0xff); - $iUnicode = $iUnicode >> 8; - } - return iconv('utf-32le', $this->sCharset, $sUtf32); - } - if ($bIsForIdentifier) { - $peek = ord($this->peek()); - // Ranges: a-z A-Z 0-9 - _ - if (($peek >= 97 && $peek <= 122) || - ($peek >= 65 && $peek <= 90) || - ($peek >= 48 && $peek <= 57) || - ($peek === 45) || - ($peek === 95) || - ($peek > 0xa1)) { - return $this->consume(1); - } - } else { - return $this->consume(1); - } - return null; - } - - private function parseSelector() { - $aComments = array(); - $oResult = new DeclarationBlock($this->iLineNo); - $oResult->setSelector($this->consumeUntil('{', false, true, $aComments)); - $oResult->setComments($aComments); - $this->parseRuleSet($oResult); - return $oResult; - } - - private function parseRuleSet($oRuleSet) { - while ($this->comes(';')) { - $this->consume(';'); - } - while (!$this->comes('}')) { - $oRule = null; - if($this->oParserSettings->bLenientParsing) { - try { - $oRule = $this->parseRule(); - } catch (UnexpectedTokenException $e) { - try { - $sConsume = $this->consumeUntil(array("\n", ";", '}'), true); - // We need to “unfind” the matches to the end of the ruleSet as this will be matched later - if($this->streql(substr($sConsume, -1), '}')) { - --$this->iCurrentPosition; - } else { - while ($this->comes(';')) { - $this->consume(';'); - } - } - } catch (UnexpectedTokenException $e) { - // We’ve 
reached the end of the document. Just close the RuleSet. - return; - } - } - } else { - $oRule = $this->parseRule(); - } - if($oRule) { - $oRuleSet->addRule($oRule); - } - } - $this->consume('}'); - } - - private function parseRule() { - $aComments = $this->consumeWhiteSpace(); - $oRule = new Rule($this->parseIdentifier(), $this->iLineNo); - $oRule->setComments($aComments); - $oRule->addComments($this->consumeWhiteSpace()); - $this->consume(':'); - $oValue = $this->parseValue(self::listDelimiterForRule($oRule->getRule())); - $oRule->setValue($oValue); - if ($this->oParserSettings->bLenientParsing) { - while ($this->comes('\\')) { - $this->consume('\\'); - $oRule->addIeHack($this->consume()); - $this->consumeWhiteSpace(); - } - } - $this->consumeWhiteSpace(); - if ($this->comes('!')) { - $this->consume('!'); - $this->consumeWhiteSpace(); - $this->consume('important'); - $oRule->setIsImportant(true); - } - $this->consumeWhiteSpace(); - while ($this->comes(';')) { - $this->consume(';'); - } - $this->consumeWhiteSpace(); - return $oRule; - } - - private function parseValue($aListDelimiters) { - $aStack = array(); - $this->consumeWhiteSpace(); - //Build a list of delimiters and parsed values - while (!($this->comes('}') || $this->comes(';') || $this->comes('!') || $this->comes(')') || $this->comes('\\'))) { - if (count($aStack) > 0) { - $bFoundDelimiter = false; - foreach ($aListDelimiters as $sDelimiter) { - if ($this->comes($sDelimiter)) { - array_push($aStack, $this->consume($sDelimiter)); - $this->consumeWhiteSpace(); - $bFoundDelimiter = true; - break; - } - } - if (!$bFoundDelimiter) { - //Whitespace was the list delimiter - array_push($aStack, ' '); - } - } - array_push($aStack, $this->parsePrimitiveValue()); - $this->consumeWhiteSpace(); - } - //Convert the list to list objects - foreach ($aListDelimiters as $sDelimiter) { - if (count($aStack) === 1) { - return $aStack[0]; - } - $iStartPosition = null; - while (($iStartPosition = array_search($sDelimiter, 
$aStack, true)) !== false) { - $iLength = 2; //Number of elements to be joined - for ($i = $iStartPosition + 2; $i < count($aStack); $i+=2, ++$iLength) { - if ($sDelimiter !== $aStack[$i]) { - break; - } - } - $oList = new RuleValueList($sDelimiter, $this->iLineNo); - for ($i = $iStartPosition - 1; $i - $iStartPosition + 1 < $iLength * 2; $i+=2) { - $oList->addListComponent($aStack[$i]); - } - array_splice($aStack, $iStartPosition - 1, $iLength * 2 - 1, array($oList)); - } - } - return $aStack[0]; - } - - private static function listDelimiterForRule($sRule) { - if (preg_match('/^font($|-)/', $sRule)) { - return array(',', '/', ' '); - } - return array(',', ' ', '/'); - } - - private function parsePrimitiveValue() { - $oValue = null; - $this->consumeWhiteSpace(); - if (is_numeric($this->peek()) || ($this->comes('-.') && is_numeric($this->peek(1, 2))) || (($this->comes('-') || $this->comes('.')) && is_numeric($this->peek(1, 1)))) { - $oValue = $this->parseNumericValue(); - } else if ($this->comes('#') || $this->comes('rgb', true) || $this->comes('hsl', true)) { - $oValue = $this->parseColorValue(); - } else if ($this->comes('url', true)) { - $oValue = $this->parseURLValue(); - } else if ($this->comes('calc', true) || $this->comes('-webkit-calc', true) || $this->comes('-moz-calc', true)) { - $oValue = $this->parseCalcValue(); - } else if ($this->comes("'") || $this->comes('"')) { - $oValue = $this->parseStringValue(); - } else if ($this->comes("progid:") && $this->oParserSettings->bLenientParsing) { - $oValue = $this->parseMicrosoftFilter(); - } else if ($this->comes("[")) { - $oValue = $this->parseLineNameValue(); - } else { - $oValue = $this->parseIdentifier(true, false); - } - $this->consumeWhiteSpace(); - return $oValue; - } - - private function parseNumericValue($bForColor = false) { - $sSize = ''; - if ($this->comes('-')) { - $sSize .= $this->consume('-'); - } - while (is_numeric($this->peek()) || $this->comes('.')) { - if ($this->comes('.')) { - $sSize .= 
$this->consume('.'); - } else { - $sSize .= $this->consume(1); - } - } - - $sUnit = null; - foreach ($this->aSizeUnits as $iLength => &$aValues) { - $sKey = strtolower($this->peek($iLength)); - if(array_key_exists($sKey, $aValues)) { - if (($sUnit = $aValues[$sKey]) !== null) { - $this->consume($iLength); - break; - } - } - } - return new Size(floatval($sSize), $sUnit, $bForColor, $this->iLineNo); - } - - private function parseLineNameValue() { - $this->consume('['); - $this->consumeWhiteSpace(); - $aNames = array(); - do { - if($this->oParserSettings->bLenientParsing) { - try { - $aNames[] = $this->parseIdentifier(false, true); - } catch(UnexpectedTokenException $e) {} - } else { - $aNames[] = $this->parseIdentifier(false, true); - } - $this->consumeWhiteSpace(); - } while (!$this->comes(']')); - $this->consume(']'); - return new LineName($aNames, $this->iLineNo); - } - - private function parseColorValue() { - $aColor = array(); - if ($this->comes('#')) { - $this->consume('#'); - $sValue = $this->parseIdentifier(false); - if ($this->strlen($sValue) === 3) { - $sValue = $sValue[0] . $sValue[0] . $sValue[1] . $sValue[1] . $sValue[2] . $sValue[2]; - } else if ($this->strlen($sValue) === 4) { - $sValue = $sValue[0] . $sValue[0] . $sValue[1] . $sValue[1] . $sValue[2] . $sValue[2] . $sValue[3] . $sValue[3]; - } - - if ($this->strlen($sValue) === 8) { - $aColor = array('r' => new Size(intval($sValue[0] . $sValue[1], 16), null, true, $this->iLineNo), 'g' => new Size(intval($sValue[2] . $sValue[3], 16), null, true, $this->iLineNo), 'b' => new Size(intval($sValue[4] . $sValue[5], 16), null, true, $this->iLineNo), 'a' => new Size(round($this->mapRange(intval($sValue[6] . $sValue[7], 16), 0, 255, 0, 1), 2), null, true, $this->iLineNo)); - } else { - $aColor = array('r' => new Size(intval($sValue[0] . $sValue[1], 16), null, true, $this->iLineNo), 'g' => new Size(intval($sValue[2] . $sValue[3], 16), null, true, $this->iLineNo), 'b' => new Size(intval($sValue[4] . 
$sValue[5], 16), null, true, $this->iLineNo)); - } - } else { - $sColorMode = $this->parseIdentifier(false); - $this->consumeWhiteSpace(); - $this->consume('('); - $iLength = $this->strlen($sColorMode); - for ($i = 0; $i < $iLength; ++$i) { - $this->consumeWhiteSpace(); - $aColor[$sColorMode[$i]] = $this->parseNumericValue(true); - $this->consumeWhiteSpace(); - if ($i < ($iLength - 1)) { - $this->consume(','); - } - } - $this->consume(')'); - } - return new Color($aColor, $this->iLineNo); - } - - private function parseMicrosoftFilter() { - $sFunction = $this->consumeUntil('(', false, true); - $aArguments = $this->parseValue(array(',', '=')); - return new CSSFunction($sFunction, $aArguments, ',', $this->iLineNo); - } - - private function parseURLValue() { - $bUseUrl = $this->comes('url', true); - if ($bUseUrl) { - $this->consume('url'); - $this->consumeWhiteSpace(); - $this->consume('('); - } - $this->consumeWhiteSpace(); - $oResult = new URL($this->parseStringValue(), $this->iLineNo); - if ($bUseUrl) { - $this->consumeWhiteSpace(); - $this->consume(')'); - } - return $oResult; - } - - private function parseCalcValue() { - $aOperators = array('+', '-', '*', '/'); - $sFunction = trim($this->consumeUntil('(', false, true)); - $oCalcList = new CalcRuleValueList($this->iLineNo); - $oList = new RuleValueList(',', $this->iLineNo); - $iNestingLevel = 0; - $iLastComponentType = NULL; - while(!$this->comes(')') || $iNestingLevel > 0) { - $this->consumeWhiteSpace(); - if ($this->comes('(')) { - $iNestingLevel++; - $oCalcList->addListComponent($this->consume(1)); - continue; - } else if ($this->comes(')')) { - $iNestingLevel--; - $oCalcList->addListComponent($this->consume(1)); - continue; - } - if ($iLastComponentType != CalcFunction::T_OPERAND) { - $oVal = $this->parsePrimitiveValue(); - $oCalcList->addListComponent($oVal); - $iLastComponentType = CalcFunction::T_OPERAND; - } else { - if (in_array($this->peek(), $aOperators)) { - if (($this->comes('-') || $this->comes('+'))) 
{ - if ($this->peek(1, -1) != ' ' || !($this->comes('- ') || $this->comes('+ '))) { - throw new UnexpectedTokenException(" {$this->peek()} ", $this->peek(1, -1) . $this->peek(2), 'literal', $this->iLineNo); - } - } - $oCalcList->addListComponent($this->consume(1)); - $iLastComponentType = CalcFunction::T_OPERATOR; - } else { - throw new UnexpectedTokenException(sprintf('Next token was expected to be an operand of type %s. Instead "%s" was found.', implode(', ', $aOperators), $oVal), '', 'custom', $this->iLineNo); - } - } - } - $oList->addListComponent($oCalcList); - $this->consume(')'); - return new CalcFunction($sFunction, $oList, ',', $this->iLineNo); - } - - /** - * Tests an identifier for a given value. Since identifiers are all keywords, they can be vendor-prefixed. We need to check for these versions too. - */ - private function identifierIs($sIdentifier, $sMatch) { - return (strcasecmp($sIdentifier, $sMatch) === 0) - ?: preg_match("/^(-\\w+-)?$sMatch$/i", $sIdentifier) === 1; - } - - private function comes($sString, $bCaseInsensitive = false) { - $sPeek = $this->peek(strlen($sString)); - return ($sPeek == '') - ? 
false - : $this->streql($sPeek, $sString, $bCaseInsensitive); - } - - private function peek($iLength = 1, $iOffset = 0) { - $iOffset += $this->iCurrentPosition; - if ($iOffset >= $this->iLength) { - return ''; - } - return $this->substr($iOffset, $iLength); - } - - private function consume($mValue = 1) { - if (is_string($mValue)) { - $iLineCount = substr_count($mValue, "\n"); - $iLength = $this->strlen($mValue); - if (!$this->streql($this->substr($this->iCurrentPosition, $iLength), $mValue)) { - throw new UnexpectedTokenException($mValue, $this->peek(max($iLength, 5)), $this->iLineNo); - } - $this->iLineNo += $iLineCount; - $this->iCurrentPosition += $this->strlen($mValue); - return $mValue; - } else { - if ($this->iCurrentPosition + $mValue > $this->iLength) { - throw new UnexpectedTokenException($mValue, $this->peek(5), 'count', $this->iLineNo); - } - $sResult = $this->substr($this->iCurrentPosition, $mValue); - $iLineCount = substr_count($sResult, "\n"); - $this->iLineNo += $iLineCount; - $this->iCurrentPosition += $mValue; - return $sResult; - } - } - - private function consumeExpression($mExpression, $iMaxLength = null) { - $aMatches = null; - $sInput = $iMaxLength !== null ? $this->peek($iMaxLength) : $this->inputLeft(); - if (preg_match($mExpression, $sInput, $aMatches, PREG_OFFSET_CAPTURE) === 1) { - return $this->consume($aMatches[0][0]); - } - throw new UnexpectedTokenException($mExpression, $this->peek(5), 'expression', $this->iLineNo); - } - - private function consumeWhiteSpace() { - $comments = array(); - do { - while (preg_match('/\\s/isSu', $this->peek()) === 1) { - $this->consume(1); - } - if($this->oParserSettings->bLenientParsing) { - try { - $oComment = $this->consumeComment(); - } catch(UnexpectedTokenException $e) { - // When we can’t find the end of a comment, we assume the document is finished. 
- $this->iCurrentPosition = $this->iLength; - return; - } - } else { - $oComment = $this->consumeComment(); - } - if ($oComment !== false) { - $comments[] = $oComment; - } - } while($oComment !== false); - return $comments; - } - - /** - * @return false|Comment - */ - private function consumeComment() { - $mComment = false; - if ($this->comes('/*')) { - $iLineNo = $this->iLineNo; - $this->consume(1); - $mComment = ''; - while (($char = $this->consume(1)) !== '') { - $mComment .= $char; - if ($this->comes('*/')) { - $this->consume(2); - break; - } - } - } - - if ($mComment !== false) { - // We skip the * which was included in the comment. - return new Comment(substr($mComment, 1), $iLineNo); - } - - return $mComment; - } - - private function isEnd() { - return $this->iCurrentPosition >= $this->iLength; - } - - private function consumeUntil($aEnd, $bIncludeEnd = false, $consumeEnd = false, array &$comments = array()) { - $aEnd = is_array($aEnd) ? $aEnd : array($aEnd); - $out = ''; - $start = $this->iCurrentPosition; - - while (($char = $this->consume(1)) !== '') { - if (in_array($char, $aEnd)) { - if ($bIncludeEnd) { - $out .= $char; - } elseif (!$consumeEnd) { - $this->iCurrentPosition -= $this->strlen($char); - } - return $out; - } - $out .= $char; - if ($comment = $this->consumeComment()) { - $comments[] = $comment; - } - } - - $this->iCurrentPosition = $start; - throw new UnexpectedTokenException('One of ("'.implode('","', $aEnd).'")', $this->peek(5), 'search', $this->iLineNo); - } - - private function inputLeft() { - return $this->substr($this->iCurrentPosition, -1); - } - - private function substr($iStart, $iLength) { - if ($iLength < 0) { - $iLength = $this->iLength - $iStart + $iLength; - } - if ($iStart + $iLength > $this->iLength) { - $iLength = $this->iLength - $iStart; - } - $sResult = ''; - while ($iLength > 0) { - $sResult .= $this->aText[$iStart]; - $iStart++; - $iLength--; - } - return $sResult; - } - - private function strlen($sString) { - if 
($this->oParserSettings->bMultibyteSupport) { - return mb_strlen($sString, $this->sCharset); - } else { - return strlen($sString); - } - } - - private function streql($sString1, $sString2, $bCaseInsensitive = true) { - if($bCaseInsensitive) { - return $this->strtolower($sString1) === $this->strtolower($sString2); - } else { - return $sString1 === $sString2; - } - } - - private function strtolower($sString) { - if ($this->oParserSettings->bMultibyteSupport) { - return mb_strtolower($sString, $this->sCharset); - } else { - return strtolower($sString); - } - } - - private function strsplit($sString) { - if ($this->oParserSettings->bMultibyteSupport) { - if ($this->streql($this->sCharset, 'utf-8')) { - return preg_split('//u', $sString, null, PREG_SPLIT_NO_EMPTY); - } else { - $iLength = mb_strlen($sString, $this->sCharset); - $aResult = array(); - for ($i = 0; $i < $iLength; ++$i) { - $aResult[] = mb_substr($sString, $i, 1, $this->sCharset); - } - return $aResult; - } - } else { - if($sString === '') { - return array(); - } else { - return str_split($sString); - } - } - } - - private function strpos($sString, $sNeedle, $iOffset) { - if ($this->oParserSettings->bMultibyteSupport) { - return mb_strpos($sString, $sNeedle, $iOffset, $this->sCharset); - } else { - return strpos($sString, $sNeedle, $iOffset); - } - } - - private function mapRange($fVal, $fFromMin, $fFromMax, $fToMin, $fToMax) { - $fFromRange = $fFromMax - $fFromMin; - $fToRange = $fToMax - $fToMin; - $fMultiplier = $fToRange / $fFromRange; - $fNewVal = $fVal - $fFromMin; - $fNewVal *= $fMultiplier; - return $fNewVal + $fToMin; + $this->oParserState = new ParserState($this->sText, $this->oParserSettings, $this->iLineNo); + return Document::parse($this->oParserState); } } diff --git a/lib/Sabberworm/CSS/Parsing/ParserState.php b/lib/Sabberworm/CSS/Parsing/ParserState.php new file mode 100644 index 00000000..4305c9a0 --- /dev/null +++ b/lib/Sabberworm/CSS/Parsing/ParserState.php @@ -0,0 +1,310 @@ +oParserSettings = 
$oParserSettings;
+		$this->sText = $sText;
+		$this->iCurrentPosition = 0;
+		$this->iLineNo = $iLineNo;
+		$this->setCharset($this->oParserSettings->sDefaultCharset);
+	}
+
+	/**
+	 * Stores the charset and re-splits the source text into a character array for it.
+	 *
+	 * @param string $sCharset
+	 */
+	public function setCharset($sCharset) {
+		$this->sCharset = $sCharset;
+		$this->aText = $this->strsplit($this->sText);
+		$this->iLength = count($this->aText);
+	}
+
+	/**
+	 * @return string the charset last passed to setCharset()
+	 */
+	public function getCharset() {
+		return $this->sCharset;
+	}
+
+	/**
+	 * @return int the line number reached so far
+	 */
+	public function currentLine() {
+		return $this->iLineNo;
+	}
+
+	/**
+	 * @return mixed the settings object handed to the constructor
+	 */
+	public function getSettings() {
+		return $this->oParserSettings;
+	}
+
+	/**
+	 * Consumes consecutive identifier characters (escapes included) and returns them.
+	 *
+	 * @param bool $bIgnoreCase lower-case the result when true
+	 * @return string
+	 * @throws UnexpectedTokenException if the next character cannot start an identifier
+	 */
+	public function parseIdentifier($bIgnoreCase = true) {
+		$sResult = $this->parseCharacter(true);
+		if ($sResult === null) {
+			throw new UnexpectedTokenException($sResult, $this->peek(5), 'identifier', $this->iLineNo);
+		}
+		$sCharacter = null;
+		while (($sCharacter = $this->parseCharacter(true)) !== null) {
+			$sResult .= $sCharacter;
+		}
+		if ($bIgnoreCase) {
+			$sResult = $this->strtolower($sResult);
+		}
+		return $sResult;
+	}
+
+	/**
+	 * Consumes one character, resolving backslash escapes.
+	 *
+	 * @param bool $bIsForIdentifier restrict plain characters to those allowed in identifiers
+	 * @return string|null the character ('' for an escaped line break), or null when
+	 *         $bIsForIdentifier is set and no identifier character comes next
+	 */
+	public function parseCharacter($bIsForIdentifier) {
+		if ($this->peek() === '\\') {
+			if ($bIsForIdentifier && $this->oParserSettings->bLenientParsing && ($this->comes('\0') || $this->comes('\9'))) {
+				// Non-strings can contain \0 or \9 which is an IE hack supported in lenient parsing.
+				return null;
+			}
+			$this->consume('\\');
+			// Double quotes are required: '\n' in single quotes is a literal backslash followed by "n",
+			// which made an escaped backslash followed by n or r lose its backslash.
+			// NOTE(review): the line break itself is not consumed here; confirm whether it should be.
+			if ($this->comes("\n") || $this->comes("\r")) {
+				return '';
+			}
+			if (preg_match('/[0-9a-fA-F]/Su', $this->peek()) === 0) {
+				return $this->consume(1);
+			}
+			$sUnicode = $this->consumeExpression('/^[0-9a-fA-F]{1,6}/u', 6);
+			if ($this->strlen($sUnicode) < 6) {
+				//Consume whitespace after incomplete unicode escape
+				if (preg_match('/\\s/isSu', $this->peek())) {
+					// A CRLF pair counts as one whitespace; needs a real "\r\n", not the literal '\r\n'.
+					if ($this->comes("\r\n")) {
+						$this->consume(2);
+					} else {
+						$this->consume(1);
+					}
+				}
+			}
+			$iUnicode = intval($sUnicode, 16);
+			$sUtf32 = "";
+			// Serialise the code point as four little-endian bytes for iconv.
+			for ($i = 0; $i < 4; ++$i) {
+				$sUtf32 .= chr($iUnicode & 0xff);
+				$iUnicode = $iUnicode >> 8;
+			}
+			return iconv('utf-32le', $this->sCharset, $sUtf32);
+		}
+		if ($bIsForIdentifier) {
+			$peek = ord($this->peek());
+			// Ranges: a-z A-Z 0-9 - _
+			if (($peek >= 97 && $peek <= 122) ||
+				($peek >= 65 && $peek <= 90) ||
+				($peek >= 48 && $peek <= 57) ||
+				($peek === 45) ||
+				($peek === 95) ||
+				($peek > 0xa1)) {
+				return $this->consume(1);
+			}
+		} else {
+			return $this->consume(1);
+		}
+		return null;
+	}
+
+	/**
+	 * Skips whitespace and comments, collecting the comments found.
+	 *
+	 * @return array the comments consumed, in order of appearance
+	 */
+	public function consumeWhiteSpace() {
+		$comments = array();
+		do {
+			while (preg_match('/\\s/isSu', $this->peek()) === 1) {
+				$this->consume(1);
+			}
+			if($this->oParserSettings->bLenientParsing) {
+				try {
+					$oComment = $this->consumeComment();
+				} catch(UnexpectedTokenException $e) {
+					// When we can’t find the end of a comment, we assume the document is finished.
+					$this->iCurrentPosition = $this->iLength;
+					// Still return an array so callers never receive null.
+					return $comments;
+				}
+			} else {
+				$oComment = $this->consumeComment();
+			}
+			if ($oComment !== false) {
+				$comments[] = $oComment;
+			}
+		} while($oComment !== false);
+		return $comments;
+	}
+
+	/**
+	 * Tells whether the upcoming input equals $sString, without consuming anything.
+	 *
+	 * @param string $sString
+	 * @param bool $bCaseInsensitive
+	 * @return bool false at end of input
+	 */
+	public function comes($sString, $bCaseInsensitive = false) {
+		$sPeek = $this->peek(strlen($sString));
+		return ($sPeek == '')
+			? false
+			: $this->streql($sPeek, $sString, $bCaseInsensitive);
+	}
+
+	/**
+	 * Returns up to $iLength characters starting $iOffset characters after the current
+	 * position, without consuming them.
+	 *
+	 * @return string '' when the start lies at or past the end of the input
+	 */
+	public function peek($iLength = 1, $iOffset = 0) {
+		$iOffset += $this->iCurrentPosition;
+		if ($iOffset >= $this->iLength) {
+			return '';
+		}
+		return $this->substr($iOffset, $iLength);
+	}
+
+	/**
+	 * Consumes either an expected literal or a fixed number of characters,
+	 * advancing the position and the line counter.
+	 *
+	 * @param string|int $mValue literal that must come next, or a character count
+	 * @return string the consumed text
+	 * @throws UnexpectedTokenException if the literal does not come next or too few characters remain
+	 */
+	public function consume($mValue = 1) {
+		if (is_string($mValue)) {
+			$iLineCount = substr_count($mValue, "\n");
+			$iLength = $this->strlen($mValue);
+			if (!$this->streql($this->substr($this->iCurrentPosition, $iLength), $mValue)) {
+				// The match type must be passed explicitly; otherwise the line number lands in its slot.
+				throw new UnexpectedTokenException($mValue, $this->peek(max($iLength, 5)), 'literal', $this->iLineNo);
+			}
+			$this->iLineNo += $iLineCount;
+			$this->iCurrentPosition += $iLength;
+			return $mValue;
+		} else {
+			if ($this->iCurrentPosition + $mValue > $this->iLength) {
+				throw new UnexpectedTokenException($mValue, $this->peek(5), 'count', $this->iLineNo);
+			}
+			$sResult = $this->substr($this->iCurrentPosition, $mValue);
+			$iLineCount = substr_count($sResult, "\n");
+			$this->iLineNo += $iLineCount;
+			$this->iCurrentPosition += $mValue;
+			return $sResult;
+		}
+	}
+
+	/**
+	 * Matches $mExpression against the next $iMaxLength characters (or all remaining
+	 * input when null) and consumes the text of the whole match.
+	 *
+	 * @return string the matched text
+	 * @throws UnexpectedTokenException if the expression does not match
+	 */
+	public function consumeExpression($mExpression, $iMaxLength = null) {
+		$aMatches = null;
+		$sInput = $iMaxLength !== null ? $this->peek($iMaxLength) : $this->inputLeft();
+		if (preg_match($mExpression, $sInput, $aMatches, PREG_OFFSET_CAPTURE) === 1) {
+			return $this->consume($aMatches[0][0]);
+		}
+		throw new UnexpectedTokenException($mExpression, $this->peek(5), 'expression', $this->iLineNo);
+	}
+
+	/**
+	 * @return false|Comment
+	 */
+	public function consumeComment() {
+		$mComment = false;
+		if ($this->comes('/*')) {
+			$iLineNo = $this->iLineNo;
+			$this->consume(1);
+			$mComment = '';
+			while (($char = $this->consume(1)) !== '') {
+				$mComment .= $char;
+				if ($this->comes('*/')) {
+					$this->consume(2);
+					break;
+				}
+			}
+		}
+
+		if ($mComment !== false) {
+			// We skip the * which was included in the comment.
+ return new Comment(substr($mComment, 1), $iLineNo); + } + + return $mComment; + } + + public function isEnd() { + return $this->iCurrentPosition >= $this->iLength; + } + + public function consumeUntil($aEnd, $bIncludeEnd = false, $consumeEnd = false, array &$comments = array()) { + $aEnd = is_array($aEnd) ? $aEnd : array($aEnd); + $out = ''; + $start = $this->iCurrentPosition; + + while (($char = $this->consume(1)) !== '') { + if (in_array($char, $aEnd)) { + if ($bIncludeEnd) { + $out .= $char; + } elseif (!$consumeEnd) { + $this->iCurrentPosition -= $this->strlen($char); + } + return $out; + } + $out .= $char; + if ($comment = $this->consumeComment()) { + $comments[] = $comment; + } + } + + $this->iCurrentPosition = $start; + throw new UnexpectedTokenException('One of ("'.implode('","', $aEnd).'")', $this->peek(5), 'search', $this->iLineNo); + } + + private function inputLeft() { + return $this->substr($this->iCurrentPosition, -1); + } + + public function streql($sString1, $sString2, $bCaseInsensitive = true) { + if($bCaseInsensitive) { + return $this->strtolower($sString1) === $this->strtolower($sString2); + } else { + return $sString1 === $sString2; + } + } + + public function backtrack($iAmount) { + $this->iCurrentPosition -= $iAmount; + } + + public function strlen($sString) { + if ($this->oParserSettings->bMultibyteSupport) { + return mb_strlen($sString, $this->sCharset); + } else { + return strlen($sString); + } + } + + private function substr($iStart, $iLength) { + if ($iLength < 0) { + $iLength = $this->iLength - $iStart + $iLength; + } + if ($iStart + $iLength > $this->iLength) { + $iLength = $this->iLength - $iStart; + } + $sResult = ''; + while ($iLength > 0) { + $sResult .= $this->aText[$iStart]; + $iStart++; + $iLength--; + } + return $sResult; + } + + private function strtolower($sString) { + if ($this->oParserSettings->bMultibyteSupport) { + return mb_strtolower($sString, $this->sCharset); + } else { + return strtolower($sString); + } + } + + 
private function strsplit($sString) { + if ($this->oParserSettings->bMultibyteSupport) { + if ($this->streql($this->sCharset, 'utf-8')) { + return preg_split('//u', $sString, null, PREG_SPLIT_NO_EMPTY); + } else { + $iLength = mb_strlen($sString, $this->sCharset); + $aResult = array(); + for ($i = 0; $i < $iLength; ++$i) { + $aResult[] = mb_substr($sString, $i, 1, $this->sCharset); + } + return $aResult; + } + } else { + if($sString === '') { + return array(); + } else { + return str_split($sString); + } + } + } + + private function strpos($sString, $sNeedle, $iOffset) { + if ($this->oParserSettings->bMultibyteSupport) { + return mb_strpos($sString, $sNeedle, $iOffset, $this->sCharset); + } else { + return strpos($sString, $sNeedle, $iOffset); + } + } +} \ No newline at end of file diff --git a/lib/Sabberworm/CSS/Property/AtRule.php b/lib/Sabberworm/CSS/Property/AtRule.php index de3eea19..b20c8c6e 100644 --- a/lib/Sabberworm/CSS/Property/AtRule.php +++ b/lib/Sabberworm/CSS/Property/AtRule.php @@ -6,9 +6,10 @@ use Sabberworm\CSS\Comment\Commentable; interface AtRule extends Renderable, Commentable { - const BLOCK_RULES = 'media/document/supports/region-style/font-feature-values'; // Since there are more set rules than block rules, we’re whitelisting the block rules and have anything else be treated as a set rule. 
- const SET_RULES = 'font-face/counter-style/page/swash/styleset/annotation'; //…and more font-specific ones (to be used inside font-feature-values) + const BLOCK_RULES = 'media/document/supports/region-style/font-feature-values'; + // …and more font-specific ones (to be used inside font-feature-values) + const SET_RULES = 'font-face/counter-style/page/swash/styleset/annotation'; public function atRuleName(); public function atRuleArgs(); diff --git a/lib/Sabberworm/CSS/Rule/Rule.php b/lib/Sabberworm/CSS/Rule/Rule.php index 3e485375..3fa031bd 100644 --- a/lib/Sabberworm/CSS/Rule/Rule.php +++ b/lib/Sabberworm/CSS/Rule/Rule.php @@ -2,10 +2,11 @@ namespace Sabberworm\CSS\Rule; +use Sabberworm\CSS\Comment\Commentable; +use Sabberworm\CSS\Parsing\ParserState; use Sabberworm\CSS\Renderable; use Sabberworm\CSS\Value\RuleValueList; use Sabberworm\CSS\Value\Value; -use Sabberworm\CSS\Comment\Commentable; /** * RuleSets contains Rule objects which always have a key and a value. @@ -29,6 +30,44 @@ public function __construct($sRule, $iLineNo = 0) { $this->aComments = array(); } + public static function parse(ParserState $oParserState) { + $aComments = $oParserState->consumeWhiteSpace(); + $oRule = new Rule($oParserState->parseIdentifier(), $oParserState->currentLine()); + $oRule->setComments($aComments); + $oRule->addComments($oParserState->consumeWhiteSpace()); + $oParserState->consume(':'); + $oValue = Value::parseValue($oParserState, self::listDelimiterForRule($oRule->getRule())); + $oRule->setValue($oValue); + if ($oParserState->getSettings()->bLenientParsing) { + while ($oParserState->comes('\\')) { + $oParserState->consume('\\'); + $oRule->addIeHack($oParserState->consume()); + $oParserState->consumeWhiteSpace(); + } + } + $oParserState->consumeWhiteSpace(); + if ($oParserState->comes('!')) { + $oParserState->consume('!'); + $oParserState->consumeWhiteSpace(); + $oParserState->consume('important'); + $oRule->setIsImportant(true); + } + 
$oParserState->consumeWhiteSpace(); + while ($oParserState->comes(';')) { + $oParserState->consume(';'); + } + $oParserState->consumeWhiteSpace(); + + return $oRule; + } + + private static function listDelimiterForRule($sRule) { + if (preg_match('/^font($|-)/', $sRule)) { + return array(',', '/', ' '); + } + return array(',', ' ', '/'); + } + /** * @return int */ diff --git a/lib/Sabberworm/CSS/RuleSet/DeclarationBlock.php b/lib/Sabberworm/CSS/RuleSet/DeclarationBlock.php index e18f5d82..451eaa7e 100644 --- a/lib/Sabberworm/CSS/RuleSet/DeclarationBlock.php +++ b/lib/Sabberworm/CSS/RuleSet/DeclarationBlock.php @@ -2,6 +2,8 @@ namespace Sabberworm\CSS\RuleSet; +use Sabberworm\CSS\Parsing\ParserState; +use Sabberworm\CSS\Parsing\OutputException; use Sabberworm\CSS\Property\Selector; use Sabberworm\CSS\Rule\Rule; use Sabberworm\CSS\Value\RuleValueList; @@ -9,7 +11,6 @@ use Sabberworm\CSS\Value\Size; use Sabberworm\CSS\Value\Color; use Sabberworm\CSS\Value\URL; -use Sabberworm\CSS\Parsing\OutputException; /** * Declaration blocks are the parts of a css file which denote the rules belonging to a selector. 
@@ -24,6 +25,16 @@ public function __construct($iLineNo = 0) { $this->aSelectors = array(); } + public static function parse(ParserState $oParserState) { + $aComments = array(); + $oResult = new DeclarationBlock($oParserState->currentLine()); + $oResult->setSelector($oParserState->consumeUntil('{', false, true, $aComments)); + $oResult->setComments($aComments); + RuleSet::parseRuleSet($oParserState, $oResult); + return $oResult; + } + + public function setSelectors($mSelector) { if (is_array($mSelector)) { $this->aSelectors = $mSelector; diff --git a/lib/Sabberworm/CSS/RuleSet/RuleSet.php b/lib/Sabberworm/CSS/RuleSet/RuleSet.php index 124be88d..42b66509 100644 --- a/lib/Sabberworm/CSS/RuleSet/RuleSet.php +++ b/lib/Sabberworm/CSS/RuleSet/RuleSet.php @@ -2,9 +2,11 @@ namespace Sabberworm\CSS\RuleSet; -use Sabberworm\CSS\Rule\Rule; -use Sabberworm\CSS\Renderable; use Sabberworm\CSS\Comment\Commentable; +use Sabberworm\CSS\Parsing\ParserState; +use Sabberworm\CSS\Parsing\UnexpectedTokenException; +use Sabberworm\CSS\Renderable; +use Sabberworm\CSS\Rule\Rule; /** * RuleSet is a generic superclass denoting rules. The typical example for rule sets are declaration block. 
@@ -22,6 +24,41 @@ public function __construct($iLineNo = 0) { $this->aComments = array(); } + public static function parseRuleSet(ParserState $oParserState, RuleSet $oRuleSet) { + while ($oParserState->comes(';')) { + $oParserState->consume(';'); + } + while (!$oParserState->comes('}')) { + $oRule = null; + if($oParserState->getSettings()->bLenientParsing) { + try { + $oRule = Rule::parse($oParserState); + } catch (UnexpectedTokenException $e) { + try { + $sConsume = $oParserState->consumeUntil(array("\n", ";", '}'), true); + // We need to “unfind” the matches to the end of the ruleSet as this will be matched later + if($oParserState->streql(substr($sConsume, -1), '}')) { + $oParserState->backtrack(1); + } else { + while ($oParserState->comes(';')) { + $oParserState->consume(';'); + } + } + } catch (UnexpectedTokenException $e) { + // We’ve reached the end of the document. Just close the RuleSet. + return; + } + } + } else { + $oRule = Rule::parse($oParserState); + } + if($oRule) { + $oRuleSet->addRule($oRule); + } + } + $oParserState->consume('}'); + } + /** * @return int */ diff --git a/lib/Sabberworm/CSS/Value/CSSString.php b/lib/Sabberworm/CSS/Value/CSSString.php index b0700081..9f9c050e 100644 --- a/lib/Sabberworm/CSS/Value/CSSString.php +++ b/lib/Sabberworm/CSS/Value/CSSString.php @@ -2,6 +2,9 @@ namespace Sabberworm\CSS\Value; +use Sabberworm\CSS\Parsing\ParserState; +use Sabberworm\CSS\Parsing\SourceException; + class CSSString extends PrimitiveValue { private $sString; @@ -11,6 +14,37 @@ public function __construct($sString, $iLineNo = 0) { parent::__construct($iLineNo); } + public static function parse(ParserState $oParserState) { + $sBegin = $oParserState->peek(); + $sQuote = null; + if ($sBegin === "'") { + $sQuote = "'"; + } else if ($sBegin === '"') { + $sQuote = '"'; + } + if ($sQuote !== null) { + $oParserState->consume($sQuote); + } + $sResult = ""; + $sContent = null; + if ($sQuote === null) { + // Unquoted strings end in whitespace or with 
braces, brackets, parentheses + while (!preg_match('/[\\s{}()<>\\[\\]]/isu', $oParserState->peek())) { + $sResult .= $oParserState->parseCharacter(false); + } + } else { + while (!$oParserState->comes($sQuote)) { + $sContent = $oParserState->parseCharacter(false); + if ($sContent === null) { + throw new SourceException("Non-well-formed quoted string {$oParserState->peek(3)}", $oParserState->currentLine()); + } + $sResult .= $sContent; + } + $oParserState->consume($sQuote); + } + return new CSSString($sResult, $oParserState->currentLine()); + } + public function setString($sString) { $this->sString = $sString; } diff --git a/lib/Sabberworm/CSS/Value/CalcFunction.php b/lib/Sabberworm/CSS/Value/CalcFunction.php index 0dba1b85..92475209 100644 --- a/lib/Sabberworm/CSS/Value/CalcFunction.php +++ b/lib/Sabberworm/CSS/Value/CalcFunction.php @@ -2,7 +2,61 @@ namespace Sabberworm\CSS\Value; +use Sabberworm\CSS\Parsing\ParserState; +use Sabberworm\CSS\Parsing\UnexpectedTokenException; + class CalcFunction extends CSSFunction { const T_OPERAND = 1; const T_OPERATOR = 2; + + public static function parse(ParserState $oParserState) { + $aOperators = array('+', '-', '*', '/'); + $sFunction = trim($oParserState->consumeUntil('(', false, true)); + $oCalcList = new CalcRuleValueList($oParserState->currentLine()); + $oList = new RuleValueList(',', $oParserState->currentLine()); + $iNestingLevel = 0; + $iLastComponentType = NULL; + while(!$oParserState->comes(')') || $iNestingLevel > 0) { + $oParserState->consumeWhiteSpace(); + if ($oParserState->comes('(')) { + $iNestingLevel++; + $oCalcList->addListComponent($oParserState->consume(1)); + continue; + } else if ($oParserState->comes(')')) { + $iNestingLevel--; + $oCalcList->addListComponent($oParserState->consume(1)); + continue; + } + if ($iLastComponentType != CalcFunction::T_OPERAND) { + $oVal = Value::parsePrimitiveValue($oParserState); + $oCalcList->addListComponent($oVal); + $iLastComponentType = CalcFunction::T_OPERAND; + } 
else { + if (in_array($oParserState->peek(), $aOperators)) { + if (($oParserState->comes('-') || $oParserState->comes('+'))) { + if ($oParserState->peek(1, -1) != ' ' || !($oParserState->comes('- ') || $oParserState->comes('+ '))) { + throw new UnexpectedTokenException(" {$oParserState->peek()} ", $oParserState->peek(1, -1) . $oParserState->peek(2), 'literal', $oParserState->currentLine()); + } + } + $oCalcList->addListComponent($oParserState->consume(1)); + $iLastComponentType = CalcFunction::T_OPERATOR; + } else { + throw new UnexpectedTokenException( + sprintf( + 'Next token was expected to be an operand of type %s. Instead "%s" was found.', + implode(', ', $aOperators), + $oVal + ), + '', + 'custom', + $oParserState->currentLine() + ); + } + } + } + $oList->addListComponent($oCalcList); + $oParserState->consume(')'); + return new CalcFunction($sFunction, $oList, ',', $oParserState->currentLine()); + } + } diff --git a/lib/Sabberworm/CSS/Value/Color.php b/lib/Sabberworm/CSS/Value/Color.php index e05b924a..c6ed9b18 100644 --- a/lib/Sabberworm/CSS/Value/Color.php +++ b/lib/Sabberworm/CSS/Value/Color.php @@ -2,12 +2,66 @@ namespace Sabberworm\CSS\Value; +use Sabberworm\CSS\Parsing\ParserState; + class Color extends CSSFunction { public function __construct($aColor, $iLineNo = 0) { parent::__construct(implode('', array_keys($aColor)), $aColor, ',', $iLineNo); } + public static function parse(ParserState $oParserState) { + $aColor = array(); + if ($oParserState->comes('#')) { + $oParserState->consume('#'); + $sValue = $oParserState->parseIdentifier(false); + if ($oParserState->strlen($sValue) === 3) { + $sValue = $sValue[0] . $sValue[0] . $sValue[1] . $sValue[1] . $sValue[2] . $sValue[2]; + } else if ($oParserState->strlen($sValue) === 4) { + $sValue = $sValue[0] . $sValue[0] . $sValue[1] . $sValue[1] . $sValue[2] . $sValue[2] . $sValue[3] . $sValue[3]; + } + + if ($oParserState->strlen($sValue) === 8) { + $aColor = array( + 'r' => new Size(intval($sValue[0] . 
$sValue[1], 16), null, true, $oParserState->currentLine()), + 'g' => new Size(intval($sValue[2] . $sValue[3], 16), null, true, $oParserState->currentLine()), + 'b' => new Size(intval($sValue[4] . $sValue[5], 16), null, true, $oParserState->currentLine()), + 'a' => new Size(round(self::mapRange(intval($sValue[6] . $sValue[7], 16), 0, 255, 0, 1), 2), null, true, $oParserState->currentLine()) + ); + } else { + $aColor = array( + 'r' => new Size(intval($sValue[0] . $sValue[1], 16), null, true, $oParserState->currentLine()), + 'g' => new Size(intval($sValue[2] . $sValue[3], 16), null, true, $oParserState->currentLine()), + 'b' => new Size(intval($sValue[4] . $sValue[5], 16), null, true, $oParserState->currentLine()) + ); + } + } else { + $sColorMode = $oParserState->parseIdentifier(true); + $oParserState->consumeWhiteSpace(); + $oParserState->consume('('); + $iLength = $oParserState->strlen($sColorMode); + for ($i = 0; $i < $iLength; ++$i) { + $oParserState->consumeWhiteSpace(); + $aColor[$sColorMode[$i]] = Size::parse($oParserState, true); + $oParserState->consumeWhiteSpace(); + if ($i < ($iLength - 1)) { + $oParserState->consume(','); + } + } + $oParserState->consume(')'); + } + return new Color($aColor, $oParserState->currentLine()); + } + + private static function mapRange($fVal, $fFromMin, $fFromMax, $fToMin, $fToMax) { + $fFromRange = $fFromMax - $fFromMin; + $fToRange = $fToMax - $fToMin; + $fMultiplier = $fToRange / $fFromRange; + $fNewVal = $fVal - $fFromMin; + $fNewVal *= $fMultiplier; + return $fNewVal + $fToMin; + } + public function getColor() { return $this->aComponents; } diff --git a/lib/Sabberworm/CSS/Value/LineName.php b/lib/Sabberworm/CSS/Value/LineName.php index bc1fb85b..eb7392d7 100644 --- a/lib/Sabberworm/CSS/Value/LineName.php +++ b/lib/Sabberworm/CSS/Value/LineName.php @@ -2,11 +2,34 @@ namespace Sabberworm\CSS\Value; +use Sabberworm\CSS\Parsing\ParserState; +use Sabberworm\CSS\Parsing\UnexpectedTokenException; + class LineName extends ValueList 
{
 	public function __construct($aComponents = array(), $iLineNo = 0) {
 		parent::__construct($aComponents, ' ', $iLineNo);
 	}
 
+	/**
+	 * Parses a bracketed list of identifiers such as "[a b]" into a LineName.
+	 *
+	 * @param ParserState $oParserState
+	 * @return LineName
+	 * @throws UnexpectedTokenException if something other than an identifier or "]" is found
+	 */
+	public static function parse(ParserState $oParserState) {
+		$oParserState->consume('[');
+		$oParserState->consumeWhiteSpace();
+		$aNames = array();
+		do {
+			if($oParserState->getSettings()->bLenientParsing) {
+				try {
+					$aNames[] = $oParserState->parseIdentifier();
+				} catch(UnexpectedTokenException $e) {
+					// Only an empty or finished list may be tolerated. Swallowing any other
+					// failure would consume nothing and spin in this loop forever.
+					if (!$oParserState->comes(']')) {
+						throw $e;
+					}
+				}
+			} else {
+				$aNames[] = $oParserState->parseIdentifier();
+			}
+			$oParserState->consumeWhiteSpace();
+		} while (!$oParserState->comes(']'));
+		$oParserState->consume(']');
+		return new LineName($aNames, $oParserState->currentLine());
+	}
+
+
+
 	public function __toString() {
 		return $this->render(new \Sabberworm\CSS\OutputFormat());
 	}
diff --git a/lib/Sabberworm/CSS/Value/Size.php b/lib/Sabberworm/CSS/Value/Size.php
index 9ad5eb08..9728a7b9 100644
--- a/lib/Sabberworm/CSS/Value/Size.php
+++ b/lib/Sabberworm/CSS/Value/Size.php
@@ -2,12 +2,16 @@
 
 namespace Sabberworm\CSS\Value;
 
+use Sabberworm\CSS\Parsing\ParserState;
+
 class Size extends PrimitiveValue {
 
 	const ABSOLUTE_SIZE_UNITS = 'px/cm/mm/mozmm/in/pt/pc/vh/vw/vm/vmin/vmax/rem'; //vh/vw/vm(ax)/vmin/rem are absolute insofar as they don’t scale to the immediate parent (only the viewport)
 	const RELATIVE_SIZE_UNITS = '%/em/ex/ch/fr';
 	const NON_SIZE_UNITS = 'deg/grad/rad/s/ms/turns/Hz/kHz';
 
+	private static $SIZE_UNITS = null;
+
 	private $fSize;
 	private $sUnit;
 	private $bIsColorComponent;
@@ -19,6 +23,50 @@ public function __construct($fSize, $sUnit = null, $bIsColorComponent = false, $
 		$this->bIsColorComponent = $bIsColorComponent;
 	}
 
+	public static function parse(ParserState $oParserState, $bIsColorComponent = false) {
+		$sSize = '';
+		if ($oParserState->comes('-')) {
+			$sSize .= $oParserState->consume('-');
+		}
+		while (is_numeric($oParserState->peek()) || $oParserState->comes('.')) {
+			if ($oParserState->comes('.')) {
+				$sSize .= $oParserState->consume('.');
+			} else {
+				$sSize .= 
$oParserState->consume(1);
+			}
+		}
+
+		$sUnit = null;
+		foreach (self::getSizeUnits() as $iLength => &$aValues) {
+			$sKey = strtolower($oParserState->peek($iLength));
+			if(array_key_exists($sKey, $aValues)) {
+				if (($sUnit = $aValues[$sKey]) !== null) {
+					$oParserState->consume($iLength);
+					break;
+				}
+			}
+		}
+		return new Size(floatval($sSize), $sUnit, $bIsColorComponent, $oParserState->currentLine());
+	}
+
+	/**
+	 * Builds (once) and returns the known units grouped by their length.
+	 *
+	 * @return array map of unit length => (lower-cased unit => canonical unit), longest length first
+	 */
+	private static function getSizeUnits() {
+		if(self::$SIZE_UNITS === null) {
+			self::$SIZE_UNITS = [];
+			foreach (explode('/', Size::ABSOLUTE_SIZE_UNITS.'/'.Size::RELATIVE_SIZE_UNITS.'/'.Size::NON_SIZE_UNITS) as $val) {
+				$iSize = strlen($val);
+				if(!isset(self::$SIZE_UNITS[$iSize])) {
+					self::$SIZE_UNITS[$iSize] = array();
+				}
+				self::$SIZE_UNITS[$iSize][strtolower($val)] = $val;
+			}
+
+			// Longest units first: parse() takes the first length that matches, so with the
+			// shortest first "vmin"/"vmax" would be cut short as the two-character unit "vm".
+			krsort(self::$SIZE_UNITS, SORT_NUMERIC);
+		}
+
+		return self::$SIZE_UNITS;
+	}
+
 	public function setUnit($sUnit) {
 		$this->sUnit = $sUnit;
 	}
diff --git a/lib/Sabberworm/CSS/Value/URL.php b/lib/Sabberworm/CSS/Value/URL.php
index 02cf5812..b4f37e16 100644
--- a/lib/Sabberworm/CSS/Value/URL.php
+++ b/lib/Sabberworm/CSS/Value/URL.php
@@ -2,6 +2,7 @@
 
 namespace Sabberworm\CSS\Value;
 
+use Sabberworm\CSS\Parsing\ParserState;
 
 class URL extends PrimitiveValue {
 
@@ -12,6 +13,23 @@ public function __construct(CSSString $oURL, $iLineNo = 0) {
 		$this->oURL = $oURL;
 	}
 
+	public static function parse(ParserState $oParserState) {
+		$bUseUrl = $oParserState->comes('url', true);
+		if ($bUseUrl) {
+			$oParserState->consume('url');
+			$oParserState->consumeWhiteSpace();
+			$oParserState->consume('(');
+		}
+		$oParserState->consumeWhiteSpace();
+		$oResult = new URL(CSSString::parse($oParserState), $oParserState->currentLine());
+		if ($bUseUrl) {
+			$oParserState->consumeWhiteSpace();
+			$oParserState->consume(')');
+		}
+		return $oResult;
+	}
+
+
 	public function setURL(CSSString $oURL) {
 		$this->oURL = $oURL;
 	}
diff --git
a/lib/Sabberworm/CSS/Value/Value.php b/lib/Sabberworm/CSS/Value/Value.php index 5d30bd97..5c550498 100644 --- a/lib/Sabberworm/CSS/Value/Value.php +++ b/lib/Sabberworm/CSS/Value/Value.php @@ -2,23 +2,113 @@ namespace Sabberworm\CSS\Value; +use Sabberworm\CSS\Parsing\ParserState; use Sabberworm\CSS\Renderable; abstract class Value implements Renderable { - protected $iLineNo; - - public function __construct($iLineNo = 0) { - $this->iLineNo = $iLineNo; - } - - /** - * @return int - */ - public function getLineNo() { - return $this->iLineNo; - } - - //Methods are commented out because re-declaring them here is a fatal error in PHP < 5.3.9 + protected $iLineNo; + + public function __construct($iLineNo = 0) { + $this->iLineNo = $iLineNo; + } + + public static function parseValue(ParserState $oParserState, $aListDelimiters = array()) { + $aStack = array(); + $oParserState->consumeWhiteSpace(); + //Build a list of delimiters and parsed values + while (!($oParserState->comes('}') || $oParserState->comes(';') || $oParserState->comes('!') || $oParserState->comes(')') || $oParserState->comes('\\'))) { + if (count($aStack) > 0) { + $bFoundDelimiter = false; + foreach ($aListDelimiters as $sDelimiter) { + if ($oParserState->comes($sDelimiter)) { + array_push($aStack, $oParserState->consume($sDelimiter)); + $oParserState->consumeWhiteSpace(); + $bFoundDelimiter = true; + break; + } + } + if (!$bFoundDelimiter) { + //Whitespace was the list delimiter + array_push($aStack, ' '); + } + } + array_push($aStack, self::parsePrimitiveValue($oParserState)); + $oParserState->consumeWhiteSpace(); + } + //Convert the list to list objects + foreach ($aListDelimiters as $sDelimiter) { + if (count($aStack) === 1) { + return $aStack[0]; + } + $iStartPosition = null; + while (($iStartPosition = array_search($sDelimiter, $aStack, true)) !== false) { + $iLength = 2; //Number of elements to be joined + for ($i = $iStartPosition + 2; $i < count($aStack); $i+=2, ++$iLength) { + if ($sDelimiter !== 
$aStack[$i]) { + break; + } + } + $oList = new RuleValueList($sDelimiter, $oParserState->currentLine()); + for ($i = $iStartPosition - 1; $i - $iStartPosition + 1 < $iLength * 2; $i+=2) { + $oList->addListComponent($aStack[$i]); + } + array_splice($aStack, $iStartPosition - 1, $iLength * 2 - 1, array($oList)); + } + } + return $aStack[0]; + } + + public static function parseIdentifierOrFunction(ParserState $oParserState, $bIgnoreCase = false) { + $sResult = $oParserState->parseIdentifier($bIgnoreCase); + + if ($oParserState->comes('(')) { + $oParserState->consume('('); + $aArguments = Value::parseValue($oParserState, array('=', ' ', ',')); + $sResult = new CSSFunction($sResult, $aArguments, ',', $oParserState->currentLine()); + $oParserState->consume(')'); + } + + return $sResult; + } + + public static function parsePrimitiveValue(ParserState $oParserState) { + $oValue = null; + $oParserState->consumeWhiteSpace(); + if (is_numeric($oParserState->peek()) || ($oParserState->comes('-.') && is_numeric($oParserState->peek(1, 2))) || (($oParserState->comes('-') || $oParserState->comes('.')) && is_numeric($oParserState->peek(1, 1)))) { + $oValue = Size::parse($oParserState); + } else if ($oParserState->comes('#') || $oParserState->comes('rgb', true) || $oParserState->comes('hsl', true)) { + $oValue = Color::parse($oParserState); + } else if ($oParserState->comes('url', true)) { + $oValue = URL::parse($oParserState); + } else if ($oParserState->comes('calc', true) || $oParserState->comes('-webkit-calc', true) || $oParserState->comes('-moz-calc', true)) { + $oValue = CalcFunction::parse($oParserState); + } else if ($oParserState->comes("'") || $oParserState->comes('"')) { + $oValue = CSSString::parse($oParserState); + } else if ($oParserState->comes("progid:") && $oParserState->getSettings()->bLenientParsing) { + $oValue = self::parseMicrosoftFilter($oParserState); + } else if ($oParserState->comes("[")) { + $oValue = LineName::parse($oParserState); + } else { + $oValue = 
self::parseIdentifierOrFunction($oParserState); + } + $oParserState->consumeWhiteSpace(); + return $oValue; + } + + private static function parseMicrosoftFilter(ParserState $oParserState) { + $sFunction = $oParserState->consumeUntil('(', false, true); + $aArguments = Value::parseValue($oParserState, array(',', '=')); + return new CSSFunction($sFunction, $aArguments, ',', $oParserState->currentLine()); + } + + /** + * @return int + */ + public function getLineNo() { + return $this->iLineNo; + } + + //Methods are commented out because re-declaring them here is a fatal error in PHP < 5.3.9 //public abstract function __toString(); //public abstract function render(\Sabberworm\CSS\OutputFormat $oOutputFormat); } From de71450bf6e8dd4612617a64d791bf8221d18079 Mon Sep 17 00:00:00 2001 From: Raphael Schweikert Date: Fri, 19 Oct 2018 09:28:40 +0200 Subject: [PATCH 2/3] Use bin path to phpunit --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index b755ca20..4731e182 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,4 +16,4 @@ matrix: dist: precise sudo: false before_script: rm composer.lock && composer install -script: ./vendor/phpunit/phpunit/phpunit +script: ./vendor/bin/phpunit From ff6fa22ccf49a0450a5c3258171377ab5991edd2 Mon Sep 17 00:00:00 2001 From: Raphael Schweikert Date: Fri, 19 Oct 2018 09:47:50 +0200 Subject: [PATCH 3/3] Restore PHP 5.3 and 5.4 compatibility --- lib/Sabberworm/CSS/Value/Size.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/Sabberworm/CSS/Value/Size.php b/lib/Sabberworm/CSS/Value/Size.php index 9728a7b9..f65246b5 100644 --- a/lib/Sabberworm/CSS/Value/Size.php +++ b/lib/Sabberworm/CSS/Value/Size.php @@ -37,7 +37,8 @@ public static function parse(ParserState $oParserState, $bIsColorComponent = fal } $sUnit = null; - foreach (self::getSizeUnits() as $iLength => &$aValues) { + $aSizeUnits = self::getSizeUnits(); + foreach($aSizeUnits as $iLength => &$aValues) { 
$sKey = strtolower($oParserState->peek($iLength)); if(array_key_exists($sKey, $aValues)) { if (($sUnit = $aValues[$sKey]) !== null) { @@ -51,7 +52,7 @@ public static function parse(ParserState $oParserState, $bIsColorComponent = fal private static function getSizeUnits() { if(self::$SIZE_UNITS === null) { - self::$SIZE_UNITS = []; + self::$SIZE_UNITS = array(); foreach (explode('/', Size::ABSOLUTE_SIZE_UNITS.'/'.Size::RELATIVE_SIZE_UNITS.'/'.Size::NON_SIZE_UNITS) as $val) { $iSize = strlen($val); if(!isset(self::$SIZE_UNITS[$iSize])) {