@@ -62,6 +62,7 @@ class Parser
6262 private $ inParens ;
6363 private $ eatWhiteDefault ;
6464 private $ buffer ;
65+ private $ utf8 ;
6566
6667 /**
6768 * Constructor
@@ -70,12 +71,14 @@ class Parser
7071 *
7172 * @param string $sourceName
7273 * @param integer $sourceIndex
74+ * @param string $encoding
7375 */
74- public function __construct ($ sourceName , $ sourceIndex = 0 )
76+ public function __construct ($ sourceName , $ sourceIndex = 0 , $ encoding = ' utf-8 ' )
7577 {
7678 $ this ->sourceName = $ sourceName ?: '(stdin) ' ;
7779 $ this ->sourceIndex = $ sourceIndex ;
7880 $ this ->charset = null ;
81+ $ this ->utf8 = ! $ encoding || strtolower ($ encoding ) === 'utf-8 ' ;
7982
8083 if (empty (self ::$ operatorPattern )) {
8184 self ::$ operatorPattern = '([*\/%+-]|[!=]\=|\>\=?|\<\=\>|\<\=?|and|or) ' ;
@@ -85,7 +88,9 @@ public function __construct($sourceName, $sourceIndex = 0)
8588 $ commentMultiRight = '\*\/ ' ;
8689
8790 self ::$ commentPattern = $ commentMultiLeft . '.*? ' . $ commentMultiRight ;
88- self ::$ whitePattern = '/ ' . $ commentSingle . '[^\n]*\s*|( ' . self ::$ commentPattern . ')\s*|\s+/AisuS ' ;
91+ self ::$ whitePattern = $ this ->utf8
92+ ? '/ ' . $ commentSingle . '[^\n]*\s*|( ' . self ::$ commentPattern . ')\s*|\s+/AisuS '
93+ : '/ ' . $ commentSingle . '[^\n]*\s*|( ' . self ::$ commentPattern . ')\s*|\s+/AisS ' ;
8994 }
9095 }
9196
@@ -762,7 +767,7 @@ protected function peek($regex, &$out, $from = null)
762767 $ from = $ this ->count ;
763768 }
764769
765- $ r = '/ ' . $ regex . '/Aisu ' ;
770+ $ r = $ this -> utf8 ? '/ ' . $ regex . '/Aisu ' : ' / ' . $ regex . ' /Ais ' ;
766771 $ result = preg_match ($ r , $ this ->buffer , $ out , null , $ from );
767772
768773 return $ result ;
@@ -842,7 +847,7 @@ protected function match($regex, &$out, $eatWhitespace = null)
842847 $ eatWhitespace = $ this ->eatWhiteDefault ;
843848 }
844849
845- $ r = '/ ' . $ regex . '/Aisu ' ;
850+ $ r = $ this -> utf8 ? '/ ' . $ regex . '/Aisu ' : ' / ' . $ regex . ' /Ais ' ;
846851
847852 if (preg_match ($ r , $ this ->buffer , $ out , null , $ this ->count )) {
848853 $ this ->count += strlen ($ out [0 ]);
@@ -2235,7 +2240,9 @@ protected function variable(&$out)
22352240 protected function keyword (&$ word , $ eatWhitespace = null )
22362241 {
22372242 if ($ this ->match (
2238- '(([\pL\w_\-\*!" \']|[ \\\\].)([\pL\w\-_" \']|[ \\\\].)*) ' ,
2243+ $ this ->utf8
2244+ ? '(([\pL\w_\-\*!" \']|[ \\\\].)([\pL\w\-_" \']|[ \\\\].)*) '
2245+ : '(([\w_\-\*!" \']|[ \\\\].)([\w\-_" \']|[ \\\\].)*) ' ,
22392246 $ m ,
22402247 $ eatWhitespace
22412248 )) {
@@ -2256,7 +2263,12 @@ protected function keyword(&$word, $eatWhitespace = null)
22562263 */
22572264 protected function placeholder (&$ placeholder )
22582265 {
2259- if ($ this ->match ('([\pL\w\-_]+|#[{][$][\pL\w\-_]+[}]) ' , $ m )) {
2266+ if ($ this ->match (
2267+ $ this ->utf8
2268+ ? '([\pL\w\-_]+|#[{][$][\pL\w\-_]+[}]) '
2269+ : '([\w\-_]+|#[{][$][\w\-_]+[}]) ' ,
2270+ $ m
2271+ )) {
22602272 $ placeholder = $ m [1 ];
22612273
22622274 return true ;
0 commit comments