Skip to content

Commit 9c82592

Browse files
committed
Resync tokenizer with spec.
1 parent 440c865 commit 9c82592

File tree

1 file changed

+69
-44
lines changed

1 file changed

+69
-44
lines changed

tokenizer.js

Lines changed: 69 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ function tokenize(str, options) {
9191
};
9292
var create = function(token) { currtoken = token; return true; };
9393
// Logs where tokenizing went wrong: the current index, the code point being
// processed (in hex) and the active state. Always returns true so the call can
// be chained with && in front of the follow-up tokenizer actions.
var parseerror = function() {
	var where = "Parse error at index " + i;
	var what = ", processing codepoint 0x" + code.toString(16);
	var during = " in state " + state + ".";
	console.log(where + what + during);
	return true;
};
94+
// Logs a "MAJOR SPEC ERROR" message carrying the caller-supplied text.
// Always returns true so the call can be chained with && like the other
// tokenizer actions.
var catchfire = function(msg) {
	var report = "MAJOR SPEC ERROR: " + msg;
	console.log(report);
	return true;
};
9495
var switchto = function(newstate) {
9596
state = newstate;
9697
//console.log('Switching to ' + state);
@@ -140,15 +141,14 @@ function tokenize(str, options) {
140141
else if(code == 0x2d) {
141142
if(next(1) == 0x2d && next(2) == 0x3e) consume(2) && emit(new CDCToken);
142143
else if(digit(next()) || (next(1) == 0x2e && digit(next(2)))) switchto("number") && reconsume();
143-
else if(namestartchar(next())) switchto("identifier") && reconsume();
144-
else emit(new DelimToken(code));
144+
else switchto('ident') && reconsume();
145145
}
146146
else if(code == 0x2e) {
147147
if(digit(next())) switchto("number") && reconsume();
148148
else emit(new DelimToken(code));
149149
}
150150
else if(code == 0x2f) {
151-
if(next() == 0x2a) switchto("comment");
151+
if(next() == 0x2a) consume() && switchto("comment");
152152
else emit(new DelimToken(code));
153153
}
154154
else if(code == 0x3a) emit(new ColonToken);
@@ -161,18 +161,17 @@ function tokenize(str, options) {
161161
else if(code == 0x5b) emit(new OpenSquareToken);
162162
else if(code == 0x5c) {
163163
if(badescape(next())) parseerror() && emit(new DelimToken(code));
164-
else switchto("identifier") && reconsume();
164+
else switchto('ident') && reconsume();
165165
}
166166
else if(code == 0x5d) emit(new CloseSquareToken);
167167
else if(code == 0x7b) emit(new OpenCurlyToken);
168168
else if(code == 0x7d) emit(new CloseCurlyToken);
169169
else if(digit(code)) switchto("number") && reconsume();
170170
else if(code == 0x55 || code == 0x75) {
171171
if(next(1) == 0x2b && hexdigit(next(2))) consume() && switchto("unicode-range");
172-
else if((next(1) == 0x52 || next(1) == 0x72) && (next(2) == 0x4c || next(2) == 0x6c) && (next(3) == 0x28)) consume(3) && switchto("url");
173-
else switchto("identifier") && reconsume();
172+
else switchto('ident') && reconsume();
174173
}
175-
else if(namestartchar(code)) switchto("identifier") && reconsume();
174+
else if(namestartchar(code)) switchto('ident') && reconsume();
176175
else if(eof()) { emit(new EOFToken); return tokens; }
177176
else emit(new DelimToken(code));
178177
break;
@@ -181,7 +180,7 @@ function tokenize(str, options) {
181180
if(currtoken == undefined) create(new StringToken);
182181

183182
if(code == 0x22) emit() && switchto("data");
184-
else if(eof()) parseerror() && emit() && switchto("data");
183+
else if(eof()) parseerror() && emit() && switchto("data") && reconsume();
185184
else if(newline(code)) parseerror() && emit(new BadStringToken) && switchto("data") && reconsume();
186185
else if(code == 0x5c) {
187186
if(badescape(next())) parseerror() && emit(new BadStringToken) && switchto("data");
@@ -217,7 +216,7 @@ function tokenize(str, options) {
217216
case "hash-rest":
218217
if(namechar(code)) currtoken.append(code);
219218
else if(code == 0x5c) {
220-
if(badescape(next())) parseerror() && emit(new DelimToken(0x23)) && switchto("data") && reconsume();
219+
if(badescape(next())) parseerror() && emit() && switchto("data") && reconsume();
221220
else currtoken.append(consumeEscape());
222221
}
223222
else emit() && switchto('data') && reconsume();
@@ -234,8 +233,9 @@ function tokenize(str, options) {
234233

235234
case "at-keyword":
236235
if(code == 0x2d) {
237-
if(namestartchar(next())) consume() && create(new AtKeywordToken([0x40,code])) && switchto('at-keyword-rest');
238-
else emit(new DelimToken(0x40)) && switchto('data') && reconsume();
236+
if(namestartchar(next())) create(new AtKeywordToken(0x2d)) && switchto('at-keyword-rest');
237+
else if(next(1) == 0x5c && !badescape(next(2))) create(new AtKeywordtoken(0x2d)) && switchto('at-keyword-rest');
238+
else parseerror() && emit(new DelimToken(0x40)) && switchto('data') && reconsume();
239239
}
240240
else if(namestartchar(code)) create(new AtKeywordToken(code)) && switchto('at-keyword-rest');
241241
else if(code == 0x5c) {
@@ -254,32 +254,36 @@ function tokenize(str, options) {
254254
else emit() && switchto('data') && reconsume();
255255
break;
256256

257-
case "identifier":
257+
case "ident":
258258
if(code == 0x2d) {
259-
if(namestartchar(next())) create(new IdentifierToken(code)) && switchto('identifier-rest');
260-
else switchto('data') && reconsume();
259+
if(namestartchar(next())) create(new IdentifierToken(code)) && switchto('ident-rest');
260+
else if(next(1) == 0x5c && !badescape(next(2))) create(new IdentifierToken(code)) && switchto('ident-rest');
261+
else emit(new DelimToken(0x2d)) && switchto('data');
261262
}
262-
else if(namestartchar(code)) create(new IdentifierToken(code)) && switchto('identifier-rest');
263+
else if(namestartchar(code)) create(new IdentifierToken(code)) && switchto('ident-rest');
263264
else if(code == 0x5c) {
264265
if(badescape(next())) parseerror() && switchto("data") && reconsume();
265-
else create(new IdentifierToken(consumeEscape())) && switchto('identifier-rest');
266+
else create(new IdentifierToken(consumeEscape())) && switchto('ident-rest');
266267
}
267-
else switchto('data') && reconsume();
268+
else catchfire("Hit the generic 'else' clause in ident state.") && switchto('data') && reconsume();
268269
break;
269270

270-
case "identifier-rest":
271+
case "ident-rest":
271272
if(namechar(code)) currtoken.append(code);
272273
else if(code == 0x5c) {
273274
if(badescape(next())) parseerror() && emit() && switchto("data") && reconsume();
274275
else currtoken.append(consumeEscape());
275276
}
276-
else if(code == 0x28) emit(new FunctionToken(currtoken)) && switchto('data');
277-
else if(whitespace(code) && options.transformFunctionWhitespace) switchto('transform-function-whitespace');
277+
else if(code == 0x28) {
278+
if(currtoken.ASCIImatch('url')) switchto('url');
279+
else emit(new FunctionToken(currtoken)) && switchto('data');
280+
}
281+
else if(whitespace(code) && options.transformFunctionWhitespace) switchto('transform-function-whitespace') && reconsume();
278282
else emit() && switchto('data') && reconsume();
279283
break;
280284

281285
case "transform-function-whitespace":
282-
if(whitespace(code)) donothing();
286+
if(whitespace(next())) donothing();
283287
else if(code == 0x28) emit(new FunctionToken(currtoken)) && switchto('data');
284288
else emit() && switchto('data') && reconsume();
285289
break;
@@ -313,8 +317,7 @@ function tokenize(str, options) {
313317
}
314318
else if(code == 0x25) emit(new PercentageToken(currtoken)) && switchto('data');
315319
else if(code == 0x45 || code == 0x65) {
316-
if(!options.scientificNotation) create(new DimensionToken(currtoken,code)) && switchto('dimension');
317-
else if(digit(next())) consume() && currtoken.append([0x25,code]) && switchto('sci-notation');
320+
if(digit(next())) consume() && currtoken.append([0x25,code]) && switchto('sci-notation');
318321
else if((next(1) == 0x2b || next(1) == 0x2d) && digit(next(2))) currtoken.append([0x25,next(1),next(2)]) && consume(2) && switchto('sci-notation');
319322
else create(new DimensionToken(currtoken,code)) && switchto('dimension');
320323
}
@@ -326,7 +329,7 @@ function tokenize(str, options) {
326329
}
327330
else if(namestartchar(code)) create(new DimensionToken(currtoken, code)) && switchto('dimension');
328331
else if(code == 0x5c) {
329-
if(badescape(next)) emit() && switchto('data') && reconsume();
332+
if(badescape(next)) parseerror() && emit() && switchto('data') && reconsume();
330333
else create(new DimensionToken(currtoken,consumeEscape)) && switchto('dimension');
331334
}
332335
else emit() && switchto('data') && reconsume();
@@ -336,12 +339,10 @@ function tokenize(str, options) {
336339
currtoken.type = "number";
337340

338341
if(digit(code)) currtoken.append(code);
339-
else if(code == 0x2e) emit() && switchto('data') && reconsume();
340342
else if(code == 0x25) emit(new PercentageToken(currtoken)) && switchto('data');
341343
else if(code == 0x45 || code == 0x65) {
342-
if(!options.scientificNotation) create(new DimensionToken(currtoken,code)) && switchto('dimension');
343-
else if(digit(next())) consume() && currtoken.append([0x25,code]) && switchto('sci-notation');
344-
else if((next(1) == 0x2b || next(1) == 0x2d) && digit(next(2))) currtoken.append([0x25,next(1),next(2)]) && consume(2) && switchto('sci-notation');
344+
if(digit(next())) consume() && currtoken.append([0x65,code]) && switchto('sci-notation');
345+
else if((next(1) == 0x2b || next(1) == 0x2d) && digit(next(2))) currtoken.append([0x65,next(1),next(2)]) && consume(2) && switchto('sci-notation');
345346
else create(new DimensionToken(currtoken,code)) && switchto('dimension');
346347
}
347348
else if(code == 0x2d) {
@@ -352,8 +353,8 @@ function tokenize(str, options) {
352353
}
353354
else if(namestartchar(code)) create(new DimensionToken(currtoken, code)) && switchto('dimension');
354355
else if(code == 0x5c) {
355-
if(badescape(next)) emit() && switchto('data') && reconsume();
356-
else create(new DimensionToken(currtoken,consumeEscape)) && switchto('dimension');
356+
if(badescape(next)) parseerror() && emit() && switchto('data') && reconsume();
357+
else create(new DimensionToken(currtoken,consumeEscape())) && switchto('dimension');
357358
}
358359
else emit() && switchto('data') && reconsume();
359360
break;
@@ -368,22 +369,26 @@ function tokenize(str, options) {
368369
break;
369370

370371
case "sci-notation":
372+
currtoken.type = "number";
373+
371374
if(digit(code)) currtoken.append(code);
372375
else emit() && switchto('data') && reconsume();
373376
break;
374377

375378
case "url":
376-
if(code == 0x22) switchto('url-double-quote');
379+
if(eof()) parseerror() && emit(new BadURLToken) && switchto('data');
380+
else if(code == 0x22) switchto('url-double-quote');
377381
else if(code == 0x27) switchto('url-single-quote');
378382
else if(code == 0x29) emit(new URLToken) && switchto('data');
379383
else if(whitespace(code)) donothing();
380384
else switchto('url-unquoted') && reconsume();
381385
break;
382386

383387
case "url-double-quote":
384-
if(currtoken == undefined) create(new URLToken);
388+
if(! (currtoken instanceof URLToken)) create(new URLToken);
385389

386-
if(code == 0x22) switchto('url-end');
390+
if(eof()) parseerror() && emit(new BadURLToken) && switchto('data');
391+
else if(code == 0x22) switchto('url-end');
387392
else if(newline(code)) parseerror() && switchto('bad-url');
388393
else if(code == 0x5c) {
389394
if(newline(next())) consume();
@@ -394,9 +399,10 @@ function tokenize(str, options) {
394399
break;
395400

396401
case "url-single-quote":
397-
if(currtoken == undefined) create(new URLToken);
402+
if(! (currtoken instanceof URLToken)) create(new URLToken);
398403

399-
if(code == 0x27) switchto('url-end');
404+
if(eof()) parseerror() && emit(new BadURLToken) && switchto('data');
405+
else if(code == 0x27) switchto('url-end');
400406
else if(newline(code)) parseerror() && switchto('bad-url');
401407
else if(code == 0x5c) {
402408
if(newline(next())) consume();
@@ -407,15 +413,17 @@ function tokenize(str, options) {
407413
break;
408414

409415
case "url-end":
410-
if(whitespace(code)) donothing();
416+
if(eof()) parseerror() && emit(new BadURLToken) && switchto('data');
417+
else if(whitespace(code)) donothing();
411418
else if(code == 0x29) emit() && switchto('data');
412419
else parseerror() && switchto('bad-url') && reconsume();
413420
break;
414421

415422
case "url-unquoted":
416-
if(currtoken == undefined) create(new URLToken);
423+
if(! (currtoken instanceof URLToken)) create(new URLToken);
417424

418-
if(whitespace(code)) switchto('url-end');
425+
if(eof()) parseerror() && emit(new BadURLToken) && switchto('data');
426+
else if(whitespace(code)) switchto('url-end');
419427
else if(code == 0x29) emit() && switchto('data');
420428
else if(code == 0x22 || code == 0x27 || code == 0x28 || nonprintable(code)) parseerror() && switchto('bad-url');
421429
else if(code == 0x5c) {
@@ -426,10 +434,11 @@ function tokenize(str, options) {
426434
break;
427435

428436
case "bad-url":
429-
if(code == 0x29) emit(new BadURLToken) && switchto('data');
437+
if(eof()) parseerror() && emit(new BadURLToken) && switchto('data');
438+
else if(code == 0x29) emit(new BadURLToken) && switchto('data');
430439
else if(code == 0x5c) {
431440
if(badescape(next())) donothing();
432-
else consumeEscape()
441+
else consumeEscape();
433442
}
434443
else donothing();
435444
break;
@@ -476,7 +485,7 @@ function tokenize(str, options) {
476485
break;
477486

478487
default:
479-
console.log("Unknown state '" + state + "'");
488+
catchfire("Unknown state '" + state + "'");
480489
}
481490
}
482491
}
@@ -568,9 +577,25 @@ StringValuedToken.prototype.append = function(val) {
568577
return true;
569578
}
570579
StringValuedToken.prototype.finish = function() {
571-
this.value = stringFromCodeArray(this.value);
580+
this.value = this.valueAsString();
572581
return this;
573582
}
583+
/**
 * Tests whether this token's value equals `str` under ASCII case-insensitive
 * comparison.
 *
 * Only the letters A-Z are folded to a-z. String.prototype.toLowerCase() is
 * deliberately avoided: it applies full Unicode case mapping, so a non-ASCII
 * character such as U+212A KELVIN SIGN would lowercase to "k" and be reported
 * as matching an ASCII "k".
 *
 * @param {string} str - The string to compare the token value against.
 * @returns {boolean} true when both strings are equal after ASCII case folding.
 */
StringValuedToken.prototype.ASCIImatch = function(str) {
	// Fold A-Z (0x41-0x5A) onto a-z (0x61-0x7A); leave every other character alone.
	var asciiLower = function(s) {
		return s.replace(/[A-Z]/g, function(ch) {
			return String.fromCharCode(ch.charCodeAt(0) + 0x20);
		});
	};
	return asciiLower(this.valueAsString()) == asciiLower(str);
};
586+
/**
 * Returns the token's value as a string without modifying the token.
 *
 * If `this.value` is already a string it is returned unchanged; otherwise it
 * is passed to stringFromCodeArray() and that result is returned.
 *
 * @returns {string} The token value in string form.
 */
StringValuedToken.prototype.valueAsString = function() {
	var raw = this.value;
	return (typeof raw == 'string') ? raw : stringFromCodeArray(raw);
};
590+
/**
 * Returns the token's value as a new array of character codes without
 * modifying the token.
 *
 * A string value is expanded with charCodeAt(), one entry per UTF-16 code
 * unit. A non-string value is filtered so that only truthy entries are kept.
 *
 * @returns {Array} A fresh array of codes.
 */
StringValuedToken.prototype.valueAsCodes = function() {
	var raw = this.value;
	if(typeof raw != 'string') {
		// Keep only truthy entries of the stored array.
		return raw.filter(function(entry) { return entry; });
	}
	var codes = [];
	for(var idx = 0; idx < raw.length; idx++) {
		codes.push(raw.charCodeAt(idx));
	}
	return codes;
};
574599

575600
function IdentifierToken(val) {
576601
this.value = [];
@@ -584,7 +609,7 @@ function FunctionToken(val) {
584609
// These are always constructed by passing an IdentifierToken
585610
this.value = val.finish().value;
586611
}
587-
FunctionToken.prototype = new CSSParserToken;
612+
FunctionToken.prototype = new StringValuedToken;
588613
FunctionToken.prototype.tokenType = "FUNCTION";
589614
FunctionToken.prototype.toString = function() { return "FUNCTION("+this.value+")"; }
590615

@@ -633,7 +658,7 @@ NumberToken.prototype.toString = function() {
633658
return "NUMBER("+this.value+")";
634659
}
635660
NumberToken.prototype.finish = function() {
636-
this.repr = stringFromCodeArray(this.value);
661+
this.repr = this.valueAsString();
637662
this.value = this.repr * 1;
638663
if(Math.abs(this.value) % 1 != 0) this.type = "number";
639664
return this;

0 commit comments

Comments
 (0)