/*							-*-indented-text-*-
**  Tokenizer for CSS2
**
**  Works with any 8-bit encoding that includes ASCII,
**  such as ASCII, ISO-8859-1, or UTF-8. Doesn't work
**  with 16-bit encodings, because flex can't handle them.
**  (Doesn't decode UTF-8, though.)
**
**  Bert Bos <bert@w3.org>
**  26 November 1997
*/

%option case-insensitive yylineno

%{
#include <ctype.h>			/* isspace() */
#include <stdlib.h>			/* atof() */
#include <string.h>			/* strdup() */
#include "grammar.def"			/* Defines tokens */
#include "units.h"			/* Defines units CM, DEG, etc. */
char *curstr;				/* Text of the latest string-valued token; set
					   with strdup() by the rules below, which
					   never free it */
double curval;				/* Value of the latest numeric token */

/*
 * There is only ever one input: at end of file, report "no more input".
 * YY_SKIP_YYWRAP keeps the generated scanner from emitting its own
 * `extern int yywrap(void);' prototype, which the zero-argument macro
 * below would otherwise turn into a preprocessor error.
 */
#define YY_SKIP_YYWRAP
#define yywrap() 1
%}

/* Character classes.  Scanning is case-insensitive (see the option line
   above), so [a-z] also covers A-Z.
     nonascii  any byte with the high bit set (octal 200-377)
     unicode   backslash followed by one to six hex digits
     escape    a unicode escape, or backslash followed by a printable
               ASCII character (space through ~) or a high-bit byte
     nmstart   first character of an identifier (no digit, no hyphen)
     nmchar    any later character of an identifier or name */
nonascii	[\200-\377]
unicode		\\[0-9a-f]{1,6}
escape		{unicode}|\\[ -~\200-\377]
nmstart		[a-z]|{nonascii}|{escape}
nmchar		[a-z0-9-]|{nonascii}|{escape}
/* Double- and single-quoted strings.  The bracket expression is TAB plus
   printable ASCII minus both quote characters; each form then re-admits
   the quote it is not delimited by.  Newlines are not accepted. */
string1		\"([\t !#$%&(-~]|\'|{nonascii}|{escape})*\"
string2		\'([\t !#$%&(-~]|\"|{nonascii}|{escape})*\'

/* Token-level patterns.
     num   unsigned integer or decimal; no sign and no exponent
     url   body of an unquoted url(): no whitespace, quotes or parentheses
           unless escaped; may be empty
     w     optional whitespace */
ident		{nmstart}{nmchar}*
name		{nmchar}+
num		[0-9]+|[0-9]*"."[0-9]+
string		{string1}|{string2}
url		([!#$%&*-~]|{nonascii}|{escape})*
w		[ \t\r\n\f]*

/* Pieces of UNICODERANGE: h is one hex digit; range is one to six hex
   digits, or fewer hex digits followed by one to five ? wildcards. */
h		[0-9a-f]
range		{h}{1,6}|{h}{1,5}\?|{h}{1,4}\?\?|{h}{1,3}\?\?\?|{h}{1,2}\?{4}|{h}?\?{5}

/* Inclusive start condition: active right after an identifier-like token,
   so that a directly following "." or ":" can be told apart from one that
   follows whitespace or punctuation. */
%s AFTER_IDENT

%%

[ \t\r\n\f]+			{/* Whitespace yields no token, but it does end
				    the AFTER_IDENT context */
				 BEGIN(INITIAL);}

\/\*[^*]*\*+([^/][^*]*\*+)*\/	/* ignore comments; the start condition is left untouched */

"<!--"				{/* CDO and CDC leave the start condition as is */
				 return CDO;}
"-->"				{return CDC;}
"~="				{BEGIN(INITIAL); return INCLUDES;}

{string}			{/* Copy the text after the opening quote, then
				    overwrite the closing quote with NUL */
				 curstr = strdup(yytext + 1);
				 curstr[yyleng-2] = '\0'; return STRING;}

{ident}				{/* Enter AFTER_IDENT so that a . or : coming
				    directly after this identifier is reported
				    with the AFTER_IDENT variant of its token */
				 BEGIN(AFTER_IDENT);
				 curstr = strdup(yytext); return IDENT;}

<AFTER_IDENT>"."{ident}		{/* Listed before the unconditional rule, so it
				    wins the tie while AFTER_IDENT is active;
				    yytext + 1 skips the dot */
				 curstr = strdup(yytext + 1);
				 return CLASS_AFTER_IDENT;}
"."{ident}			{BEGIN(AFTER_IDENT);
				 curstr = strdup(yytext + 1); return CLASS;}

<AFTER_IDENT>":"		{/* The start condition is not changed here */
				 return COLON_AFTER_IDENT;}
":"				{return COLON;}

"#"{name}			{/* yytext + 1 skips the # sign */
				 BEGIN(AFTER_IDENT);
				 curstr = strdup(yytext + 1); return HASH;}

"@import"			{/* The four fixed at-keywords precede the
				    generic rule and so win on an exact match;
				    a longer name falls through to ATKEYWORD */
				 BEGIN(INITIAL); return IMPORT_SYM;}
"@page"				{BEGIN(INITIAL); return PAGE_SYM;}
"@media"			{BEGIN(INITIAL); return MEDIA_SYM;}
"@font-face"			{BEGIN(INITIAL); return FONT_FACE_SYM;}
"@"{ident}			{/* Any other at-keyword: curstr receives the
				    name without the @ sign */
				 BEGIN(INITIAL); curstr = strdup(yytext + 1);
				 return ATKEYWORD;}

"!important"			{/* Matched only when written without space
				    after the exclamation mark */
				 return IMPORTANT_SYM;}

{num}em				{/* atof() stops at the first character that
				    cannot belong to a number, so passing the
				    whole token converts only the numeric part.
				    EMS and EXS carry the number unscaled. */
				 curval = atof(yytext); return EMS;}
{num}ex				{curval = atof(yytext); return EXS;}
{num}px				{/* Known units are folded into curval by
				    multiplying with a per-unit constant;
				    presumably those come from units.h */
				 curval = atof(yytext) * PX; return LENGTH;}
{num}cm				{curval = atof(yytext) * CM; return LENGTH;}
{num}mm				{curval = atof(yytext) * MM; return LENGTH;}
{num}in				{curval = atof(yytext) * IN; return LENGTH;}
{num}pt				{curval = atof(yytext) * PT; return LENGTH;}
{num}pc				{curval = atof(yytext) * PC; return LENGTH;}
{num}deg			{curval = atof(yytext) * DEG; return ANGLE;}
{num}rad			{curval = atof(yytext) * RAD; return ANGLE;}
{num}grad			{curval = atof(yytext) * GRAD; return ANGLE;}
{num}ms				{curval = atof(yytext) * MS; return TIME;}
{num}s				{curval = atof(yytext) * SEC; return TIME;}
{num}Hz				{curval = atof(yytext) * HZ; return FREQ;}
{num}kHz			{curval = atof(yytext) * KHZ; return FREQ;}
{num}{ident}			{/* Unknown unit.  This rule must stay below
				    the specific units: for equal-length
				    matches the earlier rule wins.  curstr gets
				    the whole token, number included. */
				 curstr = strdup(yytext); return DIMEN;}
{num}%				{/* The value is not divided by 100 */
				 curval = atof(yytext); return PERCENTAGE;}
{num}				{curval = atof(yytext); return NUMBER;}

"url("{w}{string}{w}")"		{/* Quoted URL: curstr receives what lies between
				    the opening and the closing quote. */
				 char *open = yytext + 4;
				 char *close;
				 /* Skip optional whitespace up to the opening
				    quote. */
				 while (*open != '"' && *open != '\'')
				   open++;
				 curstr = strdup(open + 1);
				 /* Start at the terminating NUL of the copy and
				    walk back to the last quote character,
				    which is the closing quote. */
				 close = curstr + (yyleng - (open - yytext) - 1);
				 while (*close != '"' && *close != '\'')
				   close--;
				 *close = '\0'; return URL;}
"url("{w}{url}{w}")"		{/* Unquoted URL: curstr receives the text
				    between the parentheses, with the optional
				    whitespace on both sides removed.
				    NOTE(review): a trailing escaped space
				    (backslash, space) is trimmed as well. */
				 int first, last;
				 /* first: index of the first URL character
				    (or of the closing parenthesis when the
				    URL is empty). */
				 for (first = 4;
				  isspace((unsigned char) yytext[first]);
				  first++);
				 /* last: one past the final URL character.
				    Begin at the closing parenthesis and back
				    up over any whitespace in front of it. */
				 for (last = yyleng - 1; last > first
				  && isspace((unsigned char) yytext[last - 1]);
				  last--);
				 curstr = strdup(yytext + first);
				 /* Both indices refer to yytext; the copy
				    starts at first, hence the difference. */
				 curstr[last - first] = '\0'; return URL;}
{ident}"("			{/* Function name: duplicate the match and cut
				    off its final character, the opening
				    parenthesis. */
				 char *fname = strdup(yytext);
				 fname[yyleng - 1] = '\0';
				 curstr = fname;
				 return FUNCTION;}

U\+{range}			 |
U\+{h}{1,6}-{h}{1,6}		{/* Wildcard form and from-to form share this
				    action; the matched text is not saved */
				 BEGIN(INITIAL); return UNICODERANGE;}

.				{/* Any other single character (never a newline,
				    which the whitespace rule consumes) is
				    returned as its own token code */
				 BEGIN(INITIAL); return *yytext;}


%%


/*

TODO: add regexp for UNICODERANGE (a U\+{range} rule now exists in the
rules section; confirm it is complete, then drop this item)

".2em" can be a class or a length. Is it too late to change
class from .{name} to .{ident} ?

Added {num}{ident} for future use as dimensioned number. Not backward
compatible. Problem?

Allowed TAB in strings, which was forgotten in CSS1

Added {ident}"(" as a FUNCTION token (rgb(), attr(), url(), etc.)

Current grammar/tokenizer allows "A:active" to be written as
"A: active". What to do?

Current {string} doesn't allow newlines, yet example in spec has a
newline. What to do?

*/
