/*
 * scan.c -- lexical scanner for ll1gen
 *
 * 23 Feb 1993, Bert Bos <bert@let.rug.nl>
 */

/* Revision-control identification string; left out when __lint is defined. */
#ifndef __lint
static char rcsid[] = "$Header: /u/hyctia/home/bert/CVSRepository/Work/LL1-1.3/scan.c,v 1.1 1997/02/19 01:09:02 bert Exp $";
#endif

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "export.h"
#include "types.h"
#include "ll1.def"
#include "errors.e"


/*
 * Text belonging to the token scanned last. Written by get_ident,
 * get_string, get_sysfile, get_param, get_semantic and get_char.
 */
EXPORT char curstr[MAXSTR];
/*
 * Not assigned anywhere in this file: nextsym() works on a local
 * variable of the same name and hands its value back as return value.
 */
EXPORT tsymbol sym;
/* Stream from which every routine in this file reads its characters. */
EXPORT FILE *input;


/*
 * skip_comment -- skip the remainder of a (possibly nested) comment
 *
 * Called when the opening slash-star has already been read. Every
 * further slash-star raises the nesting level and every star-slash
 * lowers it; the function returns after the star-slash that balances
 * the opener consumed by the caller. Newlines are counted in lineno.
 * If the input ends first, an error is reported that mentions the line
 * on which the comment started.
 */
static void skip_comment()
{
    char msg[100];
    int prev, c = '\0';		/* int: EOF must differ from any char */
    int lev = 0, oldline;

    oldline = lineno;
    do {
        prev = c;
        c = getc(input);
        if (c == EOF) {
            sprintf(msg, "End of input inside comment at line %d", oldline);
            error(msg);
            return;
        }
        if (prev == '/' && c == '*') {
            lev++;
            c = '\0';		/* a delimiter's 2nd char cannot start another */
        } else if (prev == '*' && c == '/') {
            lev--;
            c = '\0';
        } else if (c == '\n') {
            lineno++;
        }
    } while (lev >= 0);
}

/*
 * get_keyword -- scan a keyword; the leading '%' has already been read
 *
 * Collects the letters that follow the '%' and returns the token that
 * corresponds to the resulting word. The first non-letter is pushed
 * back onto the input. An unrecognized word is reported as an error
 * and yields __EPSILON. Words longer than the local buffer are read
 * completely but stored truncated; they can never match a keyword and
 * are therefore reported as unknown.
 */
static tsymbol get_keyword()
{
    char s[100], msg[120];	/* msg holds the fixed text plus all of s */
    int i = 1;
    int c;			/* int: EOF must differ from any char */

    s[0] = '%';
    while (isalpha((c = getc(input)))) {
        if (i < (int) sizeof s - 1) s[i++] = c;
    }
    if (c != EOF) ungetc(c, input);
    s[i] = '\0';
    if (strcmp(s, "%terminals") == 0) return TERMINALS;
    else if (strcmp(s, "%rules") == 0) return RULES;
    else if (strcmp(s, "%include") == 0) return INCLUDE;
    else if (strcmp(s, "%insertion") == 0) return INSERTION;
    else if (strcmp(s, "%deletion") == 0) return DELETION;
    else if (strcmp(s, "%scanner") == 0) return SCANNERSYM;
    else if (strcmp(s, "%parser") == 0) return PARSERSYM;
    else if (strcmp(s, "%noline") == 0) return NO_LINENOSYM;
    else if (strcmp(s, "%if") == 0) return IF;
    else {
        sprintf(msg, "Unknown keyword: \"%s\"", s);
        error(msg);
        return __EPSILON;
    }
}

static tsymbol get_ident(c)
    char c;
{
    int i = 1;

    curstr[0] = c;
    while (isalnum((c = getc(input))) || c == '_') curstr[i++] = c;
    curstr[i] = '\0';
    ungetc(c, input);
    return IDENTIFIER;
}

/*
 * get_string -- scan a string; the opening '"' has already been read
 *
 * Copies everything up to the closing '"' into curstr and returns
 * STRING. A newline is only allowed directly after a backslash; in
 * that case the backslash is replaced by the newline. Any other
 * newline, or the end of the input, is reported as a missing quote
 * and yields __EPSILON. A string that does not fit in curstr is read
 * completely, stored truncated and reported as an error.
 */
static tsymbol get_string()
{
    int c, prev = '\0';		/* int: EOF must differ from any char */
    int i = 0, toolong = 0;

    c = getc(input);
    while (c != '"') {
        if (c == EOF) {
            error("Missing closing quote");
            return __EPSILON;
        } else if (c != '\n') {
            if (i < (int) sizeof curstr - 1) curstr[i++] = c;
            else toolong = 1;
        } else if (prev == '\\') {
            lineno++;
            /* The backslash sits at i-1, unless it was dropped already */
            if (!toolong) curstr[i-1] = '\n';
        } else {
            error("Missing closing quote");
            lineno++;
            return __EPSILON;
        }
        prev = c;
        c = getc(input);
    }
    curstr[i] = '\0';
    if (toolong) error("String too long");
    return STRING;
}

/*
 * get_sysfile -- scan a name in angle brackets; '<' has been read
 *
 * Copies everything up to the closing '>' into curstr and returns
 * SYSFILE. A newline or the end of the input before the '>' is
 * reported as an error and yields __EPSILON. A name that does not fit
 * in curstr is read completely, stored truncated and reported as an
 * error.
 */
static tsymbol get_sysfile()
{
    int c;			/* int: EOF must differ from any char */
    int i = 0, toolong = 0;

    c = getc(input);
    while (c != '>') {
        if (c == EOF) {
            error("Missing closing bracket \">\"");
            return __EPSILON;
        }
        if (c == '\n') {
            error("Missing closing bracket \">\"");
            lineno++;
            return __EPSILON;
        }
        if (i < (int) sizeof curstr - 1) curstr[i++] = c;
        else toolong = 1;
        c = getc(input);
    }
    curstr[i] = '\0';
    if (toolong) error("File name between \"<\" and \">\" too long");
    return SYSFILE;
}

/*
 * get_param -- scan text in parentheses; the '(' has been read
 *
 * Copies everything up to the matching ')' into curstr and returns
 * PARAMS. Nested parentheses are balanced. Parentheses inside single
 * or double quotes are ignored, both for nesting and for finding the
 * end. A backslash is copied together with the character after it,
 * and that character is not interpreted, so an escaped quote does not
 * open or close a quoted part. Newlines are counted in lineno. If the
 * input ends first, an error that mentions the starting line is
 * reported and __EPSILON is returned. Text that does not fit in curstr
 * is read completely, stored truncated and reported as an error.
 */
static tsymbol get_param()
{
    char msg[100];
    int c;			/* int: EOF must differ from any char */
    int oldline, lev = 0, i = 0, toolong = 0;
    Boolean squoted = False, dquoted = False;

    oldline = lineno;
    c = getc(input);
    while (c != ')' || squoted || dquoted || lev != 0) {
        switch (c) {
        case '"': if (!squoted) dquoted = !dquoted; break;
        case '\'': if (!dquoted) squoted = !squoted; break;
        case '(': if (!squoted && !dquoted) lev++; break;
        case ')': if (!squoted && !dquoted) lev--; break;
        case '\\':		/* Keep the backslash, take next char as is */
            if (i < (int) sizeof curstr - 1) curstr[i++] = c;
            else toolong = 1;
            c = getc(input);
            break;
        default: break;
        }
        if (c == EOF) {
            sprintf(msg, "Missing closing parenthesis at line %d", oldline);
            error(msg);
            return __EPSILON;
        }
        if (c == '\n') lineno++;
        if (i < (int) sizeof curstr - 1) curstr[i++] = c;
        else toolong = 1;
        c = getc(input);
    }
    curstr[i] = '\0';
    if (toolong) error("Text between parentheses too long");
    return PARAMS;
}

/*
 * get_semantic -- scan text in braces; the '{' has been read
 *
 * Copies everything up to the matching '}' into curstr and returns
 * SEMANTIC. Nested braces are balanced; braces inside single or double
 * quotes are ignored. A backslash is copied together with the
 * character after it, and that character is not interpreted. Newlines,
 * escaped ones included, are counted in lineno. If the input ends
 * first, an error that mentions the starting line is reported and
 * __EPSILON is returned. Text that does not fit in curstr is read
 * completely, stored truncated and reported as an error.
 */
static tsymbol get_semantic()
{
    char msg[100];
    int c;			/* int: EOF must differ from any char */
    int oldline, lev = 0, i = 0, toolong = 0;
    Boolean squoted = False, dquoted = False;

    oldline = lineno;
    c = getc(input);
    while (c != '}' || squoted || dquoted || lev != 0) {
        switch (c) {
        case '"': if (!squoted) dquoted = !dquoted; break;
        case '\'': if (!dquoted) squoted = !squoted; break;
        case '{': if (!squoted && !dquoted) lev++; break;
        case '}': if (!squoted && !dquoted) lev--; break;
        case '\\':		/* Keep the backslash, take next char as is */
            if (i < (int) sizeof curstr - 1) curstr[i++] = c;
            else toolong = 1;
            c = getc(input);
            break;
        default: break;
        }
        /* Checked after the switch to catch EOF behind a backslash too */
        if (c == EOF) {
            sprintf(msg, "Missing closing brace at line %d", oldline);
            error(msg);
            return __EPSILON;
        }
        if (c == '\n') lineno++;
        if (i < (int) sizeof curstr - 1) curstr[i++] = c;
        else toolong = 1;
        c = getc(input);
    }
    curstr[i] = '\0';
    if (toolong) error("Text between braces too long");
    return SEMANTIC;
}

static tsymbol get_char()
{
    char c, msg[100];

    curstr[1] = '\0';
    c = getc(input);
    if (c != '\\')
	curstr[0] = c;
    else {
	switch ((c = getc(input))) {
	case 'n': curstr[0] = '\n'; break;
	case 'r': curstr[0] = '\r'; break;
	case 'e': curstr[0] = '\033'; break;
	case 't': curstr[0] = '\t'; break;
	case 'b': curstr[0] = '\b'; break;
	case 'f': curstr[0] = '\f'; break;
	default:
	    if (c < '0' || '7' < c)		/* Literal character */
		curstr[0] = c;
	    else {				/* Octal notation */
		curstr[0] = c - '0';
		if ((c = getc(input)) < '0' || '7' < c)
		    ungetc(c, input);
		else {
		    curstr[0] = 8 * curstr[0] + c - '0';
		    if ((c = getc(input)) < '0' || '7' < c)
			ungetc(c, input);
		    else
			curstr[0] = 8 * curstr[0] + c - '0';
		}
	    }
	}
    }
    if (getc(input) != '\'') {
	sprintf(msg, "Unclosed character constant");
	error(msg);
	return  __EPSILON;
    }
    return CHAR;
}
	
EXPORT int nextsym()
{
    char c, msg[50];
    tsymbol sym;

    sym = __EPSILON;
    do {
        c = getc(input);
        switch (c) {
        case ' ': break;;
        case '\t': break;
        case '\n': lineno++; break;
        case '/':
            if ((c = getc(input)) == '*') skip_comment();
            else { ungetc(c, input); error("Unknown symbol \"/\""); }
            break;
        case EOF: sym = __ENDMARK; break;
        case ':': sym = COLON; break;
        case ';': sym = SEMICOLON; break;
        case '|': sym = BAR; break;
        case '~': sym = TILDE; break;
        case '[': sym = LBRACKET; break;
        case ']': sym = RBRACKET; break;
        case '+': sym = PLUS; break;
        case '*': sym = STAR; break;
        case '?': sym = QUESTION; break;
        case '"': sym = get_string(); break;
        case '<': sym = get_sysfile(); break;
        case '%': sym = get_keyword(); break;
	case '(': sym = get_param(); break;
	case '{': sym = get_semantic(); break;
	case '\'': sym = get_char(); break;
        default:
            if (isalpha(c) || c == '_')
                sym = get_ident(c);
            else {
                sprintf(msg, "Illegal letter \"%c\"", c);
                error(msg);
            }
        }
    } while (sym == __EPSILON);
    return sym;
}

/*
tsymbol yylex()
{
    return nextsym();
}
*/
