/*
 * generate.c -- routines to generate a top down parser
 *
 * 20 Feb 1993, Bert Bos <bert@let.rug.nl>
 * 18 Feb 1997, Bert Bos <bert@w3.org>
 *              Fixed bug in _tset_union
 *
 * This module exports the routines `generate(FILE *f, char *basename)',
 * which writes types and procedures to file f, and `gen_header(FILE *f)',
 * which writes the typedefs `tset' and `tsymbol' (an enum of the symbols)
 * and a declaration of `symbol_name' to file f.
 *
 */

/* RCS identification string for this file; compiled out when __lint is defined */
#ifndef __lint
static char rcsid[] = "$Id: generate.c,v 1.2 1997/02/21 12:15:44 bert Exp $";
#endif

#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "types.h"
#include "build.e"
/* #include "parse.def" */
#include "ll1.def"
#include "errors.e"
#include "export.h"

/* TAB -- number of blanks that indent() writes per indentation level */
#define TAB 4
/* PREFIX -- prepended to a nonterminal's name to form its generated function name */
#define PREFIX "p__"


/*
 * name_of_sym -- return the name of symbol sym, as an ident
 *
 * Symbols numbered FIRST_SYM and up yield "_<name>"; lower numbers yield
 * "char<number>".  The result is stored in a static buffer, so it is
 * overwritten by the next call; callers must use or copy it first.
 */
static char *name_of_sym(int sym)
{
    static char s[MAXSTR];

    if (sym >= FIRST_SYM) {
        /* Bound the copy so that '_' + name + NUL always fits in s */
        sprintf(s, "_%.*s", (int)(sizeof s - 2), symbol[sym].name);
    } else {
        sprintf(s, "char%d", sym);
    }
    return s;
}

/* indent -- write the blanks for indentation level n (TAB blanks per level) */
static void indent(FILE *f, int n)
{
    int blanks;

    for (blanks = n * TAB; blanks > 0; blanks--)
        putc(' ', f);
}

/*
 * gen_parms -- extract the variables from declarations
 *
 * decls is a comma-separated list of C parameter declarations, e.g.
 * "int x, char *y".  For every declaration the last identifier in it is
 * written to f, so the example yields "x, y".  If start_comma is true the
 * first name is preceded by ", " as well; later names always are.
 *
 * The scan works on a private copy, so decls itself is not modified.
 * Declarations without any identifier are skipped, and a NULL decls
 * writes nothing.
 */
static void gen_parms(FILE *f, char *decls, Boolean start_comma)
{
    char *copy, *p, *word = NULL;
    int at_end, at_separator;

    if (decls == NULL) return;
    copy = strdup(decls);
    if (copy == NULL) {
        fprintf(stderr, "gen_parms: out of memory\n");
        exit(EXIT_FAILURE);
    }

    for (p = copy; ; p++) {
        /* Remember the most recent identifier and skip past it */
        if (isalpha((unsigned char)*p) || *p == '_') {
            word = p;
            do { p++; } while (isalnum((unsigned char)*p) || *p == '_');
        }
        at_end = (*p == '\0');
        at_separator = at_end || *p == ',';
        *p = '\0';                      /* terminates `word' if it ended here */
        if (at_separator && word != NULL) {
            fprintf(f, "%s%s", start_comma ? ", " : "", word);
            start_comma = True;
            word = NULL;                /* one name per declaration */
        }
        if (at_end) break;
    }
    free(copy);
}

/*
 * gen_condition -- generate a condition on sym
 *
 * Writes "(0||*sym==A||*sym==B...)" with one comparison for each symbol
 * number that is a member of s; if guard is not NULL, "||(guard)" is
 * appended inside the parentheses.
 */
static void gen_condition(FILE *f, Set s, char *guard)
{
    int nr = 0;

    fputs("(0", f);
    while (nr < nrsymbols) {
        if (SetMember(nr, s))
            fprintf(f, "||*sym==%s", symbol[nr].name);
        nr++;
    }
    if (guard)
        fprintf(f, "||(%s)", guard);
    putc(')', f);
}

/*
 * gen_call -- generate a call to a nonterminal
 *
 * Writes, at indentation level lev, a call to the function for r's symbol
 * (PREFIX + symbol name).  Its arguments are sym, __nrerrors__, the union
 * of r's resync set with _keys0, then the state variables (if any) and
 * r's own argument text (if any).  If lines is true a #line directive
 * comes first.
 */
static void gen_call(FILE *f, Rule r, char *state, int lev, Boolean lines)
{
    if (lines)
        fprintf(f, "#line %d \"%s\"\n", r->lineno, inputname);

    indent(f, lev);
    fprintf(f,
            "%s%s(sym, __nrerrors__, _tset_union(_keys1, resync__%d, _keys0)",
            PREFIX, symbol[r->sym].name, r->resync_nr);
    if (state != NULL)
        gen_parms(f, state, True);
    if (r->sem)
        fprintf(f, ", %s", r->sem);
    fputs(");\n", f);
}

/*
 * gen_terminal_advance -- write r's action text (if any) followed by the
 * statement that fetches the next token into *sym, both at level lev
 */
static void gen_terminal_advance(FILE *f, Rule r, int lev, char *nextsym,
                                 char *state)
{
    if (r->sem) {
        indent(f, lev);
        fprintf(f, "%s\n", r->sem);
    }
    indent(f, lev);
    fprintf(f, "*sym = (tsymbol)%s(", nextsym);
    if (state) gen_parms(f, state, False);
    fprintf(f, ");\n");
}

/*
 * gen_terminal -- generate code to parse a terminal
 *
 * skip_cond: if true, no `if (..)' is required; only the action and the
 *            fetch of the next token are written.
 * lev:       indentation level.
 * insertion: name of the function the generated code calls when the
 *            terminal is not found.
 * nextsym:   name of the scanner function.
 * state:     declarations of the state variables, or NULL.
 * lines:     whether to print a #line directive first.
 *
 * Without skip_cond the generated code first calls _delete with the
 * terminal's firsts set added to the keys, then tests *sym; on a mismatch
 * it increments the error count and calls the insertion function.
 */
static void gen_terminal(FILE *f, Rule r, Boolean skip_cond, int lev,
                         char *insertion, char *nextsym, char *state,
                         Boolean lines)
{
    if (lines) fprintf(f, "#line %d \"%s\"\n", r->lineno, inputname);

    if (skip_cond) {
        gen_terminal_advance(f, r, lev, nextsym, state);
        return;
    }

    /* Skip unexpected tokens */
    indent(f, lev);
    fprintf(f,
            "_delete(sym, __nrerrors__, _tset_union(_keys1, _keys0, firsts_%s)",
            name_of_sym(r->sym));
    if (state) gen_parms(f, state, True);
    fprintf(f, ");\n");

    /* Expected terminal present: accept it */
    indent(f, lev);
    fprintf(f, "if (*sym==%s) {\n", symbol[r->sym].name);
    gen_terminal_advance(f, r, lev + 1, nextsym, state);

    /* Otherwise: report it as inserted */
    indent(f, lev);
    fprintf(f, "} else {\n");
    indent(f, lev + 1);
    fprintf(f, "(*__nrerrors__)++;\n");
    indent(f, lev + 1);
    fprintf(f, "%s((tsymbol)%s);\n", insertion, symbol[r->sym].name);
    indent(f, lev);
    fprintf(f, "}\n");
}

/* Forward declaration: generate_rule and generate_ruleset call each other */
static void generate_ruleset(FILE *f, int head, char *ins, char *nxt,
			     char *state, int lvl, Boolean lines);

/*
 * generate_item -- generate the code for the symbol of a single rule element
 *
 * A pseudo symbol is expanded in place through generate_ruleset, a terminal
 * through gen_terminal (which receives skip_cond), and any other symbol
 * becomes a call through gen_call.
 */
static void generate_item(FILE *f, Rule r, Boolean skip_cond, int lev,
                          char *insertion, char *nextsym, char *state,
                          Boolean lines)
{
    if (symbol[r->sym].pseudo)
        generate_ruleset(f, r->sym, insertion, nextsym, state, lev, lines);
    else if (symbol[r->sym].terminal)
        gen_terminal(f, r, skip_cond, lev, insertion, nextsym, state, lines);
    else
        gen_call(f, r, state, lev, lines);
}

/*
 * generate_rule -- generate the sequence of statements for a rule
 *
 * Walks the list starting at r (following ->next) and writes one statement
 * or block per element at indentation level lev:
 *   __EPSILON  the action text only (nothing if there is none)
 *   Star       while (<first set>) { item }
 *   Plus       do { item } while (<first set>);
 *   Opt        if (<first set>) { item }
 *   otherwise  the item itself
 *
 * skip_cond: if true, no `if (..)' is generated for the first element;
 *            it is always false for the elements after the first.
 */
static void generate_rule(FILE *f, Rule r, Boolean skip_cond, int lev,
                          char *insertion, char *nextsym, char *state,
                          Boolean lines)
{
    for (; r != NULL; r = r->next, skip_cond = False) {
        if (r->sym == __EPSILON) {
            if (lines) fprintf(f, "#line %d \"%s\"\n", r->lineno, inputname);
            /* An action-less epsilon must not be passed to %s as NULL */
            if (r->sem != NULL) {
                indent(f, lev);
                fprintf(f, "%s\n", r->sem);
            }
        } else if (r->rep == Star) {
            indent(f, lev); fprintf(f, "while ");
            gen_condition(f, symbol[r->sym].first, NULL);
            fprintf(f, " {\n");
            /* The loop condition has already tested the first set */
            generate_item(f, r, True, lev + 1, insertion, nextsym, state,
                          lines);
            indent(f, lev); fprintf(f, "}\n");
        } else if (r->rep == Plus) {
            indent(f, lev); fprintf(f, "do {\n");
            generate_item(f, r, skip_cond, lev + 1, insertion, nextsym, state,
                          lines);
            indent(f, lev); fprintf(f, "} while ");
            gen_condition(f, symbol[r->sym].first, NULL);
            fprintf(f, ";\n");
        } else if (r->rep == Opt) {
            indent(f, lev); fprintf(f, "if ");
            gen_condition(f, symbol[r->sym].first, NULL);
            fprintf(f, " {\n");
            /* The if condition has already tested the first set */
            generate_item(f, r, True, lev + 1, insertion, nextsym, state,
                          lines);
            indent(f, lev); fprintf(f, "}\n");
        } else {
            generate_item(f, r, skip_cond, lev, insertion, nextsym, state,
                          lines);
        }
    }
}

/*
 * generate_ruleset -- generate code for a set of alternative rules
 *
 * head is the symbol whose rules are generated, lvl the indentation level.
 * ins and nxt are the names of the insertion and scanner functions, state
 * holds the state variable declarations (or NULL), and lines tells whether
 * #line directives are written.
 *
 * A symbol with a single rule gets that rule's statements directly; a guard
 * on it is reported through error() and otherwise ignored.  With several
 * alternatives the output is a _delete call followed by an if/else-if
 * chain that tests each alternative's first set (and guard).  A nullable
 * last alternative becomes the plain `else' branch, without a test of its
 * guard; otherwise the final `else' counts an error and calls the
 * insertion function.
 */
static void generate_ruleset(FILE *f, int head, char *ins, char *nxt,
                             char *state, int lvl, Boolean lines)
{
    RuleSet h;
    Set firsts;
    char msg[256];

    if (lines) fprintf(f, "#line %d \"%s\"\n", symbol[head].lineno, inputname);
    if (! symbol[head].rules->alternative) {    /* Single rule */
        if (symbol[head].rules->guard) {
            /* Precision bounds the file name so the message fits in msg */
            sprintf(msg, "%.200s:%d: guard ignored", inputname,
                    symbol[head].lineno);
            error(msg);
        }
        /* Use the caller's level, so nested groups are indented correctly */
        generate_rule(f, symbol[head].rules->rule, False, lvl, ins, nxt,
                      state, lines);
    } else {
        indent(f, lvl);
        fprintf(f, "_delete(sym, __nrerrors__, _tset_union(_keys1, \
_keys0, firsts_%s)", name_of_sym(head));
        if (state) gen_parms(f, state, True);
        fprintf(f, ");\n");
        indent(f, lvl);                         /* Only for first branch */
        /* Generate all branches except last */
        for (h = symbol[head].rules; h->alternative; h = h->alternative) {
            firsts = rule_firsts(h->rule);
            fprintf(f, "if ");
            gen_condition(f, firsts, h->guard);
            fprintf(f, " {\n");
            free(firsts);
            generate_rule(f, h->rule, True, lvl + 1, ins, nxt, state, lines);
            /* The next `if' or `{' continues on this line */
            indent(f, lvl); fprintf(f, "} else ");
        }
        /* Generate last branch */
        if (rule_nullable(h->rule)) {           /* Last is nullable */
            fprintf(f, "{\n");
            generate_rule(f, h->rule, False, lvl + 1, ins, nxt, state, lines);
            indent(f, lvl); fprintf(f, "}\n");
        } else {                                /* Last is not nullable */
            firsts = rule_firsts(h->rule);
            fprintf(f, "if ");
            gen_condition(f, firsts, h->guard);
            fprintf(f, " {\n");
            free(firsts);
            generate_rule(f, h->rule, True, lvl + 1, ins, nxt, state, lines);
            indent(f, lvl); fprintf(f, "} else {\n");
            indent(f, lvl + 1); fprintf(f, "(*__nrerrors__)++;\n");
            indent(f, lvl + 1); fprintf(f, "%s(%s);\n", ins, symbol[head].name);
            indent(f, lvl); fprintf(f, "}\n");
        }
    }
}

/*
 * gen_rule_resync -- generate resync sets for all tokens in a rule
 *
 * For every element of the list r that is not __EPSILON, a static set named
 * "resync__<nr>" is printed to f, the number is stored in the element's
 * resync_nr, and the counter *n is advanced.
 */
static void gen_rule_resync(FILE *f, Rule r, int *n)
{
    char label[30];

    for (; r != NULL; r = r->next) {
        if (r->sym == __EPSILON)
            continue;
        fprintf(f, "static ");
        sprintf(label, "resync__%d", *n);
        SetPrint(f, r->resync, label);
        r->resync_nr = *n;
        *n += 1;
    }
}

/*
 * gen_ruleset_resync -- generate resync sets for all rules in a ruleset
 *
 * Visits r and each following alternative in order; *n is the running
 * set number shared by all of them.
 */
static void gen_ruleset_resync(FILE *f, RuleSet r, int *n)
{
    RuleSet alt;

    for (alt = r; alt != NULL; alt = alt->alternative)
        gen_rule_resync(f, alt->rule, n);
}

/*
 * collect_options -- process the option list h (in `reverse' order)
 *
 * Recurses to the end of the list before handling an option, so the last
 * list element is handled first.  Include directives and the prototypes of
 * the insertion and deletion functions are written to f immediately; the
 * other options only store their value through the pointer arguments.
 * *scanner_seen is set when a scanner option was found; its prototype is
 * left to the caller, because it depends on the final value of *state.
 */
static void collect_options(FILE *f, Option h, char **ins, char **del,
                            char **nxt, char **parser, char **state,
                            Boolean *lines, Boolean *scanner_seen)
{
    if (h == NULL) return;

    collect_options(f, h->next, ins, del, nxt, parser, state, lines,
                    scanner_seen);
    switch (h->tp) {
    case OInclSys: fprintf(f, "#include <%s>\n", h->option); break;
    case OInclLoc: fprintf(f, "#include \"%s\"\n", h->option); break;
    case OInsertion:
        *ins = h->option;
        fprintf(f, "extern void %s(tsymbol);\n", *ins);
        break;
    case ODeletion:
        *del = h->option;
        fprintf(f, "extern void %s(tsymbol);\n", *del);
        break;
    case OScanner:
        *nxt = h->option;
        *scanner_seen = True;
        break;
    case OParser:
        *parser = h->option;
        break;
    case OState:
        *state = h->option;
        break;
    case ONoLineno:
        *lines = False;
        break;
    default: assert(! "Cannot happen");
    }
}

/*
 * gen_options -- generate include directives (in `reverse' order)
 *
 * Processes the option list h: writes the include directives and extern
 * prototypes to f and returns the option values through ins, del, nxt,
 * parser, state and lines.  Arguments for which no option is present keep
 * the value they had on entry.
 *
 * The scanner prototype is written last, after every option has been
 * handled, so that its parameter list reflects the state option no matter
 * where that option occurs in the list.
 */
static void gen_options(FILE *f, Option h, char **ins, char **del,
                        char **nxt, char **parser, char **state,
                        Boolean *lines)
{
    Boolean scanner_seen = False;

    collect_options(f, h, ins, del, nxt, parser, state, lines, &scanner_seen);
    if (scanner_seen)
        fprintf(f, "extern int %s(%s);\n", *nxt, *state ? *state : "void");
}

/*
 * TSET_UNION -- source text of the generated `_tset_union' function, which
 * stores a[i]|b[i] in r[i] for i = 0 .. a[0] and returns r
 */
#define TSET_UNION \
    "static unsigned int * _tset_union(tset r, tset a, tset b)\n" \
    "{int i;for(i=0;i<=a[0];i++)r[i]=a[i]|b[i];return r;}\n"

/*
 * TSET_MEMBER -- source text of the generated `_tset_member' function,
 * which tests bit x%(8*sizeof(int)) of word s[1+x/(8*sizeof(int))]
 *
 * The mask is built from an unsigned 1: shifting a signed 1 into the top
 * bit of an int is undefined behaviour in the generated code.
 */
#define TSET_MEMBER \
    "static int _tset_member(int x, tset s)\n" \
    "{return(s[1+x/(8*sizeof(int))]&(1u<<(x%(8*sizeof(int)))))!=0;}\n"

/*
 * DELETE1, DELETE2 -- the two halves of the generated `_delete' function
 *
 * DELETE1 is a printf format taking 4 string arguments: ", " (or ""), the
 * state declarations (or ""), the name of the deletion function and the
 * name of the scanner function.  It stops inside the scanner call, so the
 * caller can write the scanner's arguments before finishing with DELETE2.
 */
#define DELETE1 \
    "\nstatic void _delete(tsymbol *sym, int *__nrerrors__, tset keys%s%s)\n" \
    "{while(!_tset_member(*sym,keys)){(*__nrerrors__)++;%s((tsymbol)*sym);" \
    "*sym=(tsymbol)%s("

#define DELETE2 ");}}\n\n"

/*
 * INSERTION_FUNC -- source text of the default `insertion' function, which
 * prints "<symbol name> expected" on stderr
 */
#define INSERTION_FUNC \
    "static void insertion(tsymbol sym)\n" \
    "{(void)fprintf(stderr, \"%s expected\\n\", symbol_name[(int)sym]);}\n"

/*
 * DELETION_FUNC -- source text of the default `deletion' function, which
 * prints "<symbol name> unexpected" on stderr
 */
#define DELETION_FUNC \
    "static void deletion(tsymbol sym)\n" \
    "{(void)fprintf(stderr, \"%s unexpected\\n\", symbol_name[(int)sym]);}\n"

/*
 * emit_char_name -- write the symbol_name[] entry for character code c
 *
 * Codes below 32 and from 127 to 159 appear in the generated string as a
 * backslash followed by three octal digits; the double quote and the
 * backslash are escaped; code 255 is written as an octal escape; all other
 * codes are written as the character itself.  (Assumes an ASCII-based
 * character set, as the comparisons on code values do.)
 */
static void emit_char_name(FILE *f, int c)
{
    if (c == '"') fprintf(f, " \"\\\"\",");
    else if (c == '\\') fprintf(f, " \"\\\\\",");
    else if (c == 255) fprintf(f, " \"\\%03o\",", c);
    else if (c < 32 || (c >= 127 && c < 160)) fprintf(f, " \"\\\\%03o\",", c);
    else fprintf(f, " \"%c\",", c);
}

/*
 * emit_symbol_names -- write the definition of the symbol_name[] array:
 * "__ENDMARK", the character codes 1..255, "__EPSILON" and then the names
 * of the symbols FIRST_SYM .. nrsymbols-1
 */
static void emit_symbol_names(FILE *f)
{
    int i, col;

    fprintf(f, "\nchar *symbol_name[] = {\n");
    fprintf(f, " \"__ENDMARK\",\n");
    for (i = 1; i < 256; i++) {
        emit_char_name(f, i);
        if (i % 8 == 7) fprintf(f, "\n");       /* 8 entries per line */
    }
    fprintf(f, " \"__EPSILON\",\n");
    for (i = FIRST_SYM, col = 0; i < nrsymbols; i++) {
        /* Start a new line when the entry would pass column 79 */
        col += strlen(symbol[i].name) + 4;
        if (col > 79) { putc('\n', f); col = strlen(symbol[i].name) + 4; }
        fprintf(f, " \"%s\",", symbol[i].name);
    }
    fprintf(f, "};\n\n");
}

/*
 * emit_nonterminal_functions -- write the forward declarations and then the
 * definitions of the parse functions for symbols nrterminals .. nrsymbols-1
 *
 * Pseudo symbols are skipped in both passes: they are expanded in place by
 * generate_rule and never get a function of their own.
 */
static void emit_nonterminal_functions(FILE *f, char *ins, char *nxt,
                                       char *state, Boolean lines)
{
    int i;

    /* Forward declarations for all functions */
    for (i = nrterminals; i < nrsymbols; i++) {
        if (symbol[i].pseudo) continue;
        fprintf(f, "static void %s%s(", PREFIX, symbol[i].name);
        fprintf(f, "tsymbol*, int*, tset");
        if (state) fprintf(f, ", %s", state);
        if (symbol[i].params) fprintf(f, ", %s", symbol[i].params);
        fprintf(f, ");\n");
    }

    /* The functions themselves */
    for (i = nrterminals; i < nrsymbols; i++) {
        if (symbol[i].pseudo) continue;
        if (lines)
            fprintf(f, "\n#line %d \"%s\"\n", symbol[i].lineno, inputname);
        fprintf(f, "static void %s%s(", PREFIX, symbol[i].name);
        fprintf(f, "tsymbol *sym, int *__nrerrors__, tset _keys0");
        if (state != NULL) fprintf(f, ", %s", state);
        if (symbol[i].params) fprintf(f, ", %s", symbol[i].params);
        fprintf(f, ")\n");
        fprintf(f, "{\n");
        indent(f, 1); fprintf(f, "tset _keys1;\n");
        if (symbol[i].local_vars) {
            indent(f, 1);
            fprintf(f, "%s\n", symbol[i].local_vars);
        }
        generate_ruleset(f, i, ins, nxt, state, 1, lines);
        fprintf(f, "}\n");
    }
}

/*
 * emit_parser_entry -- write the parser's entry function, named `parser'
 *
 * The generated function reads the first token with the scanner nxt, calls
 * the function of the start symbol with a key set that holds only
 * __ENDMARK, reports every token left before __ENDMARK through del, and
 * returns the number of errors.
 */
static void emit_parser_entry(FILE *f, char *parser, char *nxt, char *del,
                              char *state)
{
    Set endset = SetSingleton(nrsymbols, __ENDMARK);

    /* Function header: state declarations first, then start symbol's */
    fprintf(f, "\nint %s(", parser);
    if (state && symbol[start_sym].params)
        fprintf(f, "%s, %s", state, symbol[start_sym].params);
    else if (state)
        fprintf(f, "%s", state);
    else if (symbol[start_sym].params)
        fprintf(f, "%s", symbol[start_sym].params);
    else
        fprintf(f, "void");
    fprintf(f, ")\n");
    fprintf(f, "{\n");

    indent(f, 1); fprintf(f, "tsymbol sym;\n");
    indent(f, 1); fprintf(f, "int __nrerrors__ = 0;\n");
    indent(f, 1); SetPrint(f, endset, "_end_set");

    /* Read the first token and parse the start symbol */
    indent(f, 1); fprintf(f, "sym = (tsymbol)%s(", nxt);
    if (state) gen_parms(f, state, False);
    fprintf(f, ");\n");
    indent(f, 1); fprintf(f, "%s%s(&sym, &__nrerrors__, _end_set", PREFIX,
                          symbol[start_sym].name);
    if (state) gen_parms(f, state, True);
    if (symbol[start_sym].params) gen_parms(f, symbol[start_sym].params, True);
    fprintf(f, ");\n");

    /* Everything left over before the end marker is an error */
    indent(f, 1); fprintf(f, "while (sym != __ENDMARK) {\n");
    indent(f, 2); fprintf(f, "__nrerrors__++;\n");
    indent(f, 2); fprintf(f, "%s((tsymbol)sym);\n", del);
    indent(f, 2); fprintf(f, "sym = (tsymbol)%s(", nxt);
    if (state) gen_parms(f, state, False);
    fprintf(f, ");\n");
    indent(f, 1); fprintf(f, "}\n");
    indent(f, 1); fprintf(f, "return __nrerrors__;\n");
    fprintf(f, "}\n");
    free(endset);
}

/*
 * generate -- generate all parser functions
 *
 * Writes the complete parser source to f, in this order: the includes
 * (<stdio.h>, "<basename>.def" and those from the options), prototypes or
 * default definitions of the scanner, insertion and deletion functions,
 * the symbol_name[] array, the firsts and resync sets, the helper
 * functions _tset_member, _tset_union and _delete, one function per
 * non-pseudo nonterminal, and finally the parser entry function
 * (`ll1parse' unless an option names it otherwise).
 */
EXPORT void generate(FILE *f, char *basename)
{
    int i, n;
    char s[1024];
    char *ins = NULL, *del = NULL, *nxt = NULL, *parser = "ll1parse";
    char *state = NULL;
    Boolean lines = True;                       /* Print #line directives */

    /* Process options */
    fprintf(f, "#include <stdio.h>\n");
    fprintf(f, "#include \"%s.def\"\n", basename);
    gen_options(f, parseoptions, &ins, &del, &nxt, &parser, &state, &lines);
    if (! nxt) {
        nxt = "yylex";
        fprintf(f, "extern int %s(%s);\n", nxt, state ? state : "void");
    }
    if (! ins) { ins = "insertion"; fprintf(f, "\n%s", INSERTION_FUNC); }
    if (! del) { del = "deletion"; fprintf(f, "\n%s", DELETION_FUNC); }

    /* Generate symbol_name array */
    emit_symbol_names(f);

    /* Generate firsts sets */
    for (i = 0 /*was: FIRST_SYM*/; i < nrsymbols; i++) {
        /* Precision keeps "firsts_" + name + NUL within s */
        sprintf(s, "firsts_%.*s", (int)(sizeof s - 8), name_of_sym(i));
        fprintf(f, "static ");
        SetPrint(f, symbol[i].first, s);
    }

    /* Generate resync sets */
    putc('\n', f);
    n = 0;
    for (i = FIRST_SYM; i < nrsymbols; i++)
        if (! symbol[i].terminal) gen_ruleset_resync(f, symbol[i].rules, &n);

    fprintf(f, "\n%s", TSET_MEMBER);            /* `_tset_member' function */
    fprintf(f, "\n%s", TSET_UNION);             /* `_tset_union' function */

    /* `_delete' function */
    fprintf(f, DELETE1, state ? ", " : "", state ? state : "", del, nxt);
    if (state) gen_parms(f, state, False);
    fprintf(f, DELETE2);

    /* Parse functions and the entry point */
    emit_nonterminal_functions(f, ins, nxt, state, lines);
    emit_parser_entry(f, parser, nxt, del, state);
}

/*
 * gen_header -- generate a header file for the parser
 *
 * Writes to f: the typedef of `tset' (an unsigned int array sized from
 * nrsymbols), the enum `tsymbol' with __ENDMARK, __EPSILON and the symbols
 * FIRST_SYM .. nrsymbols-1 (five per line), and the extern declaration of
 * symbol_name[].
 */
EXPORT void gen_header(FILE *f)
{
    int i;

    /* Typedef for sets of symbols: tset */
    fprintf(f,
            "typedef unsigned int tset[1+(%d+8*sizeof(int)-1)/(8*sizeof(int))];\n\n",
            nrsymbols);

    /* Typedef for tsymbol */
    fprintf(f, "typedef enum { __ENDMARK=0, __EPSILON=%d,\n", FIRST_SYM - 1);
    for (i = FIRST_SYM; i < nrsymbols; i++) {
        /* Line break before every sixth, eleventh, ... enumerator */
        if (i > FIRST_SYM && (i - FIRST_SYM) % 5 == 0)
            putc('\n', f);
        fprintf(f, "%s=%d, ", symbol[i].name, i);
    }
    fputs(" \n} tsymbol;\n\n", f);

    fputs("extern char *symbol_name[];\n", f);
}

