/*
 * build.c -- build sets of followers and firsts for all grammar rules
 *
 * 19 Feb 1993, Bert Bos <bert@let.rug.nl>
 *
 */

#ifndef __lint
static char rcsid[] = "$Header: build.c,v 0.2 93/02/23 19:27:40 bert Exp $";
#endif

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "export.h"
#include "types.h"
#include "ll1.def"
#include "errors.e"

#ifndef NULL
#define NULL 0
#endif
 
EXPORT Symbol *symbol = NULL;                   /* Table of terminals & nonterminals; allocated by init_symbols() */
EXPORT int nrsymbols = FIRST_SYM;               /* Number of entries in symbol[]; starts at FIRST_SYM (original note: at least 257) */
EXPORT int start_sym = 0;                       /* Index in symbol[] of the start symbol */
EXPORT int nrterminals = FIRST_SYM;             /* Part of nrsymbols; incremented by decl_terminal() */
EXPORT Option parseoptions = NULL;              /* Head of the list of options; add_option() prepends to it */


/*
 * init_symbols -- create the symbol array and initialize it
 *
 * Allocates the first FIRST_SYM entries of `symbol' (zero-filled) and
 * gives each of them a name:
 *   - entry 0 is called "__ENDMARK";
 *   - the quote and the backslash get an escaped, quoted form;
 *   - codes 32..126 and 160..254 are shown as a quoted character;
 *   - every other code is shown as a quoted octal escape.
 * All entries are marked as terminals that are not pseudo symbols.
 * The program exits with a message if memory cannot be obtained.
 */
EXPORT void init_symbols(void)
{
    char s[16];
    int i;

    symbol = calloc(FIRST_SYM, sizeof(symbol[0]));
    if (symbol == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    for (i = 0; i < FIRST_SYM; i++) {
        if (i == 0)
            snprintf(s, sizeof s, "__ENDMARK");
        else if (i == '\'')
            /* The name used to end in a stray comma; no other name has one */
            snprintf(s, sizeof s, "'\\''");
        else if (i == '\\')
            snprintf(s, sizeof s, "'\\\\'");
        else if ((i >= 32 && i < 127) || (i >= 160 && i < 255))
            snprintf(s, sizeof s, "'%c'", i);
        else
            snprintf(s, sizeof s, "'\\%03o'", (unsigned) i);

        symbol[i].name = strdup(s);
        if (symbol[i].name == NULL) {
            fprintf(stderr, "out of memory\n");
            exit(EXIT_FAILURE);
        }
        symbol[i].terminal = True;
        symbol[i].pseudo = False;
    }
}

/*
 * add_option -- add a parser option
 *
 * Puts a new option record of type `tp' at the front of the `parseoptions'
 * list. The string `s' is stored by pointer, not copied. The program exits
 * with a message if the record cannot be allocated.
 */
EXPORT void add_option(OptionType tp, char *s)
{
    Option h;

    h = malloc(sizeof(*h));
    if (h == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    h->tp = tp;
    h->option = s;
    h->next = parseoptions;
    parseoptions = h;
}

/*
 * find_symbol -- look up a symbol by name
 *
 * Only entries from FIRST_SYM upwards are searched. Returns the index of
 * the first entry whose name equals `name', or 0 if there is none.
 */
static int find_symbol(char *name)
{
    int idx;

    for (idx = FIRST_SYM; idx < nrsymbols; idx++) {
        if (strcmp(symbol[idx].name, name) == 0)
            return idx;
    }
    return 0;
}

/* decl_terminal -- create a terminal symbol (called by parser) */
EXPORT void decl_terminal(char *s)
{
    char msg[120];
    
    if (find_symbol(s)) {
        sprintf(msg, "Terminal listed twice: %s", s);
        error(msg);
    } else {
	symbol = realloc(symbol, (nrsymbols + 1) * sizeof(*symbol));
        symbol[nrsymbols].name = strdup(s);
        symbol[nrsymbols].terminal = True;
        symbol[nrsymbols].pseudo = False;
        nrsymbols++;
	nrterminals++;
    }
}

/*
 * decl_rule -- create a nonterminal with a rule (called by parser)
 *
 * head    name of the nonterminal
 * parm    stored in the `params' field
 * locals  stored in the `local_vars' field
 * r       the rules for this nonterminal
 * line    stored in the `lineno' field
 * pseudo  stored in the `pseudo' field
 *
 * An error is reported, and nothing is changed, if `head' names a terminal
 * or a nonterminal that already has rules. If `head' is unknown, a new
 * entry is appended; `head' itself becomes its name (the pointer is stored,
 * the string is not copied). If `head' is already in the table (without
 * rules), that entry is filled in and `head' is not stored.
 * The program exits with a message if the table cannot be enlarged.
 */
EXPORT void decl_rule(char *head, char *parm, char *locals, RuleSet r,
                      int line, Boolean pseudo)
{
    char msg[170];
    Symbol *grown;
    int i;

    i = find_symbol(head);
    if (i != 0 && symbol[i].terminal) {
        snprintf(msg, sizeof msg,
                 "used both as terminal and nonterminal: %s", head);
        error(msg);
        return;
    }
    if (i != 0 && symbol[i].rules != NULL) {
        snprintf(msg, sizeof msg, "duplicate rules for %s", head);
        error(msg);
        return;
    }

    if (i == 0) {                               /* Not used before */
        grown = realloc(symbol, (nrsymbols + 1) * sizeof(*symbol));
        if (grown == NULL) {
            fprintf(stderr, "out of memory\n");
            exit(EXIT_FAILURE);
        }
        symbol = grown;
        i = nrsymbols++;
        /* realloc does not clear the new entry; start from a known state */
        memset(&symbol[i], 0, sizeof(*symbol));
        symbol[i].name = head;                  /* Takes over the pointer */
        symbol[i].terminal = False;
    }

    /* Same for a new entry and for one that occurred in some rhs */
    symbol[i].params = parm;
    symbol[i].local_vars = locals;
    symbol[i].rules = r;
    symbol[i].lineno = line;
    symbol[i].pseudo = pseudo;
}

/* find_decl_symbol -- find or create a (non)terminal without a rule */
EXPORT int find_decl_symbol(char *s)
{
    int i;
    
    i = find_symbol(s);
    if (i == 0) {
        i = nrsymbols;
	symbol = realloc(symbol, (i + 1) * sizeof(*symbol));
        symbol[i].name = strdup(s);
        symbol[i].terminal = False;
        symbol[i].params = NULL;
        symbol[i].local_vars = NULL;
	symbol[i].rules = NULL;
	symbol[i].lineno = -1;
        nrsymbols++;
    }
    return i;
}

/*
 * gensym -- generate a unique symbol
 *
 * Returns a newly allocated string "_GENSYM<n>", where <n> starts at 0 and
 * goes up by one on every call. The names differ from each other; nothing
 * here checks them against names that are already in the symbol table.
 * The result comes from strdup(), so it is NULL if memory runs out.
 */
EXPORT char *gensym(void)
{
    static int seqnr = 0;                       /* Explicit type: implicit int is gone since C99 */
    char s[20];                                 /* "_GENSYM" + any int + NUL fits */

    snprintf(s, sizeof s, "_GENSYM%d", seqnr);
    seqnr++;
    return strdup(s);
}

/*
 * rule_nullable -- check if a rule can produce the empty production
 *
 * Walks the elements of the rule. Epsilon elements and elements with
 * repetition Star or Opt are passed over. Any other element stops the walk
 * with False if its symbol is a terminal or is not flagged nullable.
 * Reaching the end of the rule (also for an empty rule) gives True.
 */
EXPORT Boolean rule_nullable(Rule r)
{
    for (; r != NULL; r = r->next) {
        if (r->sym == __EPSILON)
            continue;
        if (r->rep == Star || r->rep == Opt)
            continue;
        if (symbol[r->sym].terminal || ! symbol[r->sym].nullable)
            return False;
    }
    return True;
}

/*
 * r_n_recursive -- check if any of a set of rules is nullable
 *
 * Tries the alternatives in order and stops at the first nullable one.
 * An empty set of rules gives False.
 */
static Boolean r_n_recursive(RuleSet r)
{
    Boolean found = False;

    while (r != NULL && ! found) {
        found = rule_nullable(r->rule) != False;
        r = r->alternative;
    }
    return found;
}

/*
 * ruleset_nullable -- check if any of a set of rules is nullable
 *
 * Unlike r_n_recursive, a set without any rules counts as nullable here.
 */
static Boolean ruleset_nullable(RuleSet r)
{
    return (r == NULL) ? True : r_n_recursive(r);
}

/*
 * compute_nullable -- compute `nullable' flag for all symbols
 *
 * All flags start out False. The flags of the entries from `nrterminals'
 * upwards are then recomputed from their rules, over and over, until a
 * whole pass leaves every flag as it was.
 */
static void compute_nullable(void)
{
    Boolean dirty, now;
    int k;

    for (k = 0; k < nrsymbols; k++)
        symbol[k].nullable = False;

    dirty = True;                               /* Force the first pass */
    while (dirty) {
        dirty = False;
        for (k = nrterminals; k < nrsymbols; k++) {
            now = ruleset_nullable(symbol[k].rules);
            if (now == symbol[k].nullable)
                continue;
            symbol[k].nullable = now;
            dirty = True;                       /* Needs another pass */
        }
    }
}

static Set ruleset_firsts(RuleSet r); /* forward */

/*
 * rule_firsts -- compute set of firsts of a rule
 *
 * Leading epsilon elements are skipped. If nothing is left, the result is
 * a new set made with SetNew. If the first remaining element can be absent
 * (repetition Star or Opt, or a symbol flagged nullable), the result is
 * the set for the rest of the rule with the firsts of this element added.
 * Otherwise the result is a copy of the firsts of this element.
 * The result is always a set of its own; elsewhere in this file such
 * results are released with free().
 */
EXPORT Set rule_firsts(Rule r)
{
    Set tail;

    while (r != NULL && r->sym == __EPSILON)
        r = r->next;
    if (r == NULL)
        return SetNew(nrsymbols);

    if (r->rep != Star && r->rep != Opt && ! symbol[r->sym].nullable)
        return SetCopy(symbol[r->sym].first);

    tail = rule_firsts(r->next);
    SetAdd(tail, symbol[r->sym].first);
    return tail;
}

/*
 * ruleset_firsts -- compute set of firsts for all alternative rules
 *
 * Returns a set of its own that holds the firsts of every alternative in
 * `r'; for an empty ruleset that is a new set made with SetNew.
 * (Relies on SetAdd adding the members of its second argument to its
 * first, so the order in which alternatives are merged does not matter.)
 */
static Set ruleset_firsts(RuleSet r)
{
    Set acc, part;

    if (r == NULL)
        return SetNew(nrsymbols);

    acc = rule_firsts(r->rule);
    for (r = r->alternative; r != NULL; r = r->alternative) {
        part = rule_firsts(r->rule);
        SetAdd(acc, part);
        free(part);
    }
    return acc;
}

/*
 * compute_firsts -- compute set of firsts for each symbol
 *
 * Every symbol gets a new set; for a terminal the symbol itself is put in
 * it. The sets of the entries from `nrterminals' upwards are then extended
 * with what their rules yield, over and over, until a whole pass adds
 * nothing.
 */
static void compute_firsts(void)
{
    Boolean grew;
    Set derived;
    int k;

    for (k = 0; k < nrsymbols; k++) {
        symbol[k].first = SetNew(nrsymbols);
        if (symbol[k].terminal)
            SetAdd1(symbol[k].first, k);
    }

    grew = True;                                /* Force the first pass */
    while (grew) {
        grew = False;
        for (k = nrterminals; k < nrsymbols; k++) {
            derived = ruleset_firsts(symbol[k].rules);
            if (! SetSubset(derived, symbol[k].first)) {
                SetAdd(symbol[k].first, derived);
                grew = True;                    /* Needs another pass */
            }
            free(derived);
        }
    }
}

/*
 * iterate_rule -- set followers for all tokens in r, False if no change
 *
 * head  index of the symbol that the rule belongs to
 * r     the rule to walk
 *
 * For every element that is not epsilon, the candidate followers are the
 * firsts of the rest of the rule, plus the followers of `head' if the rest
 * of the rule is nullable. Candidates are added to the follow set of the
 * element's symbol. Returns True if any follow set was extended.
 */
static Boolean iterate_rule(int head, Rule r)
{
    Boolean change = False;
    Set h;

    for (; r != NULL; r = r->next) {
        if (r->sym == __EPSILON)
            continue;
        h = rule_firsts(r->next);
        if (rule_nullable(r->next))
            SetAdd(h, symbol[head].follow);
        if (! SetSubset(h, symbol[r->sym].follow)) {
            SetAdd(symbol[r->sym].follow, h);
            change = True;
        }
        free(h);                                /* Temporary; it used to leak */
    }
    return change;
}
    
/*
 * iterate_ruleset -- set followers for all tokens in r, False if no change
 *
 * Applies iterate_rule to every alternative, in order, without stopping at
 * the first one that reports a change.
 */
static Boolean iterate_ruleset(int head, RuleSet r)
{
    Boolean change = False;

    for (; r != NULL; r = r->alternative) {
        if (iterate_rule(head, r->rule))
            change = True;
    }
    return change;
}
    
/*
 * compute_followers -- compute sets of followers for all symbols
 *
 * Every symbol gets a new follow set and __ENDMARK is put in the set of
 * the start symbol. The rules of the entries from `nrterminals' upwards
 * are then processed over and over, until a whole pass changes nothing.
 */
static void compute_followers(void)
{
    Boolean grew;
    int k;

    for (k = 0; k < nrsymbols; k++)
        symbol[k].follow = SetNew(nrsymbols);
    SetAdd1(symbol[start_sym].follow, __ENDMARK);

    grew = True;                                /* Force the first pass */
    while (grew) {
        grew = False;
        for (k = nrterminals; k < nrsymbols; k++) {
            if (iterate_ruleset(k, symbol[k].rules))
                grew = True;
        }
    }
}

/*
 * Resync sets are sets associated with tokens in the rhs of a rule. Each
 * resync set consists of the followers of the token itself, united with the
 * followers of all tokens that come after it in the rule. They are computed
 * to allow better panic-mode recovery: every nonterminal is called with the
 * union of its own set and the resync set of the enclosing (calling)
 * nonterminal as keys, and it starts by deleting tokens until one from this
 * set is found.
 */
 
/*
 * resync_rule -- set resync sets for all tokens in a rule
 *
 * r  the rule, handled from its last element back to its first
 * s  receives the set that belongs to the first element of `r'
 *
 * At the end of the rule the set is a new one made with SetNew. An epsilon
 * element hands on the set of what follows it. Any other element gets the
 * SetUnion of that set and the followers of its symbol; the result is kept
 * in the element's `resync' field and handed on.
 * NOTE(review): the set made at the end of the rule is referenced by
 * nothing once the last real element has its union -- confirm whether
 * that is intended.
 */
static void resync_rule(Rule r, Set *s)
{
    Set tail;

    if (r == NULL) {
        *s = SetNew(nrsymbols);
        return;
    }

    resync_rule(r->next, &tail);
    if (r->sym != __EPSILON) {
        r->resync = SetUnion(tail, symbol[r->sym].follow);
        *s = r->resync;
    } else {
        *s = tail;
    }
}

/*
 * resync_ruleset -- set resync sets for all rules in a ruleset
 *
 * Runs resync_rule on every alternative, in order. The set that
 * resync_rule hands back is not used here.
 */
static void resync_ruleset(RuleSet r)
{
    Set unused;

    for (; r != NULL; r = r->alternative)
        resync_rule(r->rule, &unused);
}

/*
 * compute_resync -- compute resync sets for all tokens in all rules
 *
 * Goes through the whole symbol table and handles the rules of every
 * entry that is not marked as a terminal.
 */
static void compute_resync(void)
{
    int k;

    for (k = 0; k < nrsymbols; k++) {
        if (symbol[k].terminal)
            continue;
        resync_ruleset(symbol[k].rules);
    }
}

/*
 * check_completeness -- check if all nonterminals have rules
 *
 * Reports an error for every entry from `nrterminals' upwards that has no
 * rules. The message is cut off if the symbol name does not fit in the
 * buffer.
 */
static void check_completeness(void)
{
    char msg[120];
    int i;

    for (i = nrterminals; i < nrsymbols; i++) {
        if (symbol[i].rules != NULL)
            continue;
        /* Bounded: the name comes from the grammar and may be long */
        snprintf(msg, sizeof msg,
                 "neither defined as terminal nor as nonterminal: %s",
                 symbol[i].name);
        error(msg);
    }
}

/*
 * dump_set -- print the member symbols of a set
 *
 * Writes the name of every symbol that is a member of `s' to `f', each
 * preceded by a space, in order of symbol index.
 */
static void dump_set(FILE *f, Set s)
{
    int k;

    for (k = 0; k < nrsymbols; k++) {
        if (! SetMember(k, s))
            continue;
        fprintf(f, " %s", symbol[k].name);
    }
}

/*
 * chk_ruleset_ambiguity -- check if there are any non-distinct first sets
 *
 * sym     index of the symbol that the rules belong to (for the message)
 * alt     number of the first alternative in `r' (for the message)
 * firsts  firsts of the alternatives seen so far; extended in place
 * r       the alternatives still to be checked
 *
 * For every alternative, its firsts are intersected with `firsts'. If the
 * intersection is not empty, the three sets are printed on stderr and
 * `nrerrors' is incremented. The firsts of the alternative are then added
 * to `firsts' before the next alternative is looked at.
 */
static void chk_ruleset_ambiguity(int sym, int alt, Set firsts, RuleSet r)
{
    Set mine, overlap;

    for (; r != NULL; r = r->alternative, alt++) {
        mine = rule_firsts(r->rule);
        overlap = SetIntersection(mine, firsts);
        if (! SetEmpty(overlap)) {
            fprintf(stderr, "%s:%d: ambiguity in %s, alternative %d\n",
                    inputname, symbol[sym].lineno, symbol[sym].name, alt);
            fprintf(stderr, "  already have:    ");
            dump_set(stderr, firsts);
            fprintf(stderr, "\n  this alternative:");
            dump_set(stderr, mine);
            fprintf(stderr, "\n  intersection:    ");
            dump_set(stderr, overlap);
            fprintf(stderr, "\n");
            nrerrors++;
        }
        free(overlap);
        SetAdd(firsts, mine);
        free(mine);
    }
}

/*
 * check_ambiguity -- check if there are any non-distinct first sets
 *
 * Checks the alternatives of every entry from `nrterminals' upwards,
 * each time starting from a new set and numbering alternatives from 1.
 */
static void check_ambiguity(void)
{
    Set seen;
    int k;

    for (k = nrterminals; k < nrsymbols; k++) {
        seen = SetNew(nrsymbols);
        chk_ruleset_ambiguity(k, 1, seen, symbol[k].rules);
        free(seen);
    }
}

/* compute_sets -- compute firsts and followers sets, return # of errors */
EXPORT void compute_sets(void)
{
    check_completeness();
    compute_nullable();
    compute_firsts();
    compute_followers();
    compute_resync();
    check_ambiguity();
}

