/* * misc.c * * Manage tokens, regular expressions. * Print methods for debugging * Compute follow lists onto tail ends of rules. * * The following functions are visible: * * int addTname(char *); Add token name * int addTexpr(char *); Add token expression * int Tnum(char *); Get number of expr/token * void Tklink(char *, char *); Link a name with an expression * int hasAction(expr); Does expr already have action assigned? * void setHasAction(expr); Indicate that expr now has an action * Entry *newEntry(char *,int); Create new table entry with certain size * void list_add(ListNode **list, char *e) * void list_free(ListNode **list, int freeData); *** MR10 *** * void list_apply(ListNode *list, void (*f)()) * void lexclass(char *m); switch to new/old lexical class * void lexmode(int i); switch to old lexical class i * * SOFTWARE RIGHTS * * We reserve no LEGAL rights to the Purdue Compiler Construction Tool * Set (PCCTS) -- PCCTS is in the public domain. An individual or * company may do whatever they wish with source code distributed with * PCCTS or the code generated by PCCTS, including the incorporation of * PCCTS, or its output, into commerical software. * * We encourage users to develop software with PCCTS. However, we do ask * that credit is given to us for developing PCCTS. By "credit", * we mean that if you incorporate our source code into one of your * programs (commercial product, research project, or otherwise) that you * acknowledge this fact somewhere in the documentation, research report, * etc... If you like PCCTS and have developed a nice tool with the * output, please mention that you developed it using PCCTS. In * addition, we ask that this header remain intact in our source code. * As long as these guidelines are kept, we expect to continue enhancing * this system and expect to make other tools available as they are * completed. * * ANTLR 1.33 * Terence Parr * Parr Research Corporation * with Purdue University and AHPCRC, University of Minnesota * 1989-2001 */ #include #include "pcctscfg.h" #include "set.h" #include "syn.h" #include "hash.h" #include "generic.h" #include "dlgdef.h" #include static int tsize=TSChunk; /* size of token str arrays */ static void #ifdef __USE_PROTOS RemapForcedTokensInSyntaxDiagram(Node *); #else RemapForcedTokensInSyntaxDiagram(); #endif /* T o k e n M a n i p u l a t i o n */ /* * add token 't' to the TokenStr/Expr array. Make more room if necessary. * 't' is either an expression or a token name. * * There is only one TokenStr array, but multiple ExprStr's. Therefore, * for each lex class (element of lclass) we must extend the ExprStr array. * ExprStr's and TokenStr are always all the same size. * * Also, there is a Texpr hash table for each automaton. */ static void #ifdef __USE_PROTOS Ttrack( char *t ) #else Ttrack( t ) char *t; #endif { if ( TokenNum >= tsize ) /* terminal table overflow? */ { char **p; int i, more, j; more = TSChunk * (1 + ((TokenNum-tsize) / TSChunk)); tsize += more; TokenStr = (char **) realloc((char *)TokenStr, tsize*sizeof(char *)); require(TokenStr != NULL, "Ttrack: can't extend TokenStr"); for (i=0; iexpr = e; p->lclass = CurrentLexClass; return p; } /* switch to lexical class/mode m. This amounts to creating a new * lex mode if one does not already exist and making ExprStr point * to the correct char string array. We must also switch Texpr tables. * * BTW, we need multiple ExprStr arrays because more than one automaton * may have the same label for a token, but with different expressions. * We need to track an expr for each automaton. If we disallowed this * feature, only one ExprStr would be required. */ void #ifdef __USE_PROTOS lexclass( char *m ) #else lexclass( m ) char *m; #endif { int i; TermEntry *p; static char EOFSTR[] = "\"@\""; if ( hash_get(Tname, m) != NULL ) { warn(eMsg1("lexclass name conflicts with token/errclass label '%s'",m)); } /* does m already exist? */ i = LexClassIndex(m); if ( i != -1 ) {lexmode(i); return;} /* must make new one */ NumLexClasses++; CurrentLexClass = NumLexClasses-1; require(NumLexClasses<=MaxLexClasses, "number of allowable lexclasses exceeded\nIncrease MaxLexClasses in generic.h and recompile all C files"); lclass[CurrentLexClass].classnum = m; lclass[CurrentLexClass].exprs = (char **) calloc(tsize, sizeof(char *)); require(lclass[CurrentLexClass].exprs!=NULL, "lexclass: cannot allocate ExprStr"); lclass[CurrentLexClass].htable = newHashTable(); ExprStr = lclass[CurrentLexClass].exprs; Texpr = lclass[CurrentLexClass].htable; /* define EOF for each automaton */ p = newTermEntry( EOFSTR ); p->token = EofToken; /* couldn't have remapped tokens yet, use EofToken */ hash_add(Texpr, EOFSTR, (Entry *)p); list_add(&ExprOrder, (void *)newExpr(EOFSTR)); /* note: we use the actual ExprStr array * here as TokenInd doesn't exist yet */ ExprStr[EofToken] = EOFSTR; } void #ifdef __USE_PROTOS lexmode( int i ) #else lexmode( i ) int i; #endif { require(iaction!=NULL); } void #ifdef __USE_PROTOS setHasAction( char *expr, char *action ) #else setHasAction( expr, action ) char *expr; char *action; #endif { TermEntry *p; require(expr!=NULL, "setHasAction: invalid expr"); p = (TermEntry *) hash_get(Texpr, expr); require(p!=NULL, eMsg1("setHasAction: expr '%s' doesn't exist",expr)); p->action = action; } ForcedToken * #ifdef __USE_PROTOS newForcedToken(char *token, int tnum) #else newForcedToken(token, tnum) char *token; int tnum; #endif { ForcedToken *ft = (ForcedToken *) calloc(1, sizeof(ForcedToken)); require(ft!=NULL, "out of memory"); ft->token = token; ft->tnum = tnum; return ft; } /* * Make a token indirection array that remaps token numbers and then walk * the appropriate symbol tables and SynDiag to change token numbers */ void #ifdef __USE_PROTOS RemapForcedTokens(void) #else RemapForcedTokens() #endif { ListNode *p; ForcedToken *q; int max_token_number=0; /* MR9 23-Sep-97 Removed "unsigned" */ int i; if ( ForcedTokens == NULL ) return; /* find max token num */ for (p = ForcedTokens->next; p!=NULL; p=p->next) { q = (ForcedToken *) p->elem; if ( q->tnum > max_token_number ) max_token_number = q->tnum; } fprintf(stderr, "max token number is %d\n", max_token_number); /* make token indirection array */ TokenInd = (int *) calloc(max_token_number+1, sizeof(int)); LastTokenCounted = TokenNum; TokenNum = max_token_number+1; require(TokenInd!=NULL, "RemapForcedTokens: cannot allocate TokenInd"); /* fill token indirection array and change token id htable ; swap token indices */ for (i=1; inext; p!=NULL; p=p->next) { TermEntry *te; int old_pos, t; q = (ForcedToken *) p->elem; fprintf(stderr, "%s forced to %d\n", q->token, q->tnum); te = (TermEntry *) hash_get(Tname, q->token); require(te!=NULL, "RemapForcedTokens: token not in hash table"); old_pos = te->token; fprintf(stderr, "Before: TokenInd[old_pos==%d] is %d\n", old_pos, TokenInd[old_pos]); fprintf(stderr, "Before: TokenInd[target==%d] is %d\n", q->tnum, TokenInd[q->tnum]); q = (ForcedToken *) p->elem; t = TokenInd[old_pos]; TokenInd[old_pos] = q->tnum; TokenInd[q->tnum] = t; te->token = q->tnum; /* update token type id symbol table */ fprintf(stderr, "After: TokenInd[old_pos==%d] is %d\n", old_pos, TokenInd[old_pos]); fprintf(stderr, "After: TokenInd[target==%d] is %d\n", q->tnum, TokenInd[q->tnum]); /* Change the token number in the sym tab entry for the exprs * at the old position of the token id and the target position */ /* update expr at target (if any) of forced token id */ if ( q->tnum < TokenNum ) /* is it a valid position? */ { for (i=0; itnum]!=NULL ) { /* update the symbol table for this expr */ TermEntry *e = (TermEntry *) hash_get(lclass[i].htable, lclass[i].exprs[q->tnum]); require(e!=NULL, "RemapForcedTokens: expr not in hash table"); e->token = old_pos; fprintf(stderr, "found expr '%s' at target %d in lclass[%d]; changed to %d\n", lclass[i].exprs[q->tnum], q->tnum, i, old_pos); } } } /* update expr at old position (if any) of forced token id */ for (i=0; itoken = q->tnum; fprintf(stderr, "found expr '%s' for id %s in lclass[%d]; changed to %d\n", lclass[i].exprs[old_pos], q->token, i, q->tnum); } } } /* Update SynDiag */ RemapForcedTokensInSyntaxDiagram((Node *)SynDiag); } static void #ifdef __USE_PROTOS RemapForcedTokensInSyntaxDiagram(Node *p) #else RemapForcedTokensInSyntaxDiagram(p) Node *p; #endif { Junction *j = (Junction *) p; RuleRefNode *r = (RuleRefNode *) p; TokNode *t = (TokNode *)p; if ( p==NULL ) return; require(p->ntype>=1 && p->ntype<=NumNodeTypes, "Remap...: invalid diagram node"); switch ( p->ntype ) { case nJunction : if ( j->visited ) return; if ( j->jtype == EndRule ) return; j->visited = TRUE; RemapForcedTokensInSyntaxDiagram( j->p1 ); RemapForcedTokensInSyntaxDiagram( j->p2 ); j->visited = FALSE; return; case nRuleRef : RemapForcedTokensInSyntaxDiagram( r->next ); return; case nToken : if ( t->remapped ) return; /* we've been here before */ t->remapped = 1; fprintf(stderr, "remapping %d to %d\n", t->token, TokenInd[t->token]); t->token = TokenInd[t->token]; RemapForcedTokensInSyntaxDiagram( t->next ); return; case nAction : RemapForcedTokensInSyntaxDiagram( ((ActionNode *)p)->next ); return; default : fatal_internal("invalid node type"); } } /* * Add a token name. Return the token number associated with it. If it already * exists, then return the token number assigned to it. * * Track the order in which tokens are found so that the DLG output maintains * that order. It also lets us map token numbers to strings. */ int #ifdef __USE_PROTOS addTname( char *token ) #else addTname( token ) char *token; #endif { TermEntry *p; require(token!=NULL, "addTname: invalid token name"); if ( (p=(TermEntry *)hash_get(Tname, token)) != NULL ) return p->token; p = newTermEntry( token ); Ttrack( p->str ); p->token = TokenNum++; hash_add(Tname, token, (Entry *)p); return p->token; } /* This is the same as addTname except we force the TokenNum to be tnum. * We don't have to use the Forced token stuff as no tokens will have * been defined with #tokens when this is called. This is only called * when a #tokdefs meta-op is used. */ int #ifdef __USE_PROTOS addForcedTname( char *token, int tnum ) #else addForcedTname( token, tnum ) char *token; int tnum; #endif { TermEntry *p; require(token!=NULL, "addTname: invalid token name"); if ( (p=(TermEntry *)hash_get(Tname, token)) != NULL ) return p->token; p = newTermEntry( token ); Ttrack( p->str ); p->token = tnum; hash_add(Tname, token, (Entry *)p); return p->token; } /* * Add a token expr. Return the token number associated with it. If it already * exists, then return the token number assigned to it. */ int #ifdef __USE_PROTOS addTexpr( char *expr ) #else addTexpr( expr ) char *expr; #endif { TermEntry *p; require(expr!=NULL, "addTexpr: invalid regular expression"); if ( (p=(TermEntry *)hash_get(Texpr, expr)) != NULL ) return p->token; p = newTermEntry( expr ); Ttrack( p->str ); /* track the order in which they occur */ list_add(&ExprOrder, (void *)newExpr(p->str)); p->token = TokenNum++; hash_add(Texpr, expr, (Entry *)p); return p->token; } /* return the token number of 'term'. Return 0 if no 'term' exists */ int #ifdef __USE_PROTOS Tnum( char *term ) #else Tnum( term ) char *term; #endif { TermEntry *p; require(term!=NULL, "Tnum: invalid terminal"); if ( *term=='"' ) p = (TermEntry *) hash_get(Texpr, term); else p = (TermEntry *) hash_get(Tname, term); if ( p == NULL ) return 0; else return p->token; } /* associate a Name with an expr. If both have been already assigned * token numbers, then an error is reported. Add the token or expr * that has not been added if no error. This 'represents' the #token * ANTLR pseudo-op. If both have not been defined, define them both * linked to same token number. */ void #ifdef __USE_PROTOS Tklink( char *token, char *expr ) #else Tklink( token, expr ) char *token; char *expr; #endif { TermEntry *p, *q; require(token!=NULL && expr!=NULL, "Tklink: invalid token name and/or expr"); p = (TermEntry *) hash_get(Tname, token); q = (TermEntry *) hash_get(Texpr, expr); if ( p != NULL && q != NULL ) /* both defined */ { warn( eMsg2("token name %s and rexpr %s already defined; ignored", token, expr) ); return; } if ( p==NULL && q==NULL ) /* both not defined */ { int t = addTname( token ); q = newTermEntry( expr ); hash_add(Texpr, expr, (Entry *)q); q->token = t; /* note: we use the actual ExprStr array * here as TokenInd doesn't exist yet */ ExprStr[t] = q->str; /* track the order in which they occur */ list_add(&ExprOrder, (void *)newExpr(q->str)); return; } if ( p != NULL ) /* one is defined, one is not */ { q = newTermEntry( expr ); hash_add(Texpr, expr, (Entry *)q); q->token = p->token; ExprStr[p->token] = q->str; /* both expr and token str defined now */ list_add(&ExprOrder, (void *)newExpr(q->str)); } else /* trying to associate name with expr here*/ { p = newTermEntry( token ); hash_add(Tname, token, (Entry *)p); p->token = q->token; TokenStr[p->token] = p->str;/* both expr and token str defined now */ } } /* * Given a string, this function allocates and returns a pointer to a * hash table record of size 'sz' whose "str" pointer is reset to a position * in the string table. */ Entry * #ifdef __USE_PROTOS newEntry( char *text, int sz ) #else newEntry( text, sz ) char *text; int sz; #endif { Entry *p; require(text!=NULL, "new: NULL terminal"); if ( (p = (Entry *) calloc(1,sz)) == 0 ) { fatal_internal("newEntry: out of memory for terminals\n"); exit(PCCTS_EXIT_FAILURE); } p->str = mystrdup(text); return(p); } /* * add an element to a list. * * Any non-empty list has a sentinel node whose 'elem' pointer is really * a pointer to the last element. (i.e. length(list) = #elemIn(list)+1). * Elements are appended to the list. */ void #ifdef __USE_PROTOS list_add( ListNode **list, void *e ) #else list_add( list, e ) ListNode **list; void *e; #endif { ListNode *p, *tail; require(e!=NULL, "list_add: attempting to add NULL list element"); p = newListNode; require(p!=NULL, "list_add: cannot alloc new list node"); p->elem = e; if ( *list == NULL ) { ListNode *sentinel = newListNode; require(sentinel!=NULL, "list_add: cannot alloc sentinel node"); *list=sentinel; sentinel->next = p; sentinel->elem = (char *)p; /* set tail pointer */ } else /* find end of list */ { tail = (ListNode *) (*list)->elem; /* get tail pointer */ tail->next = p; (*list)->elem = (char *) p; /* reset tail */ } } /* MR10 list_free() frees the ListNode elements in the list */ /* MR10 if freeData then free the data elements of the list too */ void #ifdef __USE_PROTOS list_free(ListNode **list,int freeData) #else list_free(list,freeData) ListNode **list; int freeData; #endif { ListNode *p; ListNode *next; if (list == NULL) return; if (*list == NULL) return; for (p=*list; p != NULL; p=next) { next=p->next; if (freeData && p->elem != NULL) { free( (char *) p->elem); }; free( (char *) p); }; *list=NULL; } void #ifdef __USE_PROTOS list_apply( ListNode *list, void (*f)(void *) ) #else list_apply( list, f ) ListNode *list; void (*f)(); #endif { ListNode *p; require(f!=NULL, "list_apply: NULL function to apply"); if ( list == NULL ) return; for (p = list->next; p!=NULL; p=p->next) (*f)( p->elem ); } /* MR27 */ #ifdef __USE_PROTOS int list_search_cstring(ListNode *list, char * cstring) #else int list_search_cstring(list, cstring) ListNode * list; char * cstring; #endif { ListNode *p; if (list == NULL ) return 0; for (p = list->next; p!=NULL; p=p->next) { if (p->elem == NULL) continue; if (0 == strcmp((char *) p->elem , cstring)) return 1; } return 0; } /* F O L L O W C y c l e S t u f f */ /* make a key based upon (rulename, computation, k value). * Computation values are 'i'==FIRST, 'o'==FOLLOW. */ /* MR10 Make the key all characters so it can be read easily */ /* MR10 by a simple dump program. Also, separates */ /* MR10 'o' and 'i' from rule name */ char * #ifdef __USE_PROTOS Fkey( char *rule, int computation, int k ) #else Fkey( rule, computation, k ) char *rule; int computation; int k; #endif { static char key[MaxRuleName+2+2+1]; /* MR10 */ int i; if ( k > 99 ) /* MR10 */ fatal("k>99 is too big for this implementation of ANTLR!\n"); /* MR10 */ if ( (i=strlen(rule)) > MaxRuleName ) /* MR10 */ fatal( eMsgd("rule name > max of %d\n", MaxRuleName) ); /* MR10 */ strcpy(key,rule); /* MR10 */ key[i]='*'; /* MR10 */ key[i+1] = (char) computation; /* MR20 G. Hobbelt */ /* MR10 */ if (k < 10) { /* MR10 */ key[i+2] = (char) ( '0' + k); /* MR10 */ key[i+3] = '\0'; /* MR10 */ } else { /* MR10 */ key[i+2] = (char) ( '0' + k/10); /* MR10 */ key[i+3] = (char) ( '0' + k % 10); /* MR10 */ key[i+4] = '\0'; /* MR10 */ }; return key; } /* Push a rule onto the kth FOLLOW stack */ void #ifdef __USE_PROTOS FoPush( char *rule, int k ) #else FoPush( rule, k ) char *rule; int k; #endif { RuleEntry *r; require(rule!=NULL, "FoPush: tried to push NULL rule"); require(k<=CLL_k, "FoPush: tried to access non-existent stack"); /*fprintf(stderr, "FoPush(%s)\n", rule);*/ r = (RuleEntry *) hash_get(Rname, rule); if ( r == NULL ) {fatal_internal( eMsg1("rule %s must be defined but isn't", rule) );} if ( FoStack[k] == NULL ) /* Does the kth stack exist yet? */ { /*fprintf(stderr, "allocating FoStack\n");*/ FoStack[k] = (int *) calloc(FoStackSize, sizeof(int)); require(FoStack[k]!=NULL, "FoPush: cannot allocate FOLLOW stack\n"); } if ( FoTOS[k] == NULL ) { FoTOS[k]=FoStack[k]; *(FoTOS[k]) = r->rulenum; } else { #ifdef MEMCHK require(valid(FoStack[k]), "FoPush: invalid FoStack"); #endif if ( FoTOS[k] >= &(FoStack[k][FoStackSize-1]) ) fatal( eMsgd("exceeded max depth of FOLLOW recursion (%d)\n", FoStackSize) ); require(FoTOS[k]>=FoStack[k], eMsg1("FoPush: FoStack stack-ptr is playing out of its sandbox", rule)); ++(FoTOS[k]); *(FoTOS[k]) = r->rulenum; } { /* **** int *p; **** fprintf(stderr, "FoStack[k=%d]:\n", k); **** for (p=FoStack[k]; p<=FoTOS[k]; p++) **** { **** fprintf(stderr, "\t%s\n", RulePtr[*p]->rname); **** } */ } } /* Pop one rule off of the FOLLOW stack. TOS ptr is NULL if empty. */ void #ifdef __USE_PROTOS FoPop( int k ) #else FoPop( k ) int k; #endif { require(k<=CLL_k, "FoPop: tried to access non-existent stack"); /*fprintf(stderr, "FoPop\n");*/ require(FoTOS[k]>=FoStack[k]&&FoTOS[k]<=&(FoStack[k][FoStackSize-1]), "FoPop: FoStack stack-ptr is playing out of its sandbox"); if ( FoTOS[k] == FoStack[k] ) FoTOS[k] = NULL; else (FoTOS[k])--; } /* Compute FOLLOW cycle. * Mark all FOLLOW sets for rules in cycle as incomplete. * Then, save cycle on the cycle list (Cycles) for later resolution. * The Cycle is stored in the form: * (head of cycle==croot, rest of rules in cycle==cyclicDep) * * e.g. (Fo means "FOLLOW of", "-->" means requires or depends on) * * Fo(x)-->Fo(a)-->Fo(b)-->Fo(c)-->Fo(x) * ^----Infinite recursion (cycle) * * the cycle would be: x -> {a,b,c} or stored as (x,{a,b,c}). Fo(x) depends * on the FOLLOW of a,b, and c. The root of a cycle is always complete after * Fo(x) finishes. Fo(a,b,c) however are not. It turns out that all rules * in a FOLLOW cycle have the same FOLLOW set. */ void #ifdef __USE_PROTOS RegisterCycle( char *rule, int k ) #else RegisterCycle( rule, k ) char *rule; int k; #endif { CacheEntry *f; Cycle *c; int *p; RuleEntry *r; require(rule!=NULL, "RegisterCycle: tried to register NULL rule"); require(k<=CLL_k, "RegisterCycle: tried to access non-existent stack"); /*fprintf(stderr, "RegisterCycle(%s)\n", rule);*/ /* Find cycle start */ r = (RuleEntry *) hash_get(Rname, rule); require(r!=NULL,eMsg1("rule %s must be defined but isn't", rule)); require(FoTOS[k]>=FoStack[k]&&FoTOS[k]<=&(FoStack[k][FoStackSize-1]), eMsg1("RegisterCycle(%s): FoStack stack-ptr is playing out of its sandbox", rule)); /*** if ( FoTOS[k]&(FoStack[k][FoStackSize-1]) ) **** { **** fprintf(stderr, "RegisterCycle(%s): FoStack stack-ptr is playing out of its sandbox\n", **** rule); **** fprintf(stderr, "RegisterCycle: sp==0x%x out of bounds 0x%x...0x%x\n", **** FoTOS[k], FoStack[k], &(FoStack[k][FoStackSize-1])); **** exit(PCCTS_EXIT_FAILURE); **** } ****/ #ifdef MEMCHK require(valid(FoStack[k]), "RegisterCycle: invalid FoStack"); #endif for (p=FoTOS[k]; *p != r->rulenum && p >= FoStack[k]; --p) {;} require(p>=FoStack[k], "RegisterCycle: FoStack is screwed up beyond belief"); if ( p == FoTOS[k] ) return; /* don't worry about cycles to oneself */ /* compute cyclic dependents (rules in cycle except head) */ c = newCycle; require(c!=NULL, "RegisterCycle: couldn't alloc new cycle"); c->cyclicDep = empty; c->croot = *p++; /* record root of cycle */ for (; p<=FoTOS[k]; p++) { /* Mark all dependent rules as incomplete */ f = (CacheEntry *) hash_get(Fcache, Fkey(RulePtr[*p]->rname,'o',k)); if ( f==NULL ) { f = newCacheEntry( Fkey(RulePtr[*p]->rname,'o',k) ); hash_add(Fcache, Fkey(RulePtr[*p]->rname,'o',k), (Entry *)f); } f->incomplete = TRUE; set_orel(*p, &(c->cyclicDep)); /* mark rule as dependent of croot */ } list_add(&(Cycles[k]), (void *)c); } /* make all rules in cycle complete * * while ( some set has changed ) do * for each cycle do * if degree of FOLLOW set for croot > old degree then * update all FOLLOW sets for rules in cyclic dependency * change = TRUE * endif * endfor * endwhile */ void #ifdef __USE_PROTOS ResolveFoCycles( int k ) #else ResolveFoCycles( k ) int k; #endif { ListNode *p, *q; Cycle *c; int changed = 1; CacheEntry *f,*g; int r; /* int i; */ /* MR10 not useful */ unsigned d; unsigned *cursor; /* MR10 */ unsigned *origin; /* MR10 */ /*fprintf(stderr, "Resolving following cycles for %d\n", k);*/ while ( changed ) { changed = 0; /* MR10 i = 0; */ for (p = Cycles[k]->next; p!=NULL; p=p->next) { c = (Cycle *) p->elem; /*fprintf(stderr, "cycle %d: %s -->", i++, RulePtr[c->croot]->rname);*/ /*s_fprT(stderr, c->cyclicDep);*/ /*fprintf(stderr, "\n");*/ f = (CacheEntry *) hash_get(Fcache, Fkey(RulePtr[c->croot]->rname,'o',k)); require(f!=NULL, eMsg1("FOLLOW(%s) must be in cache but isn't", RulePtr[c->croot]->rname) ); if ( (d=set_deg(f->fset)) > c->deg ) { /*fprintf(stderr, "Fo(%s) has changed\n", RulePtr[c->croot]->rname);*/ changed = 1; c->deg = d; /* update cycle FOLLOW set degree */ /* MR10 */ origin=set_pdq(c->cyclicDep); /* MR10 */ for (cursor=origin; *cursor != nil; cursor++) { /* MR10 */ r=*cursor; /******** while ( !set_nil(c->cyclicDep) ) { *****/ /******** r = set_int(c->cyclicDep); *****/ /******** set_rm(r, c->cyclicDep); *****/ /*fprintf(stderr, "updating Fo(%s)\n", RulePtr[r]->rname);*/ g = (CacheEntry *) hash_get(Fcache, Fkey(RulePtr[r]->rname,'o',k)); require(g!=NULL, eMsg1("FOLLOW(%s) must be in cache but isn't", RulePtr[r]->rname) ); set_orin(&(g->fset), f->fset); g->incomplete = FALSE; } /* MR10 */ free( (char *) origin); /* MR10 */ origin=NULL; } } /* MR10 - this if statement appears to be meaningless since i is always 0 */ /* MR10 if ( i == 1 ) changed = 0; */ /* if only 1 cycle, no need to repeat */ } /* kill Cycle list */ for (q = Cycles[k]->next; q != NULL; q=p) { p = q->next; set_free( ((Cycle *)q->elem)->cyclicDep ); free((char *)q); } free( (char *)Cycles[k] ); Cycles[k] = NULL; } /* P r i n t i n g S y n t a x D i a g r a m s */ static void #ifdef __USE_PROTOS pBlk( Junction *q, int btype ) #else pBlk( q, btype ) Junction *q; int btype; #endif { int k,a; Junction *alt, *p; q->end->pvisited = TRUE; if ( btype == aLoopBegin ) { require(q->p2!=NULL, "pBlk: invalid ()* block"); PRINT(q->p1); alt = (Junction *)q->p2; PRINT(alt->p1); if ( PrintAnnotate ) { printf(" /* Opt "); k = 1; while ( !set_nil(alt->fset[k]) ) { s_fprT(stdout, alt->fset[k]); if ( k++ == CLL_k ) break; if ( !set_nil(alt->fset[k]) ) printf(", "); } printf(" */\n"); } return; } for (a=1,alt=q; alt != NULL; alt= (Junction *) alt->p2, a++) { if ( alt->p1 != NULL ) PRINT(alt->p1); if ( PrintAnnotate ) { printf( " /* [%d] ", alt->altnum); k = 1; while ( !set_nil(alt->fset[k]) ) { s_fprT(stdout, alt->fset[k]); if ( k++ == CLL_k ) break; if ( !set_nil(alt->fset[k]) ) printf(", "); } if ( alt->p2 == NULL && btype == aOptBlk ) printf( " (optional branch) */\n"); else printf( " */\n"); } /* ignore implied empty alt of Plus blocks */ if ( alt->p2 != NULL && ((Junction *)alt->p2)->ignore ) break; if ( alt->p2 != NULL && !(((Junction *)alt->p2)->p2==NULL && btype == aOptBlk) ) { if ( pLevel == 1 ) { printf("\n"); if ( a+1==pAlt1 || a+1==pAlt2 ) printf("=>"); printf("\t"); } else printf(" "); printf("|"); if ( pLevel == 1 ) { p = (Junction *) ((Junction *)alt->p2)->p1; while ( p!=NULL ) { if ( p->ntype==nAction ) { p=(Junction *)((ActionNode *)p)->next; continue; } if ( p->ntype!=nJunction ) { break; } if ( p->jtype==EndBlk || p->jtype==EndRule ) { p = NULL; break; } p = (Junction *)p->p1; } if ( p==NULL ) printf("\n\t"); /* Empty alt? */ } } } q->end->pvisited = FALSE; } /* How to print out a junction */ void #ifdef __USE_PROTOS pJunc( Junction *q ) #else pJunc( q ) Junction *q; #endif { int dum_k; int doing_rule; require(q!=NULL, "pJunc: NULL node"); require(q->ntype==nJunction, "pJunc: not junction"); if ( q->pvisited == TRUE ) return; q->pvisited = TRUE; switch ( q->jtype ) { case aSubBlk : if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k); if ( q->end->p1 != NULL && ((Junction *)q->end->p1)->ntype==nJunction && ((Junction *)q->end->p1)->jtype == EndRule ) doing_rule = 1; else doing_rule = 0; pLevel++; if ( pLevel==1 ) { if ( pAlt1==1 ) printf("=>"); printf("\t"); } else printf(" "); if ( doing_rule ) { if ( pLevel==1 ) printf(" "); pBlk(q,q->jtype); } else { printf("("); if ( pLevel==1 ) printf(" "); pBlk(q,q->jtype); if ( pLevel>1 ) printf(" "); printf(")"); } if ( q->guess ) printf("?"); pLevel--; if ( PrintAnnotate ) freeBlkFsets(q); if ( q->end->p1 != NULL ) PRINT(q->end->p1); break; case aOptBlk : if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k); pLevel++; if ( pLevel==1 ) { if ( pAlt1==1 ) printf("=>"); printf("\t"); } else printf(" "); printf("{"); if ( pLevel==1 ) printf(" "); pBlk(q,q->jtype); if ( pLevel>1 ) printf(" "); else printf("\n\t"); printf("}"); pLevel--; if ( PrintAnnotate ) freeBlkFsets(q); if ( q->end->p1 != NULL ) PRINT(q->end->p1); break; case aLoopBegin : if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k); pLevel++; if ( pLevel==1 ) { if ( pAlt1==1 ) printf("=>"); printf("\t"); } else printf(" "); printf("("); if ( pLevel==1 ) printf(" "); pBlk(q,q->jtype); if ( pLevel>1 ) printf(" "); else printf("\n\t"); printf(")*"); pLevel--; if ( PrintAnnotate ) freeBlkFsets(q); if ( q->end->p1 != NULL ) PRINT(q->end->p1); break; case aLoopBlk : if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k); pBlk(q,q->jtype); if ( PrintAnnotate ) freeBlkFsets(q); break; case aPlusBlk : if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k); pLevel++; if ( pLevel==1 ) { if ( pAlt1==1 ) printf("=>"); printf("\t"); } else printf(" "); printf("("); if ( pLevel==1 ) printf(" "); pBlk(q,q->jtype); if ( pLevel>1 ) printf(" "); printf(")+"); pLevel--; if ( PrintAnnotate ) freeBlkFsets(q); if ( q->end->p1 != NULL ) PRINT(q->end->p1); break; case EndBlk : break; case RuleBlk : printf( "\n%s :\n", q->rname); PRINT(q->p1); if ( q->p2 != NULL ) PRINT(q->p2); break; case Generic : if ( q->p1 != NULL ) PRINT(q->p1); q->pvisited = FALSE; if ( q->p2 != NULL ) PRINT(q->p2); break; case EndRule : printf( "\n\t;\n"); break; } q->pvisited = FALSE; } /* How to print out a rule reference node */ void #ifdef __USE_PROTOS pRuleRef( RuleRefNode *p ) #else pRuleRef( p ) RuleRefNode *p; #endif { require(p!=NULL, "pRuleRef: NULL node"); require(p->ntype==nRuleRef, "pRuleRef: not rule ref node"); printf( " %s", p->text); PRINT(p->next); } /* How to print out a terminal node */ void #ifdef __USE_PROTOS pToken( TokNode *p ) #else pToken( p ) TokNode *p; #endif { require(p!=NULL, "pToken: NULL node"); require(p->ntype==nToken, "pToken: not token node"); if ( p->wild_card ) printf(" ."); printf( " %s", TerminalString(p->token)); PRINT(p->next); } /* How to print out a terminal node */ void #ifdef __USE_PROTOS pAction( ActionNode *p ) #else pAction( p ) ActionNode *p; #endif { require(p!=NULL, "pAction: NULL node"); require(p->ntype==nAction, "pAction: not action node"); PRINT(p->next); } /* F i l l F o l l o w L i s t s */ /* * Search all rules for all rule reference nodes, q to rule, r. * Add q->next to follow list dangling off of rule r. * i.e. * * r: -o-R-o-->o--> Ptr to node following rule r in another rule * | * o--> Ptr to node following another reference to r. * * This is the data structure employed to avoid FOLLOW set computation. We * simply compute the FIRST (reach) of the EndRule Node which follows the * list found at the end of all rules which are referenced elsewhere. Rules * not invoked by other rules have no follow list (r->end->p1==NULL). * Generally, only start symbols are not invoked by another rule. * * Note that this mechanism also gives a free cross-reference mechanism. * * The entire syntax diagram is layed out like this: * * SynDiag * | * v * o-->R1--o * | * o-->R2--o * | * ... * | * o-->Rn--o * */ void #ifdef __USE_PROTOS FoLink( Node *p ) #else FoLink( p ) Node *p; #endif { RuleEntry *q; Junction *j = (Junction *) p; RuleRefNode *r = (RuleRefNode *) p; if ( p==NULL ) return; require(p->ntype>=1 && p->ntype<=NumNodeTypes, eMsgd("FoLink: invalid diagram node: ntype==%d",p->ntype)); switch ( p->ntype ) { case nJunction : if ( j->fvisited ) return; if ( j->jtype == EndRule ) return; j->fvisited = TRUE; FoLink( j->p1 ); FoLink( j->p2 ); /* MR14 */ /* MR14 */ /* Need to determine whether the guess block is an */ /* MR14 */ /* of the form (alpha)? beta before follow sets are */ /* MR14 */ /* computed. This is necessary to solve problem */ /* MR14 */ /* of doing follow on the alpha of an (alpha)? beta block. */ /* MR14 */ /* MR14 */ /* This is performed by analysis_point as a side-effect. */ /* MR14 */ /* MR14 */ /* MR14 */ if (j->jtype == aSubBlk && j->guess) { /* MR14 */ Junction *ignore; /* MR14 */ ignore=analysis_point(j); /* MR14 */ } /* MR14 */ return; case nRuleRef : if ( r->linked ) return; q = (RuleEntry *) hash_get(Rname, r->text); if ( q == NULL ) { warnFL( eMsg1("rule %s not defined",r->text), FileStr[r->file], r->line ); } else { if ( r->parms!=NULL && RulePtr[q->rulenum]->pdecl==NULL ) { warnFL( eMsg1("rule %s accepts no parameter(s)", r->text), FileStr[r->file], r->line ); } if ( r->parms==NULL && RulePtr[q->rulenum]->pdecl!=NULL ) { warnFL( eMsg1("rule %s requires parameter(s)", r->text), FileStr[r->file], r->line ); } if ( r->assign!=NULL && RulePtr[q->rulenum]->ret==NULL ) { warnFL( eMsg1("rule %s yields no return value(s)", r->text), FileStr[r->file], r->line ); } if ( r->assign==NULL && RulePtr[q->rulenum]->ret!=NULL ) { warnFL( eMsg1("rule %s returns a value(s)", r->text), FileStr[r->file], r->line ); } if ( !r->linked ) { addFoLink( r->next, r->rname, RulePtr[q->rulenum] ); r->linked = TRUE; } } FoLink( r->next ); return; case nToken : FoLink( ((TokNode *)p)->next ); return; case nAction : FoLink( ((ActionNode *)p)->next ); return; default : fatal_internal("invalid node type"); } } /* * Add a reference to the end of a rule. * * 'r' points to the RuleBlk node in a rule. r->end points to the last node * (EndRule jtype) in a rule. * * Initial: * r->end --> o * * After: * r->end --> o-->o--> Ptr to node following rule r in another rule * | * o--> Ptr to node following another reference to r. * * Note that the links are added to the head of the list so that r->end->p1 * always points to the most recently added follow-link. At the end, it should * point to the last reference found in the grammar (starting from the 1st rule). */ void #ifdef __USE_PROTOS addFoLink( Node *p, char *rname, Junction *r ) #else addFoLink( p, rname, r ) Node *p; char *rname; Junction *r; #endif { Junction *j; require(r!=NULL, "addFoLink: incorrect rule graph"); require(r->end!=NULL, "addFoLink: incorrect rule graph"); require(r->end->jtype==EndRule, "addFoLink: incorrect rule graph"); require(p!=NULL, "addFoLink: NULL FOLLOW link"); j = newJunction(); j->rname = rname; /* rname on follow links point to target rule */ j->p1 = p; /* link to other rule */ j->p2 = (Node *) r->end->p1;/* point to head of list */ r->end->p1 = (Node *) j; /* reset head to point to new node */ } void #ifdef __USE_PROTOS GenCrossRef( Junction *p ) #else GenCrossRef( p ) Junction *p; #endif { set a; Junction *j; RuleEntry *q; unsigned e; require(p!=NULL, "GenCrossRef: why are you passing me a null grammar?"); printf("Cross Reference:\n\n"); a = empty; for (; p!=NULL; p = (Junction *)p->p2) { printf("Rule %20s referenced by {", p->rname); /* make a set of rules for uniqueness */ for (j = (Junction *)(p->end)->p1; j!=NULL; j = (Junction *)j->p2) { q = (RuleEntry *) hash_get(Rname, j->rname); require(q!=NULL, "GenCrossRef: FoLinks are screwed up"); set_orel(q->rulenum, &a); } for (; !set_nil(a); set_rm(e, a)) { e = set_int(a); printf(" %s", RulePtr[e]->rname); } printf(" }\n"); } set_free( a ); } /* The single argument is a pointer to the start of an element of a C++ style function prototypet list. Given a pointer to the start of an formal we must locate the comma (or the end of the string) and locate the datatype, formal name, and initial value expression. The function returns a pointer to the character following the comma which terminates the formal declaration, or a pointer to the end of the string if none was found. I thought we were parsing specialists, how come I'm doing this by hand written code ? Examples of input: Foo f, Foo f = Foo(1), Foo f = Foo(1,2), Foo f = &farray[1,2], Foo f = ",", Foo f = ',', TFoo f = TFoo(1,2), A non-zero value for nesting indicates a problem matching '(' and ')', '[' and ']', '<' and '>', '{' and '}', or improperly terminated string or character literal. */ /* * Don't care if it is a valid string literal or not, just find the end * Start with pointer to leading "\"" */ #ifdef __USE_PROTOS char * skipStringLiteral(char *pCurrent) #else char * skipStringLiteral(pCurrent) char *pCurrent; #endif { char *p = pCurrent; if (*p == 0) return p; require (*p == '\"', "skipStringLiteral") p++; for (p = p; *p != 0; p++) { if (*p == '\\') { p++; if (*p == 0) break; p++; } if (*p == '\"') { p++; break; } } return p; } /* * Don't care if it is a valid character literal or not, just find the end * Start with pointer to leading "'" */ #ifdef __USE_PROTOS char * skipCharLiteral(char *pStart) #else char * skipCharLiteral(pStart) char *pStart; #endif { char *p = pStart; if (*p == 0) return p; require (*p == '\'', "skipCharLiteral") p++; for (p = p; *p != 0; p++) { if (*p == '\\') { p++; if (*p == 0) break; p++; } if (*p == '\'') { p++; break; } } return p; } #ifdef __USE_PROTOS char * skipSpaces(char *pStart) #else char * skipSpaces(pStart) char * pStart; #endif { char *p = pStart; while (*p != 0 && isspace(*p)) p++; return p; } #ifdef __USE_PROTOS char * skipToSeparatorOrEqualSign(char *pStart, int *pNest) #else char * skipToSeparatorOrEqualSign(pStart, pNest) char *pStart; int *pNest; #endif { char *p = pStart; int nest = 0; *pNest = (-1); while (*p != 0) { switch (*p) { case '(' : case '[' : case '<' : case '{' : nest++; p++; break; case ')' : case ']' : case '>' : case '}' : nest--; p++; break; case '"' : p = skipStringLiteral(p); break; case '\'' : p = skipCharLiteral(p); break; case '\\': p++; if (*p == 0) goto EXIT; p++; break; case ',': case '=': if (nest == 0) goto EXIT; p++; break; default: p++; } } EXIT: *pNest = nest; return p; } #ifdef __USE_PROTOS char * skipToSeparator(char *pStart, int *pNest) #else char * skipToSeparator(pStart, pNest) char *pStart; int *pNest; #endif { char * p = pStart; for ( ; ; ) { p = skipToSeparatorOrEqualSign(p, pNest); if (*pNest != 0) return p; if (*p == ',') return p; if (*p == 0) return p; p++; } } /* skip to just past the "=" separating the declaration from the initialization value */ #ifdef __USE_PROTOS char * getInitializer(char *pStart) #else char * getInitializer(pStart) char * pStart; #endif { char *p; char *pDataType; char *pSymbol; char *pEqualSign; char *pValue; char *pSeparator; int nest = 0; require(pStart!=NULL, "getInitializer: invalid string"); p = endFormal(pStart, &pDataType, &pSymbol, &pEqualSign, &pValue, &pSeparator, &nest); if (nest != 0) return NULL; if (pEqualSign == NULL) return NULL; if (pValue == NULL) return NULL; return strBetween(pValue, NULL, pSeparator); } /* Examines the string from pStart to pEnd-1. If the string has 0 length or is entirely white space returns 1. Otherwise 0. */ #ifdef __USE_PROTOS int isWhiteString(const char *pStart, const char *pEnd) #else int isWhiteString(pStart, pEnd) const char *pStart; const char *pEnd; #endif { const char *p; for (p = pStart; p < pEnd; p++) { if (! isspace(*p)) return 0; } return 1; } /* This replaces HasComma() which couldn't distinguish foo ["a,b"] from: foo[a,b] */ #ifdef __USE_PROTOS int hasMultipleOperands(char *pStart) #else int hasMultipleOperands(pStart) char *pStart; #endif { char *p = pStart; int nest = 0; p = skipSpaces(p); if (*p == 0) return 0; p = skipToSeparator(p, &nest); if (nest == 0 && *p == ',') return 1; return 0; } #define MAX_STR_BETWEEN_WORK_AREA 1000 static char strBetweenWorkArea[MAX_STR_BETWEEN_WORK_AREA]; /* strBetween(pStart, pNext, pStop) Creates a null terminated string by copying the text between two pointers to a work area. The start of the string is pStart. The end of the string is the character before pNext, or if pNext is null then the character before pStop. Trailing spaces are not included in the copy operation. This is used when a string contains several parts. The pNext part may be optional. The pStop will stop the scan when the optional part is not present (is a null pointer). */ #ifdef __USE_PROTOS char *strBetween(char *pStart, char *pNext, char *pStop) #else char *strBetween(pStart, pNext, pStop) char *pStart; char *pNext; char *pStop; #endif { char *p; char *q = strBetweenWorkArea; const char *pEnd; pEnd = (pNext != NULL) ? pNext : pStop; require (pEnd != NULL, "pEnd == NULL"); require (pEnd >= pStart, "pEnd < pStart"); for (pEnd--; pEnd >= pStart; pEnd--) { /* MR31 */ if (! isspace(*pEnd)) break; } for (p = pStart; p <= pEnd && q < &strBetweenWorkArea[MAX_STR_BETWEEN_WORK_AREA-2]; p++, q++) { *q = *p; } *q = 0; return strBetweenWorkArea; } /* function Returns pointer to character following separator at value which to continue search for next formal. If at the end of the string a pointer to the null byte at the end of the string is returned. pStart Pointer to the starting position of the formal list This may be the middle of a longer string, for example when looking for the end of formal #3 starting from the middle of the complete formal list. ppDataType Returns a pointer to the start of the data type in the formal. Example: pointer to "Foo". ppSymbol Returns a pointer to the start of the formal symbol. Example: pointer to "f". ppEqualSign Returns a pointer to the equal sign separating the formal symbol from the initial value. If there is no "=" then this will be NULL. ppValue Returns a pointer to the initial value part of the formal declaration. Example: pointer to "&farray[1,2]" ppSeparator Returns a pointer to the character which terminated the scan. This should be a pointer to a comma or a null byte which terminates the string. pNest Returns the nesting level when a separator was found. This is non-zero for any kind of error. This is zero for a successful parse of this portion of the formal list. */ #ifdef __USE_PROTOS char * endFormal(char *pStart, char **ppDataType, char **ppSymbol, char **ppEqualSign, char **ppValue, char **ppSeparator, int *pNest) #else char * endFormal(pStart, ppDataType, ppSymbol, ppEqualSign, ppValue, ppSeparator, pNest) char *pStart; char **ppDataType; char **ppSymbol; char **ppEqualSign; char **ppValue; char **ppSeparator; int *pNest; #endif { char *p = pStart; char *q; *ppDataType = NULL; *ppSymbol = NULL; *ppEqualSign = NULL; *ppValue = NULL; *ppSeparator = NULL; *pNest = 0; /* The first non-blank is the start of the datatype */ p = skipSpaces(p); if (*p == 0) goto EXIT; *ppDataType = p; /* We are not looking for the symbol, we are looking for the separator that follows the symbol. Then we'll back up. Search for the ',' or '=" or null terminator. */ p = skipToSeparatorOrEqualSign(p, pNest); if (*pNest != 0) goto EXIT; /* Work backwards to find start of symbol Skip spaces between the end of symbol and separator Assume that there are no spaces in the formal, but there is a space preceding the formal */ for (q = &p[-1]; q >= *ppDataType; q--) { if (! isspace(*q)) break; } if (q < *ppDataType) goto EXIT; /* MR26 Handle things like: IIR_Bool (IIR_Decl::*constraint)() Backup until we hit the end of a symbol string, then find the start of the symbol string. This wont' work for functions with prototypes, but works for the most common cases. For others, use typedef names. */ /* MR26 */ for (q = q; q >= *ppDataType; q--) { /* MR26 */ if (isalpha(*q) || isdigit(*q) || *q == '_' || *q == '$') break; /* MR26 */ } /* MR26 */ if (q < *ppDataType) goto EXIT; for (q = q; q >= *ppDataType; q--) { if ( ! (isalpha(*q) || isdigit(*q) || *q == '_' || *q == '$')) break; } *ppSymbol = &q[1]; if (*p == ',' || *p == 0) { *ppSeparator = p; goto EXIT; } *ppEqualSign = p; p = skipSpaces(++p); *ppValue = p; if (*p == 0) goto EXIT; while (*p != 0 && *pNest == 0 && *p != ',') { p = skipToSeparator(p, pNest); } if (*pNest == 0) *ppSeparator = p; EXIT: if (*p == ',') p++; return p; }