From c8042e10763bca064df257547d04ae3dfcdfaf91 Mon Sep 17 00:00:00 2001 From: Daryl McDaniel Date: Sat, 7 Nov 2015 19:19:19 +0000 Subject: AppPkg/Applications/Python/Python-2.7.10: Initial Checkin part 1/5. The Include, Parser, and Python directories from the cPython 2.7.10 distribution. These files are unchanged and set the baseline for subsequent commits. Contributed-under: TianoCore Contribution Agreement 1.0 Signed-off-by: Daryl McDaniel git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@18737 6f19259b-4bc3-4df7-8a09-765794883524 --- .../Python/Python-2.7.10/Parser/acceler.c | 125 ++ .../Python/Python-2.7.10/Parser/bitset.c | 66 + .../Python/Python-2.7.10/Parser/firstsets.c | 113 ++ .../Python/Python-2.7.10/Parser/grammar.c | 254 +++ .../Python/Python-2.7.10/Parser/grammar1.c | 57 + .../Python/Python-2.7.10/Parser/listnode.c | 66 + .../Python/Python-2.7.10/Parser/metagrammar.c | 159 ++ .../Python/Python-2.7.10/Parser/myreadline.c | 218 +++ .../Python/Python-2.7.10/Parser/node.c | 164 ++ .../Python/Python-2.7.10/Parser/parser.c | 436 +++++ .../Python/Python-2.7.10/Parser/parser.h | 42 + .../Python/Python-2.7.10/Parser/parsetok.c | 282 ++++ .../Python/Python-2.7.10/Parser/tokenizer.c | 1755 ++++++++++++++++++++ .../Python/Python-2.7.10/Parser/tokenizer.h | 70 + 14 files changed, 3807 insertions(+) create mode 100644 AppPkg/Applications/Python/Python-2.7.10/Parser/acceler.c create mode 100644 AppPkg/Applications/Python/Python-2.7.10/Parser/bitset.c create mode 100644 AppPkg/Applications/Python/Python-2.7.10/Parser/firstsets.c create mode 100644 AppPkg/Applications/Python/Python-2.7.10/Parser/grammar.c create mode 100644 AppPkg/Applications/Python/Python-2.7.10/Parser/grammar1.c create mode 100644 AppPkg/Applications/Python/Python-2.7.10/Parser/listnode.c create mode 100644 AppPkg/Applications/Python/Python-2.7.10/Parser/metagrammar.c create mode 100644 AppPkg/Applications/Python/Python-2.7.10/Parser/myreadline.c create mode 100644 AppPkg/Applications/Python/Python-2.7.10/Parser/node.c create mode 100644 AppPkg/Applications/Python/Python-2.7.10/Parser/parser.c create mode 100644 AppPkg/Applications/Python/Python-2.7.10/Parser/parser.h create mode 100644 AppPkg/Applications/Python/Python-2.7.10/Parser/parsetok.c create mode 100644 AppPkg/Applications/Python/Python-2.7.10/Parser/tokenizer.c create mode 100644 AppPkg/Applications/Python/Python-2.7.10/Parser/tokenizer.h (limited to 'AppPkg/Applications/Python/Python-2.7.10/Parser') diff --git a/AppPkg/Applications/Python/Python-2.7.10/Parser/acceler.c b/AppPkg/Applications/Python/Python-2.7.10/Parser/acceler.c new file mode 100644 index 0000000000..f6036d9739 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.10/Parser/acceler.c @@ -0,0 +1,125 @@ + +/* Parser accelerator module */ + +/* The parser as originally conceived had disappointing performance. + This module does some precomputation that speeds up the selection + of a DFA based upon a token, turning a search through an array + into a simple indexing operation. The parser now cannot work + without the accelerators installed. Note that the accelerators + are installed dynamically when the parser is initialized, they + are not part of the static data structure written on graminit.[ch] + by the parser generator. 
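+
+   Concretely, once accelerators are installed, finding the transition
+   for a given token label costs a bounds check plus one array load
+   instead of a linear scan over the state's arcs.  The lookup that
+   PyParser_AddToken performs (see parser.c in this checkin) boils
+   down to the following sketch:
+
+       if (s->s_lower <= ilabel && ilabel < s->s_upper) {
+           int x = s->s_accel[ilabel - s->s_lower];
+           if (x != -1)
+               ... follow the arc encoded in x ...
+       }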
*/ + +#include "pgenheaders.h" +#include "grammar.h" +#include "node.h" +#include "token.h" +#include "parser.h" + +/* Forward references */ +static void fixdfa(grammar *, dfa *); +static void fixstate(grammar *, state *); + +void +PyGrammar_AddAccelerators(grammar *g) +{ + dfa *d; + int i; + d = g->g_dfa; + for (i = g->g_ndfas; --i >= 0; d++) + fixdfa(g, d); + g->g_accel = 1; +} + +void +PyGrammar_RemoveAccelerators(grammar *g) +{ + dfa *d; + int i; + g->g_accel = 0; + d = g->g_dfa; + for (i = g->g_ndfas; --i >= 0; d++) { + state *s; + int j; + s = d->d_state; + for (j = 0; j < d->d_nstates; j++, s++) { + if (s->s_accel) + PyObject_FREE(s->s_accel); + s->s_accel = NULL; + } + } +} + +static void +fixdfa(grammar *g, dfa *d) +{ + state *s; + int j; + s = d->d_state; + for (j = 0; j < d->d_nstates; j++, s++) + fixstate(g, s); +} + +static void +fixstate(grammar *g, state *s) +{ + arc *a; + int k; + int *accel; + int nl = g->g_ll.ll_nlabels; + s->s_accept = 0; + accel = (int *) PyObject_MALLOC(nl * sizeof(int)); + if (accel == NULL) { + fprintf(stderr, "no mem to build parser accelerators\n"); + exit(1); + } + for (k = 0; k < nl; k++) + accel[k] = -1; + a = s->s_arc; + for (k = s->s_narcs; --k >= 0; a++) { + int lbl = a->a_lbl; + label *l = &g->g_ll.ll_label[lbl]; + int type = l->lb_type; + if (a->a_arrow >= (1 << 7)) { + printf("XXX too many states!\n"); + continue; + } + if (ISNONTERMINAL(type)) { + dfa *d1 = PyGrammar_FindDFA(g, type); + int ibit; + if (type - NT_OFFSET >= (1 << 7)) { + printf("XXX too high nonterminal number!\n"); + continue; + } + for (ibit = 0; ibit < g->g_ll.ll_nlabels; ibit++) { + if (testbit(d1->d_first, ibit)) { + if (accel[ibit] != -1) + printf("XXX ambiguity!\n"); + accel[ibit] = a->a_arrow | (1 << 7) | + ((type - NT_OFFSET) << 8); + } + } + } + else if (lbl == EMPTY) + s->s_accept = 1; + else if (lbl >= 0 && lbl < nl) + accel[lbl] = a->a_arrow; + } + while (nl > 0 && accel[nl-1] == -1) + nl--; + for (k = 0; k < nl && accel[k] == -1;) + k++; + if (k < nl) { + int i; + s->s_accel = (int *) PyObject_MALLOC((nl-k) * sizeof(int)); + if (s->s_accel == NULL) { + fprintf(stderr, "no mem to add parser accelerators\n"); + exit(1); + } + s->s_lower = k; + s->s_upper = nl; + for (i = 0; k < nl; i++, k++) + s->s_accel[i] = accel[k]; + } + PyObject_FREE(accel); +} diff --git a/AppPkg/Applications/Python/Python-2.7.10/Parser/bitset.c b/AppPkg/Applications/Python/Python-2.7.10/Parser/bitset.c new file mode 100644 index 0000000000..3bf5da1dba --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.10/Parser/bitset.c @@ -0,0 +1,66 @@ + +/* Bitset primitives used by the parser generator */ + +#include "pgenheaders.h" +#include "bitset.h" + +bitset +newbitset(int nbits) +{ + int nbytes = NBYTES(nbits); + bitset ss = (char *)PyObject_MALLOC(sizeof(BYTE) * nbytes); + + if (ss == NULL) + Py_FatalError("no mem for bitset"); + + ss += nbytes; + while (--nbytes >= 0) + *--ss = 0; + return ss; +} + +void +delbitset(bitset ss) +{ + PyObject_FREE(ss); +} + +int +addbit(bitset ss, int ibit) +{ + int ibyte = BIT2BYTE(ibit); + BYTE mask = BIT2MASK(ibit); + + if (ss[ibyte] & mask) + return 0; /* Bit already set */ + ss[ibyte] |= mask; + return 1; +} + +#if 0 /* Now a macro */ +int +testbit(bitset ss, int ibit) +{ + return (ss[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0; +} +#endif + +int +samebitset(bitset ss1, bitset ss2, int nbits) +{ + int i; + + for (i = NBYTES(nbits); --i >= 0; ) + if (*ss1++ != *ss2++) + return 0; + return 1; +} + +void +mergebitset(bitset ss1, bitset ss2, int nbits) +{ + int 
i; + + for (i = NBYTES(nbits); --i >= 0; ) + *ss1++ |= *ss2++; +} diff --git a/AppPkg/Applications/Python/Python-2.7.10/Parser/firstsets.c b/AppPkg/Applications/Python/Python-2.7.10/Parser/firstsets.c new file mode 100644 index 0000000000..69faf8f09e --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.10/Parser/firstsets.c @@ -0,0 +1,113 @@ + +/* Computation of FIRST stets */ + +#include "pgenheaders.h" +#include "grammar.h" +#include "token.h" + +extern int Py_DebugFlag; + +/* Forward */ +static void calcfirstset(grammar *, dfa *); + +void +addfirstsets(grammar *g) +{ + int i; + dfa *d; + + if (Py_DebugFlag) + printf("Adding FIRST sets ...\n"); + for (i = 0; i < g->g_ndfas; i++) { + d = &g->g_dfa[i]; + if (d->d_first == NULL) + calcfirstset(g, d); + } +} + +static void +calcfirstset(grammar *g, dfa *d) +{ + int i, j; + state *s; + arc *a; + int nsyms; + int *sym; + int nbits; + static bitset dummy; + bitset result; + int type; + dfa *d1; + label *l0; + + if (Py_DebugFlag) + printf("Calculate FIRST set for '%s'\n", d->d_name); + + if (dummy == NULL) + dummy = newbitset(1); + if (d->d_first == dummy) { + fprintf(stderr, "Left-recursion for '%s'\n", d->d_name); + return; + } + if (d->d_first != NULL) { + fprintf(stderr, "Re-calculating FIRST set for '%s' ???\n", + d->d_name); + } + d->d_first = dummy; + + l0 = g->g_ll.ll_label; + nbits = g->g_ll.ll_nlabels; + result = newbitset(nbits); + + sym = (int *)PyObject_MALLOC(sizeof(int)); + if (sym == NULL) + Py_FatalError("no mem for new sym in calcfirstset"); + nsyms = 1; + sym[0] = findlabel(&g->g_ll, d->d_type, (char *)NULL); + + s = &d->d_state[d->d_initial]; + for (i = 0; i < s->s_narcs; i++) { + a = &s->s_arc[i]; + for (j = 0; j < nsyms; j++) { + if (sym[j] == a->a_lbl) + break; + } + if (j >= nsyms) { /* New label */ + sym = (int *)PyObject_REALLOC(sym, + sizeof(int) * (nsyms + 1)); + if (sym == NULL) + Py_FatalError( + "no mem to resize sym in calcfirstset"); + sym[nsyms++] = a->a_lbl; + type = l0[a->a_lbl].lb_type; + if (ISNONTERMINAL(type)) { + d1 = PyGrammar_FindDFA(g, type); + if (d1->d_first == dummy) { + fprintf(stderr, + "Left-recursion below '%s'\n", + d->d_name); + } + else { + if (d1->d_first == NULL) + calcfirstset(g, d1); + mergebitset(result, + d1->d_first, nbits); + } + } + else if (ISTERMINAL(type)) { + addbit(result, a->a_lbl); + } + } + } + d->d_first = result; + if (Py_DebugFlag) { + printf("FIRST set for '%s': {", d->d_name); + for (i = 0; i < nbits; i++) { + if (testbit(result, i)) + printf(" %s", PyGrammar_LabelRepr(&l0[i])); + } + printf(" }\n"); + } + + PyObject_FREE(sym); +} diff --git a/AppPkg/Applications/Python/Python-2.7.10/Parser/grammar.c b/AppPkg/Applications/Python/Python-2.7.10/Parser/grammar.c new file mode 100644 index 0000000000..4cce422d1a --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.10/Parser/grammar.c @@ -0,0 +1,254 @@ + +/* Grammar implementation */ + +#include "Python.h" +#include "pgenheaders.h" + +#include + +#include "token.h" +#include "grammar.h" + +#ifdef RISCOS +#include +#endif + +extern int Py_DebugFlag; + +grammar * +newgrammar(int start) +{ + grammar *g; + + g = (grammar *)PyObject_MALLOC(sizeof(grammar)); + if (g == NULL) + Py_FatalError("no mem for new grammar"); + g->g_ndfas = 0; + g->g_dfa = NULL; + g->g_start = start; + g->g_ll.ll_nlabels = 0; + g->g_ll.ll_label = NULL; + g->g_accel = 0; + return g; +} + +dfa * +adddfa(grammar *g, int type, char *name) +{ + dfa *d; + + g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa, + sizeof(dfa) * (g->g_ndfas + 1)); + if (g->g_dfa == 
NULL) + Py_FatalError("no mem to resize dfa in adddfa"); + d = &g->g_dfa[g->g_ndfas++]; + d->d_type = type; + d->d_name = strdup(name); + d->d_nstates = 0; + d->d_state = NULL; + d->d_initial = -1; + d->d_first = NULL; + return d; /* Only use while fresh! */ +} + +int +addstate(dfa *d) +{ + state *s; + + d->d_state = (state *)PyObject_REALLOC(d->d_state, + sizeof(state) * (d->d_nstates + 1)); + if (d->d_state == NULL) + Py_FatalError("no mem to resize state in addstate"); + s = &d->d_state[d->d_nstates++]; + s->s_narcs = 0; + s->s_arc = NULL; + s->s_lower = 0; + s->s_upper = 0; + s->s_accel = NULL; + s->s_accept = 0; + return s - d->d_state; +} + +void +addarc(dfa *d, int from, int to, int lbl) +{ + state *s; + arc *a; + + assert(0 <= from && from < d->d_nstates); + assert(0 <= to && to < d->d_nstates); + + s = &d->d_state[from]; + s->s_arc = (arc *)PyObject_REALLOC(s->s_arc, sizeof(arc) * (s->s_narcs + 1)); + if (s->s_arc == NULL) + Py_FatalError("no mem to resize arc list in addarc"); + a = &s->s_arc[s->s_narcs++]; + a->a_lbl = lbl; + a->a_arrow = to; +} + +int +addlabel(labellist *ll, int type, char *str) +{ + int i; + label *lb; + + for (i = 0; i < ll->ll_nlabels; i++) { + if (ll->ll_label[i].lb_type == type && + strcmp(ll->ll_label[i].lb_str, str) == 0) + return i; + } + ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label, + sizeof(label) * (ll->ll_nlabels + 1)); + if (ll->ll_label == NULL) + Py_FatalError("no mem to resize labellist in addlabel"); + lb = &ll->ll_label[ll->ll_nlabels++]; + lb->lb_type = type; + lb->lb_str = strdup(str); + if (Py_DebugFlag) + printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels, + PyGrammar_LabelRepr(lb)); + return lb - ll->ll_label; +} + +/* Same, but rather dies than adds */ + +int +findlabel(labellist *ll, int type, char *str) +{ + int i; + + for (i = 0; i < ll->ll_nlabels; i++) { + if (ll->ll_label[i].lb_type == type /*&& + strcmp(ll->ll_label[i].lb_str, str) == 0*/) + return i; + } + fprintf(stderr, "Label %d/'%s' not found\n", type, str); + Py_FatalError("grammar.c:findlabel()"); + return 0; /* Make gcc -Wall happy */ +} + +/* Forward */ +static void translabel(grammar *, label *); + +void +translatelabels(grammar *g) +{ + int i; + +#ifdef Py_DEBUG + printf("Translating labels ...\n"); +#endif + /* Don't translate EMPTY */ + for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++) + translabel(g, &g->g_ll.ll_label[i]); +} + +static void +translabel(grammar *g, label *lb) +{ + int i; + + if (Py_DebugFlag) + printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb)); + + if (lb->lb_type == NAME) { + for (i = 0; i < g->g_ndfas; i++) { + if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) { + if (Py_DebugFlag) + printf( + "Label %s is non-terminal %d.\n", + lb->lb_str, + g->g_dfa[i].d_type); + lb->lb_type = g->g_dfa[i].d_type; + free(lb->lb_str); + lb->lb_str = NULL; + return; + } + } + for (i = 0; i < (int)N_TOKENS; i++) { + if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) { + if (Py_DebugFlag) + printf("Label %s is terminal %d.\n", + lb->lb_str, i); + lb->lb_type = i; + free(lb->lb_str); + lb->lb_str = NULL; + return; + } + } + printf("Can't translate NAME label '%s'\n", lb->lb_str); + return; + } + + if (lb->lb_type == STRING) { + if (isalpha(Py_CHARMASK(lb->lb_str[1])) || + lb->lb_str[1] == '_') { + char *p; + char *src; + char *dest; + size_t name_len; + if (Py_DebugFlag) + printf("Label %s is a keyword\n", lb->lb_str); + lb->lb_type = NAME; + src = lb->lb_str + 1; + p = strchr(src, '\''); + if (p) + name_len = p - src; + else + name_len = 
strlen(src); + dest = (char *)malloc(name_len + 1); + if (!dest) { + printf("Can't alloc dest '%s'\n", src); + return; + } + strncpy(dest, src, name_len); + dest[name_len] = '\0'; + free(lb->lb_str); + lb->lb_str = dest; + } + else if (lb->lb_str[2] == lb->lb_str[0]) { + int type = (int) PyToken_OneChar(lb->lb_str[1]); + if (type != OP) { + lb->lb_type = type; + free(lb->lb_str); + lb->lb_str = NULL; + } + else + printf("Unknown OP label %s\n", + lb->lb_str); + } + else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) { + int type = (int) PyToken_TwoChars(lb->lb_str[1], + lb->lb_str[2]); + if (type != OP) { + lb->lb_type = type; + free(lb->lb_str); + lb->lb_str = NULL; + } + else + printf("Unknown OP label %s\n", + lb->lb_str); + } + else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) { + int type = (int) PyToken_ThreeChars(lb->lb_str[1], + lb->lb_str[2], + lb->lb_str[3]); + if (type != OP) { + lb->lb_type = type; + free(lb->lb_str); + lb->lb_str = NULL; + } + else + printf("Unknown OP label %s\n", + lb->lb_str); + } + else + printf("Can't translate STRING label %s\n", + lb->lb_str); + } + else + printf("Can't translate label '%s'\n", + PyGrammar_LabelRepr(lb)); +} diff --git a/AppPkg/Applications/Python/Python-2.7.10/Parser/grammar1.c b/AppPkg/Applications/Python/Python-2.7.10/Parser/grammar1.c new file mode 100644 index 0000000000..27db22f3f9 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.10/Parser/grammar1.c @@ -0,0 +1,57 @@ + +/* Grammar subroutines needed by parser */ + +#include "Python.h" +#include "pgenheaders.h" +#include "grammar.h" +#include "token.h" + +/* Return the DFA for the given type */ + +dfa * +PyGrammar_FindDFA(grammar *g, register int type) +{ + register dfa *d; +#if 1 + /* Massive speed-up */ + d = &g->g_dfa[type - NT_OFFSET]; + assert(d->d_type == type); + return d; +#else + /* Old, slow version */ + register int i; + + for (i = g->g_ndfas, d = g->g_dfa; --i >= 0; d++) { + if (d->d_type == type) + return d; + } + assert(0); + /* NOTREACHED */ +#endif +} + +char * +PyGrammar_LabelRepr(label *lb) +{ + static char buf[100]; + + if (lb->lb_type == ENDMARKER) + return "EMPTY"; + else if (ISNONTERMINAL(lb->lb_type)) { + if (lb->lb_str == NULL) { + PyOS_snprintf(buf, sizeof(buf), "NT%d", lb->lb_type); + return buf; + } + else + return lb->lb_str; + } + else { + if (lb->lb_str == NULL) + return _PyParser_TokenNames[lb->lb_type]; + else { + PyOS_snprintf(buf, sizeof(buf), "%.32s(%.32s)", + _PyParser_TokenNames[lb->lb_type], lb->lb_str); + return buf; + } + } +} diff --git a/AppPkg/Applications/Python/Python-2.7.10/Parser/listnode.c b/AppPkg/Applications/Python/Python-2.7.10/Parser/listnode.c new file mode 100644 index 0000000000..8d59233599 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.10/Parser/listnode.c @@ -0,0 +1,66 @@ + +/* List a node on a file */ + +#include "pgenheaders.h" +#include "token.h" +#include "node.h" + +/* Forward */ +static void list1node(FILE *, node *); +static void listnode(FILE *, node *); + +void +PyNode_ListTree(node *n) +{ + listnode(stdout, n); +} + +static int level, atbol; + +static void +listnode(FILE *fp, node *n) +{ + level = 0; + atbol = 1; + list1node(fp, n); +} + +static void +list1node(FILE *fp, node *n) +{ + if (n == 0) + return; + if (ISNONTERMINAL(TYPE(n))) { + int i; + for (i = 0; i < NCH(n); i++) + list1node(fp, CHILD(n, i)); + } + else if (ISTERMINAL(TYPE(n))) { + switch (TYPE(n)) { + case INDENT: + ++level; + break; + case DEDENT: + --level; + break; + default: + if (atbol) { + int 
i; + for (i = 0; i < level; ++i) + fprintf(fp, "\t"); + atbol = 0; + } + if (TYPE(n) == NEWLINE) { + if (STR(n) != NULL) + fprintf(fp, "%s", STR(n)); + fprintf(fp, "\n"); + atbol = 1; + } + else + fprintf(fp, "%s ", STR(n)); + break; + } + } + else + fprintf(fp, "? "); +} diff --git a/AppPkg/Applications/Python/Python-2.7.10/Parser/metagrammar.c b/AppPkg/Applications/Python/Python-2.7.10/Parser/metagrammar.c new file mode 100644 index 0000000000..299ccaa079 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.10/Parser/metagrammar.c @@ -0,0 +1,159 @@ + +#include "pgenheaders.h" +#include "metagrammar.h" +#include "grammar.h" +#include "pgen.h" +static arc arcs_0_0[3] = { + {2, 0}, + {3, 0}, + {4, 1}, +}; +static arc arcs_0_1[1] = { + {0, 1}, +}; +static state states_0[2] = { + {3, arcs_0_0}, + {1, arcs_0_1}, +}; +static arc arcs_1_0[1] = { + {5, 1}, +}; +static arc arcs_1_1[1] = { + {6, 2}, +}; +static arc arcs_1_2[1] = { + {7, 3}, +}; +static arc arcs_1_3[1] = { + {3, 4}, +}; +static arc arcs_1_4[1] = { + {0, 4}, +}; +static state states_1[5] = { + {1, arcs_1_0}, + {1, arcs_1_1}, + {1, arcs_1_2}, + {1, arcs_1_3}, + {1, arcs_1_4}, +}; +static arc arcs_2_0[1] = { + {8, 1}, +}; +static arc arcs_2_1[2] = { + {9, 0}, + {0, 1}, +}; +static state states_2[2] = { + {1, arcs_2_0}, + {2, arcs_2_1}, +}; +static arc arcs_3_0[1] = { + {10, 1}, +}; +static arc arcs_3_1[2] = { + {10, 1}, + {0, 1}, +}; +static state states_3[2] = { + {1, arcs_3_0}, + {2, arcs_3_1}, +}; +static arc arcs_4_0[2] = { + {11, 1}, + {13, 2}, +}; +static arc arcs_4_1[1] = { + {7, 3}, +}; +static arc arcs_4_2[3] = { + {14, 4}, + {15, 4}, + {0, 2}, +}; +static arc arcs_4_3[1] = { + {12, 4}, +}; +static arc arcs_4_4[1] = { + {0, 4}, +}; +static state states_4[5] = { + {2, arcs_4_0}, + {1, arcs_4_1}, + {3, arcs_4_2}, + {1, arcs_4_3}, + {1, arcs_4_4}, +}; +static arc arcs_5_0[3] = { + {5, 1}, + {16, 1}, + {17, 2}, +}; +static arc arcs_5_1[1] = { + {0, 1}, +}; +static arc arcs_5_2[1] = { + {7, 3}, +}; +static arc arcs_5_3[1] = { + {18, 1}, +}; +static state states_5[4] = { + {3, arcs_5_0}, + {1, arcs_5_1}, + {1, arcs_5_2}, + {1, arcs_5_3}, +}; +static dfa dfas[6] = { + {256, "MSTART", 0, 2, states_0, + "\070\000\000"}, + {257, "RULE", 0, 5, states_1, + "\040\000\000"}, + {258, "RHS", 0, 2, states_2, + "\040\010\003"}, + {259, "ALT", 0, 2, states_3, + "\040\010\003"}, + {260, "ITEM", 0, 5, states_4, + "\040\010\003"}, + {261, "ATOM", 0, 4, states_5, + "\040\000\003"}, +}; +static label labels[19] = { + {0, "EMPTY"}, + {256, 0}, + {257, 0}, + {4, 0}, + {0, 0}, + {1, 0}, + {11, 0}, + {258, 0}, + {259, 0}, + {18, 0}, + {260, 0}, + {9, 0}, + {10, 0}, + {261, 0}, + {16, 0}, + {14, 0}, + {3, 0}, + {7, 0}, + {8, 0}, +}; +static grammar _PyParser_Grammar = { + 6, + dfas, + {19, labels}, + 256 +}; + +grammar * +meta_grammar(void) +{ + return &_PyParser_Grammar; +} + +grammar * +Py_meta_grammar(void) +{ + return meta_grammar(); +} diff --git a/AppPkg/Applications/Python/Python-2.7.10/Parser/myreadline.c b/AppPkg/Applications/Python/Python-2.7.10/Parser/myreadline.c new file mode 100644 index 0000000000..ca99e35ced --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.10/Parser/myreadline.c @@ -0,0 +1,218 @@ + +/* Readline interface for tokenizer.c and [raw_]input() in bltinmodule.c. + By default, or when stdin is not a tty device, we have a super + simple my_readline function using fgets. + Optionally, we can use the GNU readline library. 
+ my_readline() has a different return value from GNU readline(): + - NULL if an interrupt occurred or if an error occurred + - a malloc'ed empty string if EOF was read + - a malloc'ed string ending in \n normally +*/ + +#include "Python.h" +#ifdef MS_WINDOWS +#define WIN32_LEAN_AND_MEAN +#include "windows.h" +#endif /* MS_WINDOWS */ + +#ifdef __VMS +extern char* vms__StdioReadline(FILE *sys_stdin, FILE *sys_stdout, char *prompt); +#endif + + +PyThreadState* _PyOS_ReadlineTState; + +#ifdef WITH_THREAD +#include "pythread.h" +static PyThread_type_lock _PyOS_ReadlineLock = NULL; +#endif + +int (*PyOS_InputHook)(void) = NULL; + +#ifdef RISCOS +int Py_RISCOSWimpFlag; +#endif + +/* This function restarts a fgets() after an EINTR error occurred + except if PyOS_InterruptOccurred() returns true. */ + +static int +my_fgets(char *buf, int len, FILE *fp) +{ + char *p; +#ifdef MS_WINDOWS + int i; +#endif + + while (1) { + if (PyOS_InputHook != NULL) + (void)(PyOS_InputHook)(); + errno = 0; + clearerr(fp); + p = fgets(buf, len, fp); + if (p != NULL) + return 0; /* No error */ +#ifdef MS_WINDOWS + /* Ctrl-C anywhere on the line or Ctrl-Z if the only character + on a line will set ERROR_OPERATION_ABORTED. Under normal + circumstances Ctrl-C will also have caused the SIGINT handler + to fire. This signal fires in another thread and is not + guaranteed to have occurred before this point in the code. + + Therefore: check in a small loop to see if the trigger has + fired, in which case assume this is a Ctrl-C event. If it + hasn't fired within 10ms assume that this is a Ctrl-Z on its + own or that the signal isn't going to fire for some other + reason and drop through to check for EOF. + */ + if (GetLastError()==ERROR_OPERATION_ABORTED) { + for (i = 0; i < 10; i++) { + if (PyOS_InterruptOccurred()) + return 1; + Sleep(1); + } + } +#endif /* MS_WINDOWS */ + if (feof(fp)) { + clearerr(fp); + return -1; /* EOF */ + } +#ifdef EINTR + if (errno == EINTR) { + int s; +#ifdef WITH_THREAD + PyEval_RestoreThread(_PyOS_ReadlineTState); +#endif + s = PyErr_CheckSignals(); +#ifdef WITH_THREAD + PyEval_SaveThread(); +#endif + if (s < 0) + return 1; + /* try again */ + continue; + } +#endif + if (PyOS_InterruptOccurred()) { + return 1; /* Interrupt */ + } + return -2; /* Error */ + } + /* NOTREACHED */ +} + + +/* Readline implementation using fgets() */ + +char * +PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, char *prompt) +{ + size_t n; + char *p; + n = 100; + if ((p = (char *)PyMem_MALLOC(n)) == NULL) + return NULL; + fflush(sys_stdout); +#ifndef RISCOS + if (prompt) + fprintf(stderr, "%s", prompt); +#else + if (prompt) { + if(Py_RISCOSWimpFlag) + fprintf(stderr, "\x0cr%s\x0c", prompt); + else + fprintf(stderr, "%s", prompt); + } +#endif + fflush(stderr); + switch (my_fgets(p, (int)n, sys_stdin)) { + case 0: /* Normal case */ + break; + case 1: /* Interrupt */ + PyMem_FREE(p); + return NULL; + case -1: /* EOF */ + case -2: /* Error */ + default: /* Shouldn't happen */ + *p = '\0'; + break; + } + n = strlen(p); + while (n > 0 && p[n-1] != '\n') { + size_t incr = n+2; + p = (char *)PyMem_REALLOC(p, n + incr); + if (p == NULL) + return NULL; + if (incr > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, "input line too long"); + } + if (my_fgets(p+n, (int)incr, sys_stdin) != 0) + break; + n += strlen(p+n); + } + return (char *)PyMem_REALLOC(p, n+1); +} + + +/* By initializing this function pointer, systems embedding Python can + override the readline function. 
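+
+   A minimal sketch of such an override (my_gui_readline is a
+   hypothetical reader supplied by the embedding application; it must
+   match the pointer's signature, declared just below):
+
+       extern char *my_gui_readline(FILE *in, FILE *out, char *prompt);
+       ...
+       PyOS_ReadlineFunctionPointer = my_gui_readline;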
+ + Note: Python expects in return a buffer allocated with PyMem_Malloc. */ + +char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, char *); + + +/* Interface used by tokenizer.c and bltinmodule.c */ + +char * +PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, char *prompt) +{ + char *rv; + + if (_PyOS_ReadlineTState == PyThreadState_GET()) { + PyErr_SetString(PyExc_RuntimeError, + "can't re-enter readline"); + return NULL; + } + + + if (PyOS_ReadlineFunctionPointer == NULL) { +#ifdef __VMS + PyOS_ReadlineFunctionPointer = vms__StdioReadline; +#else + PyOS_ReadlineFunctionPointer = PyOS_StdioReadline; +#endif + } + +#ifdef WITH_THREAD + if (_PyOS_ReadlineLock == NULL) { + _PyOS_ReadlineLock = PyThread_allocate_lock(); + } +#endif + + _PyOS_ReadlineTState = PyThreadState_GET(); + Py_BEGIN_ALLOW_THREADS +#ifdef WITH_THREAD + PyThread_acquire_lock(_PyOS_ReadlineLock, 1); +#endif + + /* This is needed to handle the unlikely case that the + * interpreter is in interactive mode *and* stdin/out are not + * a tty. This can happen, for example if python is run like + * this: python -i < test1.py + */ + if (!isatty (fileno (sys_stdin)) || !isatty (fileno (sys_stdout))) + rv = PyOS_StdioReadline (sys_stdin, sys_stdout, prompt); + else + rv = (*PyOS_ReadlineFunctionPointer)(sys_stdin, sys_stdout, + prompt); + Py_END_ALLOW_THREADS + +#ifdef WITH_THREAD + PyThread_release_lock(_PyOS_ReadlineLock); +#endif + + _PyOS_ReadlineTState = NULL; + + return rv; +} diff --git a/AppPkg/Applications/Python/Python-2.7.10/Parser/node.c b/AppPkg/Applications/Python/Python-2.7.10/Parser/node.c new file mode 100644 index 0000000000..4f1fbf8f23 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.10/Parser/node.c @@ -0,0 +1,164 @@ +/* Parse tree node implementation */ + +#include "Python.h" +#include "node.h" +#include "errcode.h" + +node * +PyNode_New(int type) +{ + node *n = (node *) PyObject_MALLOC(1 * sizeof(node)); + if (n == NULL) + return NULL; + n->n_type = type; + n->n_str = NULL; + n->n_lineno = 0; + n->n_nchildren = 0; + n->n_child = NULL; + return n; +} + +/* See comments at XXXROUNDUP below. Returns -1 on overflow. */ +static int +fancy_roundup(int n) +{ + /* Round up to the closest power of 2 >= n. */ + int result = 256; + assert(n > 128); + while (result < n) { + result <<= 1; + if (result <= 0) + return -1; + } + return result; +} + +/* A gimmick to make massive numbers of reallocs quicker. The result is + * a number >= the input. In PyNode_AddChild, it's used like so, when + * we're about to add child number current_size + 1: + * + * if XXXROUNDUP(current_size) < XXXROUNDUP(current_size + 1): + * allocate space for XXXROUNDUP(current_size + 1) total children + * else: + * we already have enough space + * + * Since a node starts out empty, we must have + * + * XXXROUNDUP(0) < XXXROUNDUP(1) + * + * so that we allocate space for the first child. One-child nodes are very + * common (presumably that would change if we used a more abstract form + * of syntax tree), so to avoid wasting memory it's desirable that + * XXXROUNDUP(1) == 1. That in turn forces XXXROUNDUP(0) == 0. + * + * Else for 2 <= n <= 128, we round up to the closest multiple of 4. Why 4? + * Rounding up to a multiple of an exact power of 2 is very efficient, and + * most nodes with more than one child have <= 4 kids. + * + * Else we call fancy_roundup() to grow proportionately to n. 
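+ * (For example, fancy_roundup(300) starts its result at 256 and
+ * doubles once, returning 512.)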
We've got an + * extreme case then (like test_longexp.py), and on many platforms doing + * anything less than proportional growth leads to exorbitant runtime + * (e.g., MacPython), or extreme fragmentation of user address space (e.g., + * Win98). + * + * In a run of compileall across the 2.3a0 Lib directory, Andrew MacIntyre + * reported that, with this scheme, 89% of PyObject_REALLOC calls in + * PyNode_AddChild passed 1 for the size, and 9% passed 4. So this usually + * wastes very little memory, but is very effective at sidestepping + * platform-realloc disasters on vulnerable platforms. + * + * Note that this would be straightforward if a node stored its current + * capacity. The code is tricky to avoid that. + */ +#define XXXROUNDUP(n) ((n) <= 1 ? (n) : \ + (n) <= 128 ? (((n) + 3) & ~3) : \ + fancy_roundup(n)) + + +int +PyNode_AddChild(register node *n1, int type, char *str, int lineno, int col_offset) +{ + const int nch = n1->n_nchildren; + int current_capacity; + int required_capacity; + node *n; + + if (nch == INT_MAX || nch < 0) + return E_OVERFLOW; + + current_capacity = XXXROUNDUP(nch); + required_capacity = XXXROUNDUP(nch + 1); + if (current_capacity < 0 || required_capacity < 0) + return E_OVERFLOW; + if (current_capacity < required_capacity) { + if (required_capacity > PY_SIZE_MAX / sizeof(node)) { + return E_NOMEM; + } + n = n1->n_child; + n = (node *) PyObject_REALLOC(n, + required_capacity * sizeof(node)); + if (n == NULL) + return E_NOMEM; + n1->n_child = n; + } + + n = &n1->n_child[n1->n_nchildren++]; + n->n_type = type; + n->n_str = str; + n->n_lineno = lineno; + n->n_col_offset = col_offset; + n->n_nchildren = 0; + n->n_child = NULL; + return 0; +} + +/* Forward */ +static void freechildren(node *); +static Py_ssize_t sizeofchildren(node *n); + + +void +PyNode_Free(node *n) +{ + if (n != NULL) { + freechildren(n); + PyObject_FREE(n); + } +} + +Py_ssize_t +_PyNode_SizeOf(node *n) +{ + Py_ssize_t res = 0; + + if (n != NULL) + res = sizeof(node) + sizeofchildren(n); + return res; +} + +static void +freechildren(node *n) +{ + int i; + for (i = NCH(n); --i >= 0; ) + freechildren(CHILD(n, i)); + if (n->n_child != NULL) + PyObject_FREE(n->n_child); + if (STR(n) != NULL) + PyObject_FREE(STR(n)); +} + +static Py_ssize_t +sizeofchildren(node *n) +{ + Py_ssize_t res = 0; + int i; + for (i = NCH(n); --i >= 0; ) + res += sizeofchildren(CHILD(n, i)); + if (n->n_child != NULL) + /* allocated size of n->n_child array */ + res += XXXROUNDUP(NCH(n)) * sizeof(node); + if (STR(n) != NULL) + res += strlen(STR(n)) + 1; + return res; +} diff --git a/AppPkg/Applications/Python/Python-2.7.10/Parser/parser.c b/AppPkg/Applications/Python/Python-2.7.10/Parser/parser.c new file mode 100644 index 0000000000..d98dfaaca5 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.10/Parser/parser.c @@ -0,0 +1,436 @@ + +/* Parser implementation */ + +/* For a description, see the comments at end of this file */ + +/* XXX To do: error recovery */ + +#include "Python.h" +#include "pgenheaders.h" +#include "token.h" +#include "grammar.h" +#include "node.h" +#include "parser.h" +#include "errcode.h" + + +#ifdef Py_DEBUG +extern int Py_DebugFlag; +#define D(x) if (!Py_DebugFlag); else x +#else +#define D(x) +#endif + + +/* STACK DATA TYPE */ + +static void s_reset(stack *); + +static void +s_reset(stack *s) +{ + s->s_top = &s->s_base[MAXSTACK]; +} + +#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK]) + +static int +s_push(register stack *s, dfa *d, node *parent) +{ + register stackentry *top; + if 
(s->s_top == s->s_base) { + fprintf(stderr, "s_push: parser stack overflow\n"); + return E_NOMEM; + } + top = --s->s_top; + top->s_dfa = d; + top->s_parent = parent; + top->s_state = 0; + return 0; +} + +#ifdef Py_DEBUG + +static void +s_pop(register stack *s) +{ + if (s_empty(s)) + Py_FatalError("s_pop: parser stack underflow -- FATAL"); + s->s_top++; +} + +#else /* !Py_DEBUG */ + +#define s_pop(s) (s)->s_top++ + +#endif + + +/* PARSER CREATION */ + +parser_state * +PyParser_New(grammar *g, int start) +{ + parser_state *ps; + + if (!g->g_accel) + PyGrammar_AddAccelerators(g); + ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state)); + if (ps == NULL) + return NULL; + ps->p_grammar = g; +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + ps->p_flags = 0; +#endif + ps->p_tree = PyNode_New(start); + if (ps->p_tree == NULL) { + PyMem_FREE(ps); + return NULL; + } + s_reset(&ps->p_stack); + (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree); + return ps; +} + +void +PyParser_Delete(parser_state *ps) +{ + /* NB If you want to save the parse tree, + you must set p_tree to NULL before calling delparser! */ + PyNode_Free(ps->p_tree); + PyMem_FREE(ps); +} + + +/* PARSER STACK OPERATIONS */ + +static int +shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset) +{ + int err; + assert(!s_empty(s)); + err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset); + if (err) + return err; + s->s_top->s_state = newstate; + return 0; +} + +static int +push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset) +{ + int err; + register node *n; + n = s->s_top->s_parent; + assert(!s_empty(s)); + err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset); + if (err) + return err; + s->s_top->s_state = newstate; + return s_push(s, d, CHILD(n, NCH(n)-1)); +} + + +/* PARSER PROPER */ + +static int +classify(parser_state *ps, int type, char *str) +{ + grammar *g = ps->p_grammar; + register int n = g->g_ll.ll_nlabels; + + if (type == NAME) { + register char *s = str; + register label *l = g->g_ll.ll_label; + register int i; + for (i = n; i > 0; i--, l++) { + if (l->lb_type != NAME || l->lb_str == NULL || + l->lb_str[0] != s[0] || + strcmp(l->lb_str, s) != 0) + continue; +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION && + s[0] == 'p' && strcmp(s, "print") == 0) { + break; /* no longer a keyword */ + } +#endif + D(printf("It's a keyword\n")); + return n - i; + } + } + + { + register label *l = g->g_ll.ll_label; + register int i; + for (i = n; i > 0; i--, l++) { + if (l->lb_type == type && l->lb_str == NULL) { + D(printf("It's a token we know\n")); + return n - i; + } + } + } + + D(printf("Illegal token\n")); + return -1; +} + +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD +static void +future_hack(parser_state *ps) +{ + node *n = ps->p_stack.s_top->s_parent; + node *ch, *cch; + int i; + + /* from __future__ import ..., must have at least 4 children */ + n = CHILD(n, 0); + if (NCH(n) < 4) + return; + ch = CHILD(n, 0); + if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0) + return; + ch = CHILD(n, 1); + if (NCH(ch) == 1 && STR(CHILD(ch, 0)) && + strcmp(STR(CHILD(ch, 0)), "__future__") != 0) + return; + ch = CHILD(n, 3); + /* ch can be a star, a parenthesis or import_as_names */ + if (TYPE(ch) == STAR) + return; + if (TYPE(ch) == LPAR) + ch = CHILD(n, 4); + + for (i = 0; i < NCH(ch); i += 2) { + cch = CHILD(ch, i); + if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) { + char *str_ch = STR(CHILD(cch, 
0)); + if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) { + ps->p_flags |= CO_FUTURE_WITH_STATEMENT; + } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) { + ps->p_flags |= CO_FUTURE_PRINT_FUNCTION; + } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) { + ps->p_flags |= CO_FUTURE_UNICODE_LITERALS; + } + } + } +} +#endif /* future keyword */ + +int +PyParser_AddToken(register parser_state *ps, register int type, char *str, + int lineno, int col_offset, int *expected_ret) +{ + register int ilabel; + int err; + + D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str)); + + /* Find out which label this token is */ + ilabel = classify(ps, type, str); + if (ilabel < 0) + return E_SYNTAX; + + /* Loop until the token is shifted or an error occurred */ + for (;;) { + /* Fetch the current dfa and state */ + register dfa *d = ps->p_stack.s_top->s_dfa; + register state *s = &d->d_state[ps->p_stack.s_top->s_state]; + + D(printf(" DFA '%s', state %d:", + d->d_name, ps->p_stack.s_top->s_state)); + + /* Check accelerator */ + if (s->s_lower <= ilabel && ilabel < s->s_upper) { + register int x = s->s_accel[ilabel - s->s_lower]; + if (x != -1) { + if (x & (1<<7)) { + /* Push non-terminal */ + int nt = (x >> 8) + NT_OFFSET; + int arrow = x & ((1<<7)-1); + dfa *d1 = PyGrammar_FindDFA( + ps->p_grammar, nt); + if ((err = push(&ps->p_stack, nt, d1, + arrow, lineno, col_offset)) > 0) { + D(printf(" MemError: push\n")); + return err; + } + D(printf(" Push ...\n")); + continue; + } + + /* Shift the token */ + if ((err = shift(&ps->p_stack, type, str, + x, lineno, col_offset)) > 0) { + D(printf(" MemError: shift.\n")); + return err; + } + D(printf(" Shift.\n")); + /* Pop while we are in an accept-only state */ + while (s = &d->d_state + [ps->p_stack.s_top->s_state], + s->s_accept && s->s_narcs == 1) { + D(printf(" DFA '%s', state %d: " + "Direct pop.\n", + d->d_name, + ps->p_stack.s_top->s_state)); +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + if (d->d_name[0] == 'i' && + strcmp(d->d_name, + "import_stmt") == 0) + future_hack(ps); +#endif + s_pop(&ps->p_stack); + if (s_empty(&ps->p_stack)) { + D(printf(" ACCEPT.\n")); + return E_DONE; + } + d = ps->p_stack.s_top->s_dfa; + } + return E_OK; + } + } + + if (s->s_accept) { +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + if (d->d_name[0] == 'i' && + strcmp(d->d_name, "import_stmt") == 0) + future_hack(ps); +#endif + /* Pop this dfa and try again */ + s_pop(&ps->p_stack); + D(printf(" Pop ...\n")); + if (s_empty(&ps->p_stack)) { + D(printf(" Error: bottom of stack.\n")); + return E_SYNTAX; + } + continue; + } + + /* Stuck, report syntax error */ + D(printf(" Error.\n")); + if (expected_ret) { + if (s->s_lower == s->s_upper - 1) { + /* Only one possible expected token */ + *expected_ret = ps->p_grammar-> + g_ll.ll_label[s->s_lower].lb_type; + } + else + *expected_ret = -1; + } + return E_SYNTAX; + } +} + + +#ifdef Py_DEBUG + +/* DEBUG OUTPUT */ + +void +dumptree(grammar *g, node *n) +{ + int i; + + if (n == NULL) + printf("NIL"); + else { + label l; + l.lb_type = TYPE(n); + l.lb_str = STR(n); + printf("%s", PyGrammar_LabelRepr(&l)); + if (ISNONTERMINAL(TYPE(n))) { + printf("("); + for (i = 0; i < NCH(n); i++) { + if (i > 0) + printf(","); + dumptree(g, CHILD(n, i)); + } + printf(")"); + } + } +} + +void +showtree(grammar *g, node *n) +{ + int i; + + if (n == NULL) + return; + if (ISNONTERMINAL(TYPE(n))) { + for (i = 0; i < NCH(n); i++) + showtree(g, CHILD(n, i)); + } + else if (ISTERMINAL(TYPE(n))) { + printf("%s", _PyParser_TokenNames[TYPE(n)]); + if (TYPE(n) 
== NUMBER || TYPE(n) == NAME) + printf("(%s)", STR(n)); + printf(" "); + } + else + printf("? "); +} + +void +printtree(parser_state *ps) +{ + if (Py_DebugFlag) { + printf("Parse tree:\n"); + dumptree(ps->p_grammar, ps->p_tree); + printf("\n"); + printf("Tokens:\n"); + showtree(ps->p_grammar, ps->p_tree); + printf("\n"); + } + printf("Listing:\n"); + PyNode_ListTree(ps->p_tree); + printf("\n"); +} + +#endif /* Py_DEBUG */ + +/* + +Description +----------- + +The parser's interface is different than usual: the function addtoken() +must be called for each token in the input. This makes it possible to +turn it into an incremental parsing system later. The parsing system +constructs a parse tree as it goes. + +A parsing rule is represented as a Deterministic Finite-state Automaton +(DFA). A node in a DFA represents a state of the parser; an arc represents +a transition. Transitions are either labeled with terminal symbols or +with non-terminals. When the parser decides to follow an arc labeled +with a non-terminal, it is invoked recursively with the DFA representing +the parsing rule for that as its initial state; when that DFA accepts, +the parser that invoked it continues. The parse tree constructed by the +recursively called parser is inserted as a child in the current parse tree. + +The DFA's can be constructed automatically from a more conventional +language description. An extended LL(1) grammar (ELL(1)) is suitable. +Certain restrictions make the parser's life easier: rules that can produce +the empty string should be outlawed (there are other ways to put loops +or optional parts in the language). To avoid the need to construct +FIRST sets, we can require that all but the last alternative of a rule +(really: arc going out of a DFA's state) must begin with a terminal +symbol. 
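+
+The driver loop that feeds tokens to the parser is then simple.  A
+minimal sketch of what parsetok.c (also part of this checkin) does,
+with error handling and token-string management omitted:
+
+    ps = PyParser_New(g, start);
+    for (;;) {
+        type = PyTokenizer_Get(tok, &a, &b);
+        err = PyParser_AddToken(ps, type, str, lineno, col_offset,
+                                &expected);
+        if (err != E_OK)
+            break;      (E_DONE here signals an accepted parse)
+    }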
+ +As an example, consider this grammar: + +expr: term (OP term)* +term: CONSTANT | '(' expr ')' + +The DFA corresponding to the rule for expr is: + +------->.---term-->.-------> + ^ | + | | + \----OP----/ + +The parse tree generated for the input a+b is: + +(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b))) + +*/ diff --git a/AppPkg/Applications/Python/Python-2.7.10/Parser/parser.h b/AppPkg/Applications/Python/Python-2.7.10/Parser/parser.h new file mode 100644 index 0000000000..bc09396769 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.10/Parser/parser.h @@ -0,0 +1,42 @@ +#ifndef Py_PARSER_H +#define Py_PARSER_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* Parser interface */ + +#define MAXSTACK 1500 + +typedef struct { + int s_state; /* State in current DFA */ + dfa *s_dfa; /* Current DFA */ + struct _node *s_parent; /* Where to add next node */ +} stackentry; + +typedef struct { + stackentry *s_top; /* Top entry */ + stackentry s_base[MAXSTACK];/* Array of stack entries */ + /* NB The stack grows down */ +} stack; + +typedef struct { + stack p_stack; /* Stack of parser states */ + grammar *p_grammar; /* Grammar to use */ + node *p_tree; /* Top of parse tree */ +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + unsigned long p_flags; /* see co_flags in Include/code.h */ +#endif +} parser_state; + +parser_state *PyParser_New(grammar *g, int start); +void PyParser_Delete(parser_state *ps); +int PyParser_AddToken(parser_state *ps, int type, char *str, int lineno, int col_offset, + int *expected_ret); +void PyGrammar_AddAccelerators(grammar *g); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_PARSER_H */ diff --git a/AppPkg/Applications/Python/Python-2.7.10/Parser/parsetok.c b/AppPkg/Applications/Python/Python-2.7.10/Parser/parsetok.c new file mode 100644 index 0000000000..ac1eeb6776 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.10/Parser/parsetok.c @@ -0,0 +1,282 @@ + +/* Parser-tokenizer link implementation */ + +#include "pgenheaders.h" +#include "tokenizer.h" +#include "node.h" +#include "grammar.h" +#include "parser.h" +#include "parsetok.h" +#include "errcode.h" +#include "graminit.h" + +int Py_TabcheckFlag; + + +/* Forward */ +static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *); +static void initerr(perrdetail *err_ret, const char* filename); + +/* Parse input coming from a string. Return error code, print some errors. */ +node * +PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret) +{ + return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0); +} + +node * +PyParser_ParseStringFlags(const char *s, grammar *g, int start, + perrdetail *err_ret, int flags) +{ + return PyParser_ParseStringFlagsFilename(s, NULL, + g, start, err_ret, flags); +} + +node * +PyParser_ParseStringFlagsFilename(const char *s, const char *filename, + grammar *g, int start, + perrdetail *err_ret, int flags) +{ + int iflags = flags; + return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start, + err_ret, &iflags); +} + +node * +PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, + grammar *g, int start, + perrdetail *err_ret, int *flags) +{ + struct tok_state *tok; + + initerr(err_ret, filename); + + if ((tok = PyTokenizer_FromString(s, start == file_input)) == NULL) { + err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM; + return NULL; + } + + tok->filename = filename ? 
filename : ""; + if (Py_TabcheckFlag || Py_VerboseFlag) { + tok->altwarning = (tok->filename != NULL); + if (Py_TabcheckFlag >= 2) + tok->alterror++; + } + + return parsetok(tok, g, start, err_ret, flags); +} + +/* Parse input coming from a file. Return error code, print some errors. */ + +node * +PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start, + char *ps1, char *ps2, perrdetail *err_ret) +{ + return PyParser_ParseFileFlags(fp, filename, g, start, ps1, ps2, + err_ret, 0); +} + +node * +PyParser_ParseFileFlags(FILE *fp, const char *filename, grammar *g, int start, + char *ps1, char *ps2, perrdetail *err_ret, int flags) +{ + int iflags = flags; + return PyParser_ParseFileFlagsEx(fp, filename, g, start, ps1, ps2, err_ret, &iflags); +} + +node * +PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, grammar *g, int start, + char *ps1, char *ps2, perrdetail *err_ret, int *flags) +{ + struct tok_state *tok; + + initerr(err_ret, filename); + + if ((tok = PyTokenizer_FromFile(fp, ps1, ps2)) == NULL) { + err_ret->error = E_NOMEM; + return NULL; + } + tok->filename = filename; + if (Py_TabcheckFlag || Py_VerboseFlag) { + tok->altwarning = (filename != NULL); + if (Py_TabcheckFlag >= 2) + tok->alterror++; + } + + return parsetok(tok, g, start, err_ret, flags); +} + +#if 0 +static char with_msg[] = +"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n"; + +static char as_msg[] = +"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n"; + +static void +warn(const char *msg, const char *filename, int lineno) +{ + if (filename == NULL) + filename = ""; + PySys_WriteStderr(msg, filename, lineno); +} +#endif + +/* Parse input coming from the given tokenizer structure. + Return error code. */ + +static node * +parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, + int *flags) +{ + parser_state *ps; + node *n; + int started = 0; + + if ((ps = PyParser_New(g, start)) == NULL) { + fprintf(stderr, "no mem for new parser\n"); + err_ret->error = E_NOMEM; + PyTokenizer_Free(tok); + return NULL; + } +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + if (*flags & PyPARSE_PRINT_IS_FUNCTION) { + ps->p_flags |= CO_FUTURE_PRINT_FUNCTION; + } + if (*flags & PyPARSE_UNICODE_LITERALS) { + ps->p_flags |= CO_FUTURE_UNICODE_LITERALS; + } + +#endif + + for (;;) { + char *a, *b; + int type; + size_t len; + char *str; + int col_offset; + + type = PyTokenizer_Get(tok, &a, &b); + if (type == ERRORTOKEN) { + err_ret->error = tok->done; + break; + } + if (type == ENDMARKER && started) { + type = NEWLINE; /* Add an extra newline */ + started = 0; + /* Add the right number of dedent tokens, + except if a certain flag is given -- + codeop.py uses this. 
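+                   (codeop.py relies on keeping these implicit
+                   DEDENT tokens suppressed so that a partially
+                   entered compound statement can be recognized as
+                   incomplete input rather than parsed as complete.)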
*/ + if (tok->indent && + !(*flags & PyPARSE_DONT_IMPLY_DEDENT)) + { + tok->pendin = -tok->indent; + tok->indent = 0; + } + } + else + started = 1; + len = b - a; /* XXX this may compute NULL - NULL */ + str = (char *) PyObject_MALLOC(len + 1); + if (str == NULL) { + fprintf(stderr, "no mem for next token\n"); + err_ret->error = E_NOMEM; + break; + } + if (len > 0) + strncpy(str, a, len); + str[len] = '\0'; + +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD +#endif + if (a >= tok->line_start) + col_offset = a - tok->line_start; + else + col_offset = -1; + + if ((err_ret->error = + PyParser_AddToken(ps, (int)type, str, tok->lineno, col_offset, + &(err_ret->expected))) != E_OK) { + if (err_ret->error != E_DONE) { + PyObject_FREE(str); + err_ret->token = type; + } + break; + } + } + + if (err_ret->error == E_DONE) { + n = ps->p_tree; + ps->p_tree = NULL; + } + else + n = NULL; + +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + *flags = ps->p_flags; +#endif + PyParser_Delete(ps); + + if (n == NULL) { + if (tok->lineno <= 1 && tok->done == E_EOF) + err_ret->error = E_EOF; + err_ret->lineno = tok->lineno; + if (tok->buf != NULL) { + char *text = NULL; + size_t len; + assert(tok->cur - tok->buf < INT_MAX); + err_ret->offset = (int)(tok->cur - tok->buf); + len = tok->inp - tok->buf; +#ifdef Py_USING_UNICODE + text = PyTokenizer_RestoreEncoding(tok, len, &err_ret->offset); + +#endif + if (text == NULL) { + text = (char *) PyObject_MALLOC(len + 1); + if (text != NULL) { + if (len > 0) + strncpy(text, tok->buf, len); + text[len] = '\0'; + } + } + err_ret->text = text; + } + } else if (tok->encoding != NULL) { + /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was + * allocated using PyMem_ + */ + node* r = PyNode_New(encoding_decl); + if (r) + r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1); + if (!r || !r->n_str) { + err_ret->error = E_NOMEM; + if (r) + PyObject_FREE(r); + n = NULL; + goto done; + } + strcpy(r->n_str, tok->encoding); + PyMem_FREE(tok->encoding); + tok->encoding = NULL; + r->n_nchildren = 1; + r->n_child = n; + n = r; + } + +done: + PyTokenizer_Free(tok); + + return n; +} + +static void +initerr(perrdetail *err_ret, const char *filename) +{ + err_ret->error = E_OK; + err_ret->filename = filename; + err_ret->lineno = 0; + err_ret->offset = 0; + err_ret->text = NULL; + err_ret->token = -1; + err_ret->expected = -1; +} diff --git a/AppPkg/Applications/Python/Python-2.7.10/Parser/tokenizer.c b/AppPkg/Applications/Python/Python-2.7.10/Parser/tokenizer.c new file mode 100644 index 0000000000..086dc56741 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.10/Parser/tokenizer.c @@ -0,0 +1,1755 @@ + +/* Tokenizer implementation */ + +#include "Python.h" +#include "pgenheaders.h" + +#include +#include + +#include "tokenizer.h" +#include "errcode.h" + +#ifndef PGEN +#include "unicodeobject.h" +#include "stringobject.h" +#include "fileobject.h" +#include "codecs.h" +#include "abstract.h" +#include "pydebug.h" +#endif /* PGEN */ + +extern char *PyOS_Readline(FILE *, FILE *, char *); +/* Return malloc'ed string including trailing \n; + empty malloc'ed string for EOF; + NULL if interrupted */ + +/* Don't ever change this -- it would break the portability of Python code */ +#define TABSIZE 8 + +/* Forward */ +static struct tok_state *tok_new(void); +static int tok_nextc(struct tok_state *tok); +static void tok_backup(struct tok_state *tok, int c); + +/* Token names */ + +char *_PyParser_TokenNames[] = { + "ENDMARKER", + "NAME", + "NUMBER", + "STRING", + "NEWLINE", + "INDENT", + "DEDENT", + 
"LPAR", + "RPAR", + "LSQB", + "RSQB", + "COLON", + "COMMA", + "SEMI", + "PLUS", + "MINUS", + "STAR", + "SLASH", + "VBAR", + "AMPER", + "LESS", + "GREATER", + "EQUAL", + "DOT", + "PERCENT", + "BACKQUOTE", + "LBRACE", + "RBRACE", + "EQEQUAL", + "NOTEQUAL", + "LESSEQUAL", + "GREATEREQUAL", + "TILDE", + "CIRCUMFLEX", + "LEFTSHIFT", + "RIGHTSHIFT", + "DOUBLESTAR", + "PLUSEQUAL", + "MINEQUAL", + "STAREQUAL", + "SLASHEQUAL", + "PERCENTEQUAL", + "AMPEREQUAL", + "VBAREQUAL", + "CIRCUMFLEXEQUAL", + "LEFTSHIFTEQUAL", + "RIGHTSHIFTEQUAL", + "DOUBLESTAREQUAL", + "DOUBLESLASH", + "DOUBLESLASHEQUAL", + "AT", + /* This table must match the #defines in token.h! */ + "OP", + "", + "" +}; + +/* Create and initialize a new tok_state structure */ + +static struct tok_state * +tok_new(void) +{ + struct tok_state *tok = (struct tok_state *)PyMem_MALLOC( + sizeof(struct tok_state)); + if (tok == NULL) + return NULL; + tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL; + tok->done = E_OK; + tok->fp = NULL; + tok->input = NULL; + tok->tabsize = TABSIZE; + tok->indent = 0; + tok->indstack[0] = 0; + tok->atbol = 1; + tok->pendin = 0; + tok->prompt = tok->nextprompt = NULL; + tok->lineno = 0; + tok->level = 0; + tok->filename = NULL; + tok->altwarning = 0; + tok->alterror = 0; + tok->alttabsize = 1; + tok->altindstack[0] = 0; + tok->decoding_state = 0; + tok->decoding_erred = 0; + tok->read_coding_spec = 0; + tok->encoding = NULL; + tok->cont_line = 0; +#ifndef PGEN + tok->decoding_readline = NULL; + tok->decoding_buffer = NULL; +#endif + return tok; +} + +static char * +new_string(const char *s, Py_ssize_t len) +{ + char* result = (char *)PyMem_MALLOC(len + 1); + if (result != NULL) { + memcpy(result, s, len); + result[len] = '\0'; + } + return result; +} + +#ifdef PGEN + +static char * +decoding_fgets(char *s, int size, struct tok_state *tok) +{ + return fgets(s, size, tok->fp); +} + +static int +decoding_feof(struct tok_state *tok) +{ + return feof(tok->fp); +} + +static char * +decode_str(const char *str, int exec_input, struct tok_state *tok) +{ + return new_string(str, strlen(str)); +} + +#else /* PGEN */ + +static char * +error_ret(struct tok_state *tok) /* XXX */ +{ + tok->decoding_erred = 1; + if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */ + PyMem_FREE(tok->buf); + tok->buf = NULL; + return NULL; /* as if it were EOF */ +} + + +static char * +get_normal_name(char *s) /* for utf-8 and latin-1 */ +{ + char buf[13]; + int i; + for (i = 0; i < 12; i++) { + int c = s[i]; + if (c == '\0') + break; + else if (c == '_') + buf[i] = '-'; + else + buf[i] = tolower(c); + } + buf[i] = '\0'; + if (strcmp(buf, "utf-8") == 0 || + strncmp(buf, "utf-8-", 6) == 0) + return "utf-8"; + else if (strcmp(buf, "latin-1") == 0 || + strcmp(buf, "iso-8859-1") == 0 || + strcmp(buf, "iso-latin-1") == 0 || + strncmp(buf, "latin-1-", 8) == 0 || + strncmp(buf, "iso-8859-1-", 11) == 0 || + strncmp(buf, "iso-latin-1-", 12) == 0) + return "iso-8859-1"; + else + return s; +} + +/* Return the coding spec in S, or NULL if none is found. */ + +static char * +get_coding_spec(const char *s, Py_ssize_t size) +{ + Py_ssize_t i; + /* Coding spec must be in a comment, and that comment must be + * the only statement on the source code line. 
*/ + for (i = 0; i < size - 6; i++) { + if (s[i] == '#') + break; + if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014') + return NULL; + } + for (; i < size - 6; i++) { /* XXX inefficient search */ + const char* t = s + i; + if (strncmp(t, "coding", 6) == 0) { + const char* begin = NULL; + t += 6; + if (t[0] != ':' && t[0] != '=') + continue; + do { + t++; + } while (t[0] == '\x20' || t[0] == '\t'); + + begin = t; + while (Py_ISALNUM(t[0]) || + t[0] == '-' || t[0] == '_' || t[0] == '.') + t++; + + if (begin < t) { + char* r = new_string(begin, t - begin); + char* q = get_normal_name(r); + if (r != q) { + PyMem_FREE(r); + r = new_string(q, strlen(q)); + } + return r; + } + } + } + return NULL; +} + +/* Check whether the line contains a coding spec. If it does, + invoke the set_readline function for the new encoding. + This function receives the tok_state and the new encoding. + Return 1 on success, 0 on failure. */ + +static int +check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, + int set_readline(struct tok_state *, const char *)) +{ + char * cs; + int r = 1; + + if (tok->cont_line) { + /* It's a continuation line, so it can't be a coding spec. */ + tok->read_coding_spec = 1; + return 1; + } + cs = get_coding_spec(line, size); + if (!cs) { + Py_ssize_t i; + for (i = 0; i < size; i++) { + if (line[i] == '#' || line[i] == '\n' || line[i] == '\r') + break; + if (line[i] != ' ' && line[i] != '\t' && line[i] != '\014') { + /* Stop checking coding spec after a line containing + * anything except a comment. */ + tok->read_coding_spec = 1; + break; + } + } + } else { + tok->read_coding_spec = 1; + if (tok->encoding == NULL) { + assert(tok->decoding_state == 1); /* raw */ + if (strcmp(cs, "utf-8") == 0 || + strcmp(cs, "iso-8859-1") == 0) { + tok->encoding = cs; + } else { +#ifdef Py_USING_UNICODE + r = set_readline(tok, cs); + if (r) { + tok->encoding = cs; + tok->decoding_state = -1; + } + else { + PyErr_Format(PyExc_SyntaxError, + "encoding problem: %s", cs); + PyMem_FREE(cs); + } +#else + /* Without Unicode support, we cannot + process the coding spec. Since there + won't be any Unicode literals, that + won't matter. */ + PyMem_FREE(cs); +#endif + } + } else { /* then, compare cs with BOM */ + r = (strcmp(tok->encoding, cs) == 0); + if (!r) + PyErr_Format(PyExc_SyntaxError, + "encoding problem: %s with BOM", cs); + PyMem_FREE(cs); + } + } + return r; +} + +/* See whether the file starts with a BOM. If it does, + invoke the set_readline function with the new encoding. + Return 1 on success, 0 on failure. */ + +static int +check_bom(int get_char(struct tok_state *), + void unget_char(int, struct tok_state *), + int set_readline(struct tok_state *, const char *), + struct tok_state *tok) +{ + int ch1, ch2, ch3; + ch1 = get_char(tok); + tok->decoding_state = 1; + if (ch1 == EOF) { + return 1; + } else if (ch1 == 0xEF) { + ch2 = get_char(tok); + if (ch2 != 0xBB) { + unget_char(ch2, tok); + unget_char(ch1, tok); + return 1; + } + ch3 = get_char(tok); + if (ch3 != 0xBF) { + unget_char(ch3, tok); + unget_char(ch2, tok); + unget_char(ch1, tok); + return 1; + } +#if 0 + /* Disable support for UTF-16 BOMs until a decision + is made whether this needs to be supported. 
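+       (A UTF-16 stream would announce itself with the byte pair
+       0xFE 0xFF for big-endian or 0xFF 0xFE for little-endian input,
+       which is exactly what the disabled branches below check for.)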
*/ + } else if (ch1 == 0xFE) { + ch2 = get_char(tok); + if (ch2 != 0xFF) { + unget_char(ch2, tok); + unget_char(ch1, tok); + return 1; + } + if (!set_readline(tok, "utf-16-be")) + return 0; + tok->decoding_state = -1; + } else if (ch1 == 0xFF) { + ch2 = get_char(tok); + if (ch2 != 0xFE) { + unget_char(ch2, tok); + unget_char(ch1, tok); + return 1; + } + if (!set_readline(tok, "utf-16-le")) + return 0; + tok->decoding_state = -1; +#endif + } else { + unget_char(ch1, tok); + return 1; + } + if (tok->encoding != NULL) + PyMem_FREE(tok->encoding); + tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */ + return 1; +} + +/* Read a line of text from TOK into S, using the stream in TOK. + Return NULL on failure, else S. + + On entry, tok->decoding_buffer will be one of: + 1) NULL: need to call tok->decoding_readline to get a new line + 2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and + stored the result in tok->decoding_buffer + 3) PyStringObject *: previous call to fp_readl did not have enough room + (in the s buffer) to copy entire contents of the line read + by tok->decoding_readline. tok->decoding_buffer has the overflow. + In this case, fp_readl is called in a loop (with an expanded buffer) + until the buffer ends with a '\n' (or until the end of the file is + reached): see tok_nextc and its calls to decoding_fgets. +*/ + +static char * +fp_readl(char *s, int size, struct tok_state *tok) +{ +#ifndef Py_USING_UNICODE + /* In a non-Unicode built, this should never be called. */ + Py_FatalError("fp_readl should not be called in this build."); + return NULL; /* Keep compiler happy (not reachable) */ +#else + PyObject* utf8 = NULL; + PyObject* buf = tok->decoding_buffer; + char *str; + Py_ssize_t utf8len; + + /* Ask for one less byte so we can terminate it */ + assert(size > 0); + size--; + + if (buf == NULL) { + buf = PyObject_CallObject(tok->decoding_readline, NULL); + if (buf == NULL) + return error_ret(tok); + if (!PyUnicode_Check(buf)) { + Py_DECREF(buf); + PyErr_SetString(PyExc_SyntaxError, + "codec did not return a unicode object"); + return error_ret(tok); + } + } else { + tok->decoding_buffer = NULL; + if (PyString_CheckExact(buf)) + utf8 = buf; + } + if (utf8 == NULL) { + utf8 = PyUnicode_AsUTF8String(buf); + Py_DECREF(buf); + if (utf8 == NULL) + return error_ret(tok); + } + str = PyString_AsString(utf8); + utf8len = PyString_GET_SIZE(utf8); + if (utf8len > size) { + tok->decoding_buffer = PyString_FromStringAndSize(str+size, utf8len-size); + if (tok->decoding_buffer == NULL) { + Py_DECREF(utf8); + return error_ret(tok); + } + utf8len = size; + } + memcpy(s, str, utf8len); + s[utf8len] = '\0'; + Py_DECREF(utf8); + if (utf8len == 0) + return NULL; /* EOF */ + return s; +#endif +} + +/* Set the readline function for TOK to a StreamReader's + readline function. The StreamReader is named ENC. + + This function is called from check_bom and check_coding_spec. + + ENC is usually identical to the future value of tok->encoding, + except for the (currently unsupported) case of UTF-16. + + Return 1 on success, 0 on failure. */ + +static int +fp_setreadl(struct tok_state *tok, const char* enc) +{ + PyObject *reader, *stream, *readline; + + /* XXX: constify filename argument. 
*/ + stream = PyFile_FromFile(tok->fp, (char*)tok->filename, "rb", NULL); + if (stream == NULL) + return 0; + + reader = PyCodec_StreamReader(enc, stream, NULL); + Py_DECREF(stream); + if (reader == NULL) + return 0; + + readline = PyObject_GetAttrString(reader, "readline"); + Py_DECREF(reader); + if (readline == NULL) + return 0; + + tok->decoding_readline = readline; + return 1; +} + +/* Fetch the next byte from TOK. */ + +static int fp_getc(struct tok_state *tok) { + return getc(tok->fp); +} + +/* Unfetch the last byte back into TOK. */ + +static void fp_ungetc(int c, struct tok_state *tok) { + ungetc(c, tok->fp); +} + +/* Read a line of input from TOK. Determine encoding + if necessary. */ + +static char * +decoding_fgets(char *s, int size, struct tok_state *tok) +{ + char *line = NULL; + int badchar = 0; + for (;;) { + if (tok->decoding_state < 0) { + /* We already have a codec associated with + this input. */ + line = fp_readl(s, size, tok); + break; + } else if (tok->decoding_state > 0) { + /* We want a 'raw' read. */ + line = Py_UniversalNewlineFgets(s, size, + tok->fp, NULL); + break; + } else { + /* We have not yet determined the encoding. + If an encoding is found, use the file-pointer + reader functions from now on. */ + if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok)) + return error_ret(tok); + assert(tok->decoding_state != 0); + } + } + if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) { + if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) { + return error_ret(tok); + } + } +#ifndef PGEN + /* The default encoding is ASCII, so make sure we don't have any + non-ASCII bytes in it. */ + if (line && !tok->encoding) { + unsigned char *c; + for (c = (unsigned char *)line; *c; c++) + if (*c > 127) { + badchar = *c; + break; + } + } + if (badchar) { + char buf[500]; + /* Need to add 1 to the line number, since this line + has not been counted, yet. */ + sprintf(buf, + "Non-ASCII character '\\x%.2x' " + "in file %.200s on line %i, " + "but no encoding declared; " + "see http://python.org/dev/peps/pep-0263/ for details", + badchar, tok->filename, tok->lineno + 1); + PyErr_SetString(PyExc_SyntaxError, buf); + return error_ret(tok); + } +#endif + return line; +} + +static int +decoding_feof(struct tok_state *tok) +{ + if (tok->decoding_state >= 0) { + return feof(tok->fp); + } else { + PyObject* buf = tok->decoding_buffer; + if (buf == NULL) { + buf = PyObject_CallObject(tok->decoding_readline, NULL); + if (buf == NULL) { + error_ret(tok); + return 1; + } else { + tok->decoding_buffer = buf; + } + } + return PyObject_Length(buf) == 0; + } +} + +/* Fetch a byte from TOK, using the string buffer. */ + +static int +buf_getc(struct tok_state *tok) { + return Py_CHARMASK(*tok->str++); +} + +/* Unfetch a byte from TOK, using the string buffer. */ + +static void +buf_ungetc(int c, struct tok_state *tok) { + tok->str--; + assert(Py_CHARMASK(*tok->str) == c); /* tok->cur may point to read-only segment */ +} + +/* Set the readline function for TOK to ENC. For the string-based + tokenizer, this means to just record the encoding. */ + +static int +buf_setreadl(struct tok_state *tok, const char* enc) { + tok->enc = enc; + return 1; +} + +/* Return a UTF-8 encoding Python string object from the + C byte string STR, which is encoded with ENC. 
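+
+   The result is a new reference (or NULL with an exception set), so
+   callers must Py_DECREF it when done.  A hedged usage sketch
+   (hypothetical values, for illustration only):
+
+       PyObject *u = translate_into_utf8("caf\xe9", "iso-8859-1");
+       if (u != NULL) {
+           ... PyString_AS_STRING(u) now holds "caf\xc3\xa9" ...
+           Py_DECREF(u);
+       }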
*/ + +#ifdef Py_USING_UNICODE +static PyObject * +translate_into_utf8(const char* str, const char* enc) { + PyObject *utf8; + PyObject* buf = PyUnicode_Decode(str, strlen(str), enc, NULL); + if (buf == NULL) + return NULL; + utf8 = PyUnicode_AsUTF8String(buf); + Py_DECREF(buf); + return utf8; +} +#endif + + +static char * +translate_newlines(const char *s, int exec_input, struct tok_state *tok) { + int skip_next_lf = 0, needed_length = strlen(s) + 2, final_length; + char *buf, *current; + char c = '\0'; + buf = PyMem_MALLOC(needed_length); + if (buf == NULL) { + tok->done = E_NOMEM; + return NULL; + } + for (current = buf; *s; s++, current++) { + c = *s; + if (skip_next_lf) { + skip_next_lf = 0; + if (c == '\n') { + c = *++s; + if (!c) + break; + } + } + if (c == '\r') { + skip_next_lf = 1; + c = '\n'; + } + *current = c; + } + /* If this is exec input, add a newline to the end of the string if + there isn't one already. */ + if (exec_input && c != '\n') { + *current = '\n'; + current++; + } + *current = '\0'; + final_length = current - buf + 1; + if (final_length < needed_length && final_length) + /* should never fail */ + buf = PyMem_REALLOC(buf, final_length); + return buf; +} + +/* Decode a byte string STR for use as the buffer of TOK. + Look for encoding declarations inside STR, and record them + inside TOK. */ + +static const char * +decode_str(const char *input, int single, struct tok_state *tok) +{ + PyObject* utf8 = NULL; + const char *str; + const char *s; + const char *newl[2] = {NULL, NULL}; + int lineno = 0; + tok->input = str = translate_newlines(input, single, tok); + if (str == NULL) + return NULL; + tok->enc = NULL; + tok->str = str; + if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok)) + return error_ret(tok); + str = tok->str; /* string after BOM if any */ + assert(str); +#ifdef Py_USING_UNICODE + if (tok->enc != NULL) { + utf8 = translate_into_utf8(str, tok->enc); + if (utf8 == NULL) + return error_ret(tok); + str = PyString_AsString(utf8); + } +#endif + for (s = str;; s++) { + if (*s == '\0') break; + else if (*s == '\n') { + assert(lineno < 2); + newl[lineno] = s; + lineno++; + if (lineno == 2) break; + } + } + tok->enc = NULL; + /* need to check line 1 and 2 separately since check_coding_spec + assumes a single line as input */ + if (newl[0]) { + if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl)) + return error_ret(tok); + if (tok->enc == NULL && !tok->read_coding_spec && newl[1]) { + if (!check_coding_spec(newl[0]+1, newl[1] - newl[0], + tok, buf_setreadl)) + return error_ret(tok); + } + } +#ifdef Py_USING_UNICODE + if (tok->enc != NULL) { + assert(utf8 == NULL); + utf8 = translate_into_utf8(str, tok->enc); + if (utf8 == NULL) + return error_ret(tok); + str = PyString_AsString(utf8); + } +#endif + assert(tok->decoding_buffer == NULL); + tok->decoding_buffer = utf8; /* CAUTION */ + return str; +} + +#endif /* PGEN */ + +/* Set up tokenizer for string */ + +struct tok_state * +PyTokenizer_FromString(const char *str, int exec_input) +{ + struct tok_state *tok = tok_new(); + if (tok == NULL) + return NULL; + str = (char *)decode_str(str, exec_input, tok); + if (str == NULL) { + PyTokenizer_Free(tok); + return NULL; + } + + /* XXX: constify members. 
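+
+       A minimal driver for the string tokenizer, as a hedged sketch
+       (hypothetical, not part of the upstream file):
+
+           struct tok_state *t = PyTokenizer_FromString("x = 1\n", 1);
+           if (t != NULL) {
+               char *start, *end;
+               int type = PyTokenizer_Get(t, &start, &end);
+               ... type is NAME and [start, end) spans "x" ...
+               PyTokenizer_Free(t);
+           }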
*/ + tok->buf = tok->cur = tok->end = tok->inp = (char*)str; + return tok; +} + + +/* Set up tokenizer for file */ + +struct tok_state * +PyTokenizer_FromFile(FILE *fp, char *ps1, char *ps2) +{ + struct tok_state *tok = tok_new(); + if (tok == NULL) + return NULL; + if ((tok->buf = (char *)PyMem_MALLOC(BUFSIZ)) == NULL) { + PyTokenizer_Free(tok); + return NULL; + } + tok->cur = tok->inp = tok->buf; + tok->end = tok->buf + BUFSIZ; + tok->fp = fp; + tok->prompt = ps1; + tok->nextprompt = ps2; + return tok; +} + + +/* Free a tok_state structure */ + +void +PyTokenizer_Free(struct tok_state *tok) +{ + if (tok->encoding != NULL) + PyMem_FREE(tok->encoding); +#ifndef PGEN + Py_XDECREF(tok->decoding_readline); + Py_XDECREF(tok->decoding_buffer); +#endif + if (tok->fp != NULL && tok->buf != NULL) + PyMem_FREE(tok->buf); + if (tok->input) + PyMem_FREE((char *)tok->input); + PyMem_FREE(tok); +} + +#if !defined(PGEN) && defined(Py_USING_UNICODE) +static int +tok_stdin_decode(struct tok_state *tok, char **inp) +{ + PyObject *enc, *sysstdin, *decoded, *utf8; + const char *encoding; + char *converted; + + if (PySys_GetFile((char *)"stdin", NULL) != stdin) + return 0; + sysstdin = PySys_GetObject("stdin"); + if (sysstdin == NULL || !PyFile_Check(sysstdin)) + return 0; + + enc = ((PyFileObject *)sysstdin)->f_encoding; + if (enc == NULL || !PyString_Check(enc)) + return 0; + Py_INCREF(enc); + + encoding = PyString_AsString(enc); + decoded = PyUnicode_Decode(*inp, strlen(*inp), encoding, NULL); + if (decoded == NULL) + goto error_clear; + + utf8 = PyUnicode_AsEncodedString(decoded, "utf-8", NULL); + Py_DECREF(decoded); + if (utf8 == NULL) + goto error_clear; + + assert(PyString_Check(utf8)); + converted = new_string(PyString_AS_STRING(utf8), + PyString_GET_SIZE(utf8)); + Py_DECREF(utf8); + if (converted == NULL) + goto error_nomem; + + PyMem_FREE(*inp); + *inp = converted; + if (tok->encoding != NULL) + PyMem_FREE(tok->encoding); + tok->encoding = new_string(encoding, strlen(encoding)); + if (tok->encoding == NULL) + goto error_nomem; + + Py_DECREF(enc); + return 0; + +error_nomem: + Py_DECREF(enc); + tok->done = E_NOMEM; + return -1; + +error_clear: + Py_DECREF(enc); + if (!PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { + tok->done = E_ERROR; + return -1; + } + /* Fallback to iso-8859-1: for backward compatibility */ + PyErr_Clear(); + return 0; +} +#endif + +/* Get next char, updating state; error code goes into tok->done */ + +static int +tok_nextc(register struct tok_state *tok) +{ + for (;;) { + if (tok->cur != tok->inp) { + return Py_CHARMASK(*tok->cur++); /* Fast path */ + } + if (tok->done != E_OK) + return EOF; + if (tok->fp == NULL) { + char *end = strchr(tok->inp, '\n'); + if (end != NULL) + end++; + else { + end = strchr(tok->inp, '\0'); + if (end == tok->inp) { + tok->done = E_EOF; + return EOF; + } + } + if (tok->start == NULL) + tok->buf = tok->cur; + tok->line_start = tok->cur; + tok->lineno++; + tok->inp = end; + return Py_CHARMASK(*tok->cur++); + } + if (tok->prompt != NULL) { + char *newtok = PyOS_Readline(stdin, stdout, tok->prompt); + if (tok->nextprompt != NULL) + tok->prompt = tok->nextprompt; + if (newtok == NULL) + tok->done = E_INTR; + else if (*newtok == '\0') { + PyMem_FREE(newtok); + tok->done = E_EOF; + } +#if !defined(PGEN) && defined(Py_USING_UNICODE) + else if (tok_stdin_decode(tok, &newtok) != 0) + PyMem_FREE(newtok); +#endif + else if (tok->start != NULL) { + size_t start = tok->start - tok->buf; + size_t oldlen = tok->cur - tok->buf; + size_t newlen = oldlen + 
strlen(newtok); + char *buf = tok->buf; + buf = (char *)PyMem_REALLOC(buf, newlen+1); + tok->lineno++; + if (buf == NULL) { + PyMem_FREE(tok->buf); + tok->buf = NULL; + PyMem_FREE(newtok); + tok->done = E_NOMEM; + return EOF; + } + tok->buf = buf; + tok->cur = tok->buf + oldlen; + tok->line_start = tok->cur; + strcpy(tok->buf + oldlen, newtok); + PyMem_FREE(newtok); + tok->inp = tok->buf + newlen; + tok->end = tok->inp + 1; + tok->start = tok->buf + start; + } + else { + tok->lineno++; + if (tok->buf != NULL) + PyMem_FREE(tok->buf); + tok->buf = newtok; + tok->line_start = tok->buf; + tok->cur = tok->buf; + tok->line_start = tok->buf; + tok->inp = strchr(tok->buf, '\0'); + tok->end = tok->inp + 1; + } + } + else { + int done = 0; + Py_ssize_t cur = 0; + char *pt; + if (tok->start == NULL) { + if (tok->buf == NULL) { + tok->buf = (char *) + PyMem_MALLOC(BUFSIZ); + if (tok->buf == NULL) { + tok->done = E_NOMEM; + return EOF; + } + tok->end = tok->buf + BUFSIZ; + } + if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf), + tok) == NULL) { + tok->done = E_EOF; + done = 1; + } + else { + tok->done = E_OK; + tok->inp = strchr(tok->buf, '\0'); + done = tok->inp[-1] == '\n'; + } + } + else { + cur = tok->cur - tok->buf; + if (decoding_feof(tok)) { + tok->done = E_EOF; + done = 1; + } + else + tok->done = E_OK; + } + tok->lineno++; + /* Read until '\n' or EOF */ + while (!done) { + Py_ssize_t curstart = tok->start == NULL ? -1 : + tok->start - tok->buf; + Py_ssize_t curvalid = tok->inp - tok->buf; + Py_ssize_t newsize = curvalid + BUFSIZ; + char *newbuf = tok->buf; + newbuf = (char *)PyMem_REALLOC(newbuf, + newsize); + if (newbuf == NULL) { + tok->done = E_NOMEM; + tok->cur = tok->inp; + return EOF; + } + tok->buf = newbuf; + tok->inp = tok->buf + curvalid; + tok->end = tok->buf + newsize; + tok->start = curstart < 0 ? 
NULL : + tok->buf + curstart; + if (decoding_fgets(tok->inp, + (int)(tok->end - tok->inp), + tok) == NULL) { + /* Break out early on decoding + errors, as tok->buf will be NULL + */ + if (tok->decoding_erred) + return EOF; + /* Last line does not end in \n, + fake one */ + strcpy(tok->inp, "\n"); + } + tok->inp = strchr(tok->inp, '\0'); + done = tok->inp[-1] == '\n'; + } + if (tok->buf != NULL) { + tok->cur = tok->buf + cur; + tok->line_start = tok->cur; + /* replace "\r\n" with "\n" */ + /* For Mac leave the \r, giving a syntax error */ + pt = tok->inp - 2; + if (pt >= tok->buf && *pt == '\r') { + *pt++ = '\n'; + *pt = '\0'; + tok->inp = pt; + } + } + } + if (tok->done != E_OK) { + if (tok->prompt != NULL) + PySys_WriteStderr("\n"); + tok->cur = tok->inp; + return EOF; + } + } + /*NOTREACHED*/ +} + + +/* Back-up one character */ + +static void +tok_backup(register struct tok_state *tok, register int c) +{ + if (c != EOF) { + if (--tok->cur < tok->buf) + Py_FatalError("tok_backup: beginning of buffer"); + if (*tok->cur != c) + *tok->cur = c; + } +} + + +/* Return the token corresponding to a single character */ + +int +PyToken_OneChar(int c) +{ + switch (c) { + case '(': return LPAR; + case ')': return RPAR; + case '[': return LSQB; + case ']': return RSQB; + case ':': return COLON; + case ',': return COMMA; + case ';': return SEMI; + case '+': return PLUS; + case '-': return MINUS; + case '*': return STAR; + case '/': return SLASH; + case '|': return VBAR; + case '&': return AMPER; + case '<': return LESS; + case '>': return GREATER; + case '=': return EQUAL; + case '.': return DOT; + case '%': return PERCENT; + case '`': return BACKQUOTE; + case '{': return LBRACE; + case '}': return RBRACE; + case '^': return CIRCUMFLEX; + case '~': return TILDE; + case '@': return AT; + default: return OP; + } +} + + +int +PyToken_TwoChars(int c1, int c2) +{ + switch (c1) { + case '=': + switch (c2) { + case '=': return EQEQUAL; + } + break; + case '!': + switch (c2) { + case '=': return NOTEQUAL; + } + break; + case '<': + switch (c2) { + case '>': return NOTEQUAL; + case '=': return LESSEQUAL; + case '<': return LEFTSHIFT; + } + break; + case '>': + switch (c2) { + case '=': return GREATEREQUAL; + case '>': return RIGHTSHIFT; + } + break; + case '+': + switch (c2) { + case '=': return PLUSEQUAL; + } + break; + case '-': + switch (c2) { + case '=': return MINEQUAL; + } + break; + case '*': + switch (c2) { + case '*': return DOUBLESTAR; + case '=': return STAREQUAL; + } + break; + case '/': + switch (c2) { + case '/': return DOUBLESLASH; + case '=': return SLASHEQUAL; + } + break; + case '|': + switch (c2) { + case '=': return VBAREQUAL; + } + break; + case '%': + switch (c2) { + case '=': return PERCENTEQUAL; + } + break; + case '&': + switch (c2) { + case '=': return AMPEREQUAL; + } + break; + case '^': + switch (c2) { + case '=': return CIRCUMFLEXEQUAL; + } + break; + } + return OP; +} + +int +PyToken_ThreeChars(int c1, int c2, int c3) +{ + switch (c1) { + case '<': + switch (c2) { + case '<': + switch (c3) { + case '=': + return LEFTSHIFTEQUAL; + } + break; + } + break; + case '>': + switch (c2) { + case '>': + switch (c3) { + case '=': + return RIGHTSHIFTEQUAL; + } + break; + } + break; + case '*': + switch (c2) { + case '*': + switch (c3) { + case '=': + return DOUBLESTAREQUAL; + } + break; + } + break; + case '/': + switch (c2) { + case '/': + switch (c3) { + case '=': + return DOUBLESLASHEQUAL; + } + break; + } + break; + } + return OP; +} + +static int +indenterror(struct tok_state *tok) +{ + 
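+    /* alterror requests a hard failure (E_TABSPACE) for indentation
+       that mixes tabs and spaces inconsistently, while altwarning
+       only prints a warning, once per file; both flags are set up by
+       the callers that create the tok_state (they correspond to the
+       interpreter's -tt and -t tab-checking modes). */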
if (tok->alterror) { + tok->done = E_TABSPACE; + tok->cur = tok->inp; + return 1; + } + if (tok->altwarning) { + PySys_WriteStderr("%s: inconsistent use of tabs and spaces " + "in indentation\n", tok->filename); + tok->altwarning = 0; + } + return 0; +} + +/* Get next token, after space stripping etc. */ + +static int +tok_get(register struct tok_state *tok, char **p_start, char **p_end) +{ + register int c; + int blankline; + + *p_start = *p_end = NULL; + nextline: + tok->start = NULL; + blankline = 0; + + /* Get indentation level */ + if (tok->atbol) { + register int col = 0; + register int altcol = 0; + tok->atbol = 0; + for (;;) { + c = tok_nextc(tok); + if (c == ' ') + col++, altcol++; + else if (c == '\t') { + col = (col/tok->tabsize + 1) * tok->tabsize; + altcol = (altcol/tok->alttabsize + 1) + * tok->alttabsize; + } + else if (c == '\014') /* Control-L (formfeed) */ + col = altcol = 0; /* For Emacs users */ + else + break; + } + tok_backup(tok, c); + if (c == '#' || c == '\n') { + /* Lines with only whitespace and/or comments + shouldn't affect the indentation and are + not passed to the parser as NEWLINE tokens, + except *totally* empty lines in interactive + mode, which signal the end of a command group. */ + if (col == 0 && c == '\n' && tok->prompt != NULL) + blankline = 0; /* Let it through */ + else + blankline = 1; /* Ignore completely */ + /* We can't jump back right here since we still + may need to skip to the end of a comment */ + } + if (!blankline && tok->level == 0) { + if (col == tok->indstack[tok->indent]) { + /* No change */ + if (altcol != tok->altindstack[tok->indent]) { + if (indenterror(tok)) + return ERRORTOKEN; + } + } + else if (col > tok->indstack[tok->indent]) { + /* Indent -- always one */ + if (tok->indent+1 >= MAXINDENT) { + tok->done = E_TOODEEP; + tok->cur = tok->inp; + return ERRORTOKEN; + } + if (altcol <= tok->altindstack[tok->indent]) { + if (indenterror(tok)) + return ERRORTOKEN; + } + tok->pendin++; + tok->indstack[++tok->indent] = col; + tok->altindstack[tok->indent] = altcol; + } + else /* col < tok->indstack[tok->indent] */ { + /* Dedent -- any number, must be consistent */ + while (tok->indent > 0 && + col < tok->indstack[tok->indent]) { + tok->pendin--; + tok->indent--; + } + if (col != tok->indstack[tok->indent]) { + tok->done = E_DEDENT; + tok->cur = tok->inp; + return ERRORTOKEN; + } + if (altcol != tok->altindstack[tok->indent]) { + if (indenterror(tok)) + return ERRORTOKEN; + } + } + } + } + + tok->start = tok->cur; + + /* Return pending indents/dedents */ + if (tok->pendin != 0) { + if (tok->pendin < 0) { + tok->pendin++; + return DEDENT; + } + else { + tok->pendin--; + return INDENT; + } + } + + again: + tok->start = NULL; + /* Skip spaces */ + do { + c = tok_nextc(tok); + } while (c == ' ' || c == '\t' || c == '\014'); + + /* Set start of current token */ + tok->start = tok->cur - 1; + + /* Skip comment, while looking for tab-setting magic */ + if (c == '#') { + static char *tabforms[] = { + "tab-width:", /* Emacs */ + ":tabstop=", /* vim, full form */ + ":ts=", /* vim, abbreviated form */ + "set tabsize=", /* will vi never die? 
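+                            (a comment line such as "# vim:ts=8:" or
+                             "# -*- tab-width: 8 -*-" matches one of
+                             these templates in the strstr scan below)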
*/ + /* more templates can be added here to support other editors */ + }; + char cbuf[80]; + char *tp, **cp; + tp = cbuf; + do { + *tp++ = c = tok_nextc(tok); + } while (c != EOF && c != '\n' && + (size_t)(tp - cbuf + 1) < sizeof(cbuf)); + *tp = '\0'; + for (cp = tabforms; + cp < tabforms + sizeof(tabforms)/sizeof(tabforms[0]); + cp++) { + if ((tp = strstr(cbuf, *cp))) { + int newsize = atoi(tp + strlen(*cp)); + + if (newsize >= 1 && newsize <= 40) { + tok->tabsize = newsize; + if (Py_VerboseFlag) + PySys_WriteStderr( + "Tab size set to %d\n", + newsize); + } + } + } + while (c != EOF && c != '\n') + c = tok_nextc(tok); + } + + /* Check for EOF and errors now */ + if (c == EOF) { + return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN; + } + + /* Identifier (most frequent token!) */ + if (Py_ISALPHA(c) || c == '_') { + /* Process r"", u"" and ur"" */ + switch (c) { + case 'b': + case 'B': + c = tok_nextc(tok); + if (c == 'r' || c == 'R') + c = tok_nextc(tok); + if (c == '"' || c == '\'') + goto letter_quote; + break; + case 'r': + case 'R': + c = tok_nextc(tok); + if (c == '"' || c == '\'') + goto letter_quote; + break; + case 'u': + case 'U': + c = tok_nextc(tok); + if (c == 'r' || c == 'R') + c = tok_nextc(tok); + if (c == '"' || c == '\'') + goto letter_quote; + break; + } + while (c != EOF && (Py_ISALNUM(c) || c == '_')) { + c = tok_nextc(tok); + } + tok_backup(tok, c); + *p_start = tok->start; + *p_end = tok->cur; + return NAME; + } + + /* Newline */ + if (c == '\n') { + tok->atbol = 1; + if (blankline || tok->level > 0) + goto nextline; + *p_start = tok->start; + *p_end = tok->cur - 1; /* Leave '\n' out of the string */ + tok->cont_line = 0; + return NEWLINE; + } + + /* Period or number starting with period? */ + if (c == '.') { + c = tok_nextc(tok); + if (isdigit(c)) { + goto fraction; + } + else { + tok_backup(tok, c); + *p_start = tok->start; + *p_end = tok->cur; + return DOT; + } + } + + /* Number */ + if (isdigit(c)) { + if (c == '0') { + /* Hex, octal or binary -- maybe. 
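+                   A leading '0' may begin 0x1f (hex), 0o17 or 017
+                   (octal), 0b101 (binary), 0.5 or 0e1 (float), 0j
+                   (imaginary) or 0L (long), so the branches below
+                   must keep all of these forms apart.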
*/ + c = tok_nextc(tok); + if (c == '.') + goto fraction; +#ifndef WITHOUT_COMPLEX + if (c == 'j' || c == 'J') + goto imaginary; +#endif + if (c == 'x' || c == 'X') { + + /* Hex */ + c = tok_nextc(tok); + if (!isxdigit(c)) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while (isxdigit(c)); + } + else if (c == 'o' || c == 'O') { + /* Octal */ + c = tok_nextc(tok); + if (c < '0' || c >= '8') { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while ('0' <= c && c < '8'); + } + else if (c == 'b' || c == 'B') { + /* Binary */ + c = tok_nextc(tok); + if (c != '0' && c != '1') { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while (c == '0' || c == '1'); + } + else { + int found_decimal = 0; + /* Octal; c is first char of it */ + /* There's no 'isoctdigit' macro, sigh */ + while ('0' <= c && c < '8') { + c = tok_nextc(tok); + } + if (isdigit(c)) { + found_decimal = 1; + do { + c = tok_nextc(tok); + } while (isdigit(c)); + } + if (c == '.') + goto fraction; + else if (c == 'e' || c == 'E') + goto exponent; +#ifndef WITHOUT_COMPLEX + else if (c == 'j' || c == 'J') + goto imaginary; +#endif + else if (found_decimal) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + } + if (c == 'l' || c == 'L') + c = tok_nextc(tok); + } + else { + /* Decimal */ + do { + c = tok_nextc(tok); + } while (isdigit(c)); + if (c == 'l' || c == 'L') + c = tok_nextc(tok); + else { + /* Accept floating point numbers. */ + if (c == '.') { + fraction: + /* Fraction */ + do { + c = tok_nextc(tok); + } while (isdigit(c)); + } + if (c == 'e' || c == 'E') { + int e; + exponent: + e = c; + /* Exponent part */ + c = tok_nextc(tok); + if (c == '+' || c == '-') { + c = tok_nextc(tok); + if (!isdigit(c)) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + } else if (!isdigit(c)) { + tok_backup(tok, c); + tok_backup(tok, e); + *p_start = tok->start; + *p_end = tok->cur; + return NUMBER; + } + do { + c = tok_nextc(tok); + } while (isdigit(c)); + } +#ifndef WITHOUT_COMPLEX + if (c == 'j' || c == 'J') + /* Imaginary part */ + imaginary: + c = tok_nextc(tok); +#endif + } + } + tok_backup(tok, c); + *p_start = tok->start; + *p_end = tok->cur; + return NUMBER; + } + + letter_quote: + /* String */ + if (c == '\'' || c == '"') { + Py_ssize_t quote2 = tok->cur - tok->start + 1; + int quote = c; + int triple = 0; + int tripcount = 0; + for (;;) { + c = tok_nextc(tok); + if (c == '\n') { + if (!triple) { + tok->done = E_EOLS; + tok_backup(tok, c); + return ERRORTOKEN; + } + tripcount = 0; + tok->cont_line = 1; /* multiline string. 
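+                                       tripcount counts consecutive
+                                       quote characters and quote2 is
+                                       the offset just past a second
+                                       opening quote, which is how an
+                                       empty string ("" or '') is told
+                                       apart from the opening of a
+                                       triple-quoted one.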
*/ + } + else if (c == EOF) { + if (triple) + tok->done = E_EOFS; + else + tok->done = E_EOLS; + tok->cur = tok->inp; + return ERRORTOKEN; + } + else if (c == quote) { + tripcount++; + if (tok->cur - tok->start == quote2) { + c = tok_nextc(tok); + if (c == quote) { + triple = 1; + tripcount = 0; + continue; + } + tok_backup(tok, c); + } + if (!triple || tripcount == 3) + break; + } + else if (c == '\\') { + tripcount = 0; + c = tok_nextc(tok); + if (c == EOF) { + tok->done = E_EOLS; + tok->cur = tok->inp; + return ERRORTOKEN; + } + } + else + tripcount = 0; + } + *p_start = tok->start; + *p_end = tok->cur; + return STRING; + } + + /* Line continuation */ + if (c == '\\') { + c = tok_nextc(tok); + if (c != '\n') { + tok->done = E_LINECONT; + tok->cur = tok->inp; + return ERRORTOKEN; + } + tok->cont_line = 1; + goto again; /* Read next line */ + } + + /* Check for two-character token */ + { + int c2 = tok_nextc(tok); + int token = PyToken_TwoChars(c, c2); +#ifndef PGEN + if (Py_Py3kWarningFlag && token == NOTEQUAL && c == '<') { + if (PyErr_WarnExplicit(PyExc_DeprecationWarning, + "<> not supported in 3.x; use !=", + tok->filename, tok->lineno, + NULL, NULL)) { + return ERRORTOKEN; + } + } +#endif + if (token != OP) { + int c3 = tok_nextc(tok); + int token3 = PyToken_ThreeChars(c, c2, c3); + if (token3 != OP) { + token = token3; + } else { + tok_backup(tok, c3); + } + *p_start = tok->start; + *p_end = tok->cur; + return token; + } + tok_backup(tok, c2); + } + + /* Keep track of parentheses nesting level */ + switch (c) { + case '(': + case '[': + case '{': + tok->level++; + break; + case ')': + case ']': + case '}': + tok->level--; + break; + } + + /* Punctuation character */ + *p_start = tok->start; + *p_end = tok->cur; + return PyToken_OneChar(c); +} + +int +PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end) +{ + int result = tok_get(tok, p_start, p_end); + if (tok->decoding_erred) { + result = ERRORTOKEN; + tok->done = E_DECODE; + } + return result; +} + +/* This function is only called from parsetok. However, it cannot live + there, as it must be empty for PGEN, and we can check for PGEN only + in this file. 
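+
+   When a source encoding other than UTF-8 is in effect, the tokenizer
+   works on a UTF-8 translation of the input internally; for error
+   reporting, the function below re-encodes the offending line (and the
+   column offset into it) back to the declared encoding so the user
+   sees the original bytes.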
*/ + +#if defined(PGEN) || !defined(Py_USING_UNICODE) +char* +PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int* offset) +{ + return NULL; +} +#else +#ifdef Py_USING_UNICODE +static PyObject * +dec_utf8(const char *enc, const char *text, size_t len) { + PyObject *ret = NULL; + PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace"); + if (unicode_text) { + ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace"); + Py_DECREF(unicode_text); + } + if (!ret) { + PyErr_Clear(); + } + return ret; +} +char * +PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset) +{ + char *text = NULL; + if (tok->encoding) { + /* convert source to original encondig */ + PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len); + if (lineobj != NULL) { + int linelen = PyString_Size(lineobj); + const char *line = PyString_AsString(lineobj); + text = PyObject_MALLOC(linelen + 1); + if (text != NULL && line != NULL) { + if (linelen) + strncpy(text, line, linelen); + text[linelen] = '\0'; + } + Py_DECREF(lineobj); + + /* adjust error offset */ + if (*offset > 1) { + PyObject *offsetobj = dec_utf8(tok->encoding, + tok->buf, *offset-1); + if (offsetobj) { + *offset = PyString_Size(offsetobj) + 1; + Py_DECREF(offsetobj); + } + } + + } + } + return text; + +} +#endif /* defined(Py_USING_UNICODE) */ +#endif + + +#ifdef Py_DEBUG + +void +tok_dump(int type, char *start, char *end) +{ + printf("%s", _PyParser_TokenNames[type]); + if (type == NAME || type == NUMBER || type == STRING || type == OP) + printf("(%.*s)", (int)(end - start), start); +} + +#endif diff --git a/AppPkg/Applications/Python/Python-2.7.10/Parser/tokenizer.h b/AppPkg/Applications/Python/Python-2.7.10/Parser/tokenizer.h new file mode 100644 index 0000000000..3de3280d05 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.10/Parser/tokenizer.h @@ -0,0 +1,70 @@ +#ifndef Py_TOKENIZER_H +#define Py_TOKENIZER_H +#ifdef __cplusplus +extern "C" { +#endif + +#include "object.h" + +/* Tokenizer interface */ + +#include "token.h" /* For token types */ + +#define MAXINDENT 100 /* Max indentation level */ + +/* Tokenizer state */ +struct tok_state { + /* Input state; buf <= cur <= inp <= end */ + /* NB an entire line is held in the buffer */ + char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */ + char *cur; /* Next character in buffer */ + char *inp; /* End of data in buffer */ + char *end; /* End of input buffer if buf != NULL */ + char *start; /* Start of current token if not NULL */ + int done; /* E_OK normally, E_EOF at EOF, otherwise error code */ + /* NB If done != E_OK, cur must be == inp!!! 
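+       (tok_nextc's fast path returns *cur++ for as long as cur != inp,
+        so forcing cur == inp guarantees that the stored error status,
+        rather than stale buffer contents, is seen on the next call)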
*/
+    FILE *fp;                   /* Rest of input; NULL if tokenizing a string */
+    int tabsize;                /* Tab spacing */
+    int indent;                 /* Current indentation index */
+    int indstack[MAXINDENT];    /* Stack of indents */
+    int atbol;                  /* Nonzero if at begin of new line */
+    int pendin;                 /* Pending indents (if > 0) or dedents (if < 0) */
+    char *prompt, *nextprompt;  /* For interactive prompting */
+    int lineno;                 /* Current line number */
+    int level;                  /* () [] {} Parentheses nesting level */
+                                /* Used to allow free continuations inside them */
+    /* Stuff for checking on different tab sizes */
+    const char *filename;       /* For error messages */
+    int altwarning;             /* Issue warning if alternate tabs don't match */
+    int alterror;               /* Issue error if alternate tabs don't match */
+    int alttabsize;             /* Alternate tab spacing */
+    int altindstack[MAXINDENT]; /* Stack of alternate indents */
+    /* Stuff for PEP 0263 */
+    int decoding_state;         /* -1:decoding, 0:init, 1:raw */
+    int decoding_erred;         /* whether erred in decoding */
+    int read_coding_spec;       /* whether 'coding:...' has been read */
+    char *encoding;
+    int cont_line;              /* whether we are in a continuation line. */
+    const char* line_start;     /* pointer to start of current line */
+#ifndef PGEN
+    PyObject *decoding_readline; /* codecs.open(...).readline */
+    PyObject *decoding_buffer;
+#endif
+    const char* enc;
+    const char* str;
+    const char* input;          /* Tokenizer's newline translated copy of the string. */
+};
+
+extern struct tok_state *PyTokenizer_FromString(const char *, int);
+extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *);
+extern void PyTokenizer_Free(struct tok_state *);
+extern int PyTokenizer_Get(struct tok_state *, char **, char **);
+#if defined(PGEN) || defined(Py_USING_UNICODE)
+extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
+                                          int len, int *offset);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_TOKENIZER_H */
--
cgit v1.2.3