summary | refs | log | tree | commit | diff
path: root/ext/ply
diff options
context:
space:
mode:
Diffstat (limited to 'ext/ply')
-rw-r--r--ext/ply/CHANGES158
-rw-r--r--ext/ply/COPYING504
-rw-r--r--ext/ply/README249
-rw-r--r--ext/ply/TODO22
-rw-r--r--ext/ply/doc/ply.html1642
-rw-r--r--ext/ply/example/ansic/README2
-rw-r--r--ext/ply/example/ansic/clex.py161
-rw-r--r--ext/ply/example/ansic/cparse.py859
-rw-r--r--ext/ply/example/calc/calc.py108
-rw-r--r--ext/ply/example/hedit/hedit.py44
-rw-r--r--ext/ply/example/optcalc/README9
-rw-r--r--ext/ply/example/optcalc/calc.py110
-rw-r--r--ext/ply/lex.py681
-rw-r--r--ext/ply/test/README9
-rw-r--r--ext/ply/test/calclex.py46
-rw-r--r--ext/ply/test/lex_doc1.exp1
-rw-r--r--ext/ply/test/lex_doc1.py27
-rw-r--r--ext/ply/test/lex_dup1.exp2
-rw-r--r--ext/ply/test/lex_dup1.py27
-rw-r--r--ext/ply/test/lex_dup2.exp2
-rw-r--r--ext/ply/test/lex_dup2.py31
-rw-r--r--ext/ply/test/lex_dup3.exp2
-rw-r--r--ext/ply/test/lex_dup3.py29
-rw-r--r--ext/ply/test/lex_empty.exp1
-rw-r--r--ext/ply/test/lex_empty.py18
-rw-r--r--ext/ply/test/lex_error1.exp1
-rw-r--r--ext/ply/test/lex_error1.py22
-rw-r--r--ext/ply/test/lex_error2.exp1
-rw-r--r--ext/ply/test/lex_error2.py24
-rw-r--r--ext/ply/test/lex_error3.exp2
-rw-r--r--ext/ply/test/lex_error3.py25
-rw-r--r--ext/ply/test/lex_error4.exp2
-rw-r--r--ext/ply/test/lex_error4.py25
-rw-r--r--ext/ply/test/lex_hedit.exp3
-rw-r--r--ext/ply/test/lex_hedit.py44
-rw-r--r--ext/ply/test/lex_ignore.exp2
-rw-r--r--ext/ply/test/lex_ignore.py29
-rw-r--r--ext/ply/test/lex_re1.exp2
-rw-r--r--ext/ply/test/lex_re1.py25
-rw-r--r--ext/ply/test/lex_rule1.exp2
-rw-r--r--ext/ply/test/lex_rule1.py25
-rw-r--r--ext/ply/test/lex_token1.exp1
-rw-r--r--ext/ply/test/lex_token1.py19
-rw-r--r--ext/ply/test/lex_token2.exp1
-rw-r--r--ext/ply/test/lex_token2.py21
-rw-r--r--ext/ply/test/lex_token3.exp2
-rw-r--r--ext/ply/test/lex_token3.py24
-rw-r--r--ext/ply/test/lex_token4.exp2
-rw-r--r--ext/ply/test/lex_token4.py26
-rw-r--r--ext/ply/test/lex_token5.exp1
-rw-r--r--ext/ply/test/lex_token5.py31
-rwxr-xr-xext/ply/test/testlex.py57
-rw-r--r--ext/ply/test/testyacc.py58
-rw-r--r--ext/ply/test/yacc_badargs.exp3
-rw-r--r--ext/ply/test/yacc_badargs.py67
-rw-r--r--ext/ply/test/yacc_badprec.exp1
-rw-r--r--ext/ply/test/yacc_badprec.py63
-rw-r--r--ext/ply/test/yacc_badprec2.exp3
-rw-r--r--ext/ply/test/yacc_badprec2.py67
-rw-r--r--ext/ply/test/yacc_badrule.exp5
-rw-r--r--ext/ply/test/yacc_badrule.py67
-rw-r--r--ext/ply/test/yacc_badtok.exp1
-rw-r--r--ext/ply/test/yacc_badtok.py68
-rw-r--r--ext/ply/test/yacc_dup.exp4
-rw-r--r--ext/ply/test/yacc_dup.py67
-rw-r--r--ext/ply/test/yacc_error1.exp1
-rw-r--r--ext/ply/test/yacc_error1.py67
-rw-r--r--ext/ply/test/yacc_error2.exp1
-rw-r--r--ext/ply/test/yacc_error2.py67
-rw-r--r--ext/ply/test/yacc_error3.exp1
-rw-r--r--ext/ply/test/yacc_error3.py66
-rw-r--r--ext/ply/test/yacc_inf.exp5
-rw-r--r--ext/ply/test/yacc_inf.py55
-rw-r--r--ext/ply/test/yacc_missing1.exp2
-rw-r--r--ext/ply/test/yacc_missing1.py67
-rw-r--r--ext/ply/test/yacc_nodoc.exp2
-rw-r--r--ext/ply/test/yacc_nodoc.py66
-rw-r--r--ext/ply/test/yacc_noerror.exp2
-rw-r--r--ext/ply/test/yacc_noerror.py64
-rw-r--r--ext/ply/test/yacc_nop.exp2
-rw-r--r--ext/ply/test/yacc_nop.py67
-rw-r--r--ext/ply/test/yacc_notfunc.exp4
-rw-r--r--ext/ply/test/yacc_notfunc.py65
-rw-r--r--ext/ply/test/yacc_notok.exp1
-rw-r--r--ext/ply/test/yacc_notok.py66
-rw-r--r--ext/ply/test/yacc_rr.exp2
-rw-r--r--ext/ply/test/yacc_rr.py71
-rw-r--r--ext/ply/test/yacc_simple.exp1
-rw-r--r--ext/ply/test/yacc_simple.py67
-rw-r--r--ext/ply/test/yacc_sr.exp2
-rw-r--r--ext/ply/test/yacc_sr.py62
-rw-r--r--ext/ply/test/yacc_term1.exp2
-rw-r--r--ext/ply/test/yacc_term1.py67
-rw-r--r--ext/ply/test/yacc_unused.exp4
-rw-r--r--ext/ply/test/yacc_unused.py76
-rw-r--r--ext/ply/test/yacc_uprec.exp2
-rw-r--r--ext/ply/test/yacc_uprec.py62
-rw-r--r--ext/ply/yacc.py1846
98 files changed, 8572 insertions, 0 deletions
diff --git a/ext/ply/CHANGES b/ext/ply/CHANGES
new file mode 100644
index 000000000..9c7334066
--- /dev/null
+++ b/ext/ply/CHANGES
@@ -0,0 +1,158 @@
+Version 1.3
+------------------------------
+12/10/02: jmdyck
+ Various minor adjustments to the code that Dave checked in today.
+ Updated test/yacc_{inf,unused}.exp to reflect today's changes.
+
+12/10/02: beazley
+ Incorporated a variety of minor bug fixes to empty production
+ handling and infinite recursion checking. Contributed by
+ Michael Dyck.
+
+12/10/02: beazley
+ Removed bogus recover() method call in yacc.restart()
+
+Version 1.2
+------------------------------
+11/27/02: beazley
+ Lexer and parser objects are now available as an attribute
+ of tokens and slices respectively. For example:
+
+     def t_NUMBER(t):
+         r'\d+'
+         print t.lexer
+
+     def p_expr_plus(t):
+         'expr : expr PLUS expr'
+         print t.lexer
+         print t.parser
+
+ This can be used for state management (if needed).
+
+10/31/02: beazley
+ Modified yacc.py to work with Python optimize mode. To make
+ this work, you need to use
+
+ yacc.yacc(optimize=1)
+
+ Furthermore, you need to first run Python in normal mode
+ to generate the necessary parsetab.py files. After that,
+ you can use python -O or python -OO.
+
+ Note: optimized mode turns off a lot of error checking.
+ Only use when you are sure that your grammar is working.
+ Make sure parsetab.py is up to date!
+
+10/30/02: beazley
+ Added cloning of Lexer objects. For example:
+
+ import copy
+ l = lex.lex()
+ lc = copy.copy(l)
+
+ l.input("Some text")
+ lc.input("Some other text")
+ ...
+
+ This might be useful if the same "lexer" is meant to
+ be used in different contexts---or if multiple lexers
+ are running concurrently.
+
+10/30/02: beazley
+ Fixed subtle bug with first set computation and empty productions.
+ Patch submitted by Michael Dyck.
+
+10/30/02: beazley
+ Fixed error messages to use "filename:line: message" instead
+ of "filename:line. message". This makes error reporting more
+ friendly to emacs. Patch submitted by François Pinard.
+
+10/30/02: beazley
+ Improvements to parser.out file. Terminals and nonterminals
+ are sorted instead of being printed in random order.
+ Patch submitted by François Pinard.
+
+10/30/02: beazley
+ Improvements to parser.out file output. Rules are now printed
+ in a way that's easier to understand. Contributed by Russ Cox.
+
+10/30/02: beazley
+ Added 'nonassoc' associativity support. This can be used
+ to disable the chaining of operators like a < b < c.
+ To use, simply specify 'nonassoc' in the precedence table:
+
+ precedence = (
+ ('nonassoc', 'LESSTHAN', 'GREATERTHAN'), # Nonassociative operators
+ ('left', 'PLUS', 'MINUS'),
+ ('left', 'TIMES', 'DIVIDE'),
+ ('right', 'UMINUS'), # Unary minus operator
+ )
+
+ Patch contributed by Russ Cox.
+
+10/30/02: beazley
+ Modified the lexer to provide optional support for Python -O and -OO
+ modes. To make this work, Python *first* needs to be run in
+ unoptimized mode. This reads the lexing information and creates a
+ file "lextab.py". Then, run lex like this:
+
+ # module foo.py
+ ...
+ ...
+ lex.lex(optimize=1)
+
+ Once the lextab file has been created, subsequent calls to
+ lex.lex() will read data from the lextab file instead of using
+ introspection. In optimized mode (-O, -OO) everything should
+ work normally despite the loss of doc strings.
+
+ To change the name of the file 'lextab.py' use the following:
+
+ lex.lex(lextab="footab")
+
+ (this creates a file footab.py)
+
+
+Version 1.1 October 25, 2001
+------------------------------
+
+10/25/01: beazley
+ Modified the table generator to produce much more compact data.
+ This should greatly reduce the size of the parsetab.py[c] file.
+ Caveat: the tables still need to be constructed so a little more
+ work is done in parsetab on import.
+
+10/25/01: beazley
+ There may be a bug in the cycle detector that reports errors
+ about infinite recursion. I'm having a little trouble tracking it
+ down, but if you get this problem, you can disable the cycle
+ detector as follows:
+
+ yacc.yacc(check_recursion = 0)
+
+10/25/01: beazley
+ Fixed a bug in lex.py that sometimes caused illegal characters to be
+ reported incorrectly. Reported by Sverre Jørgensen.
+
+7/8/01 : beazley
+ Added a reference to the underlying lexer object when tokens are handled by
+ functions. The lexer is available as the 'lexer' attribute. This
+ was added to provide better lexing support for languages such as Fortran
+ where certain types of tokens can't be conveniently expressed as regular
+ expressions (and where the tokenizing function may want to perform a
+ little backtracking). Suggested by Pearu Peterson.
+
+6/20/01 : beazley
+ Modified yacc() function so that an optional starting symbol can be specified.
+ For example:
+
+ yacc.yacc(start="statement")
+
+ By default, yacc treats the first production rule as the starting symbol.
+ However, if you are debugging your grammar it may be useful to specify
+ an alternative starting symbol. Idea suggested by Rich Salz.
+
+Version 1.0 June 18, 2001
+--------------------------
+Initial public offering
+
diff --git a/ext/ply/COPYING b/ext/ply/COPYING
new file mode 100644
index 000000000..b1e3f5a26
--- /dev/null
+++ b/ext/ply/COPYING
@@ -0,0 +1,504 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard. To achieve this, non-free programs must be
+allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded. In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library. It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the library's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
+
+
diff --git a/ext/ply/README b/ext/ply/README
new file mode 100644
index 000000000..35b458d4c
--- /dev/null
+++ b/ext/ply/README
@@ -0,0 +1,249 @@
+PLY (Python Lex-Yacc) Version 1.2 (November 27, 2002)
+
+David M. Beazley
+Department of Computer Science
+University of Chicago
+Chicago, IL 60637
+beazley@cs.uchicago.edu
+
+Copyright (C) 2001 David M. Beazley
+
+$Header: /home/stever/bk/newmem2/ext/ply/README 1.1 03/06/06 14:53:34-00:00 stever@ $
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+See the file COPYING for a complete copy of the LGPL.
+
+Introduction
+============
+
+PLY is a 100% Python implementation of the common parsing tools lex
+and yacc. Although several other parsing tools are available for
+Python, there are several reasons why you might want to consider PLY:
+
+ - The tools are very closely modeled after traditional lex/yacc.
+ If you know how to use these tools in C, you will find PLY
+ to be similar.
+
+ - PLY provides *very* extensive error reporting and diagnostic
+ information to assist in parser construction. The original
+ implementation was developed for instructional purposes. As
+ a result, the system tries to identify the most common types
+ of errors made by novice users.
+
+ - PLY provides full support for empty productions, error recovery,
+ precedence specifiers, and moderately ambiguous grammars.
+
+ - Parsing is based on LR-parsing which is fast, memory efficient,
+ better suited to large grammars, and which has a number of nice
+ properties when dealing with syntax errors and other parsing problems.
+ Currently, PLY builds its parsing tables using the SLR algorithm which
+ is slightly weaker than LALR(1) used in traditional yacc.
+
+ - Like John Aycock's excellent SPARK toolkit, PLY uses Python
+ reflection to build lexers and parsers. This greatly simplifies
+ the task of parser construction since it reduces the number of files
+ and eliminates the need to run a separate lex/yacc tool before
+ running your program.
+
+ - PLY can be used to build parsers for "real" programming languages.
+ Although it is not ultra-fast due to its Python implementation,
+ PLY can be used to parse grammars consisting of several hundred
+ rules (as might be found for a language like C). The lexer and LR
+ parser are also reasonably efficient when parsing typically
+ sized programs.
+
+The original version of PLY was developed for an Introduction to
+Compilers course where students used it to build a compiler for a
+simple Pascal-like language. Their compiler had to include lexical
+analysis, parsing, type checking, type inference, and generation of
+assembly code for the SPARC processor. Because of this, the current
+implementation has been extensively tested and debugged. In addition,
+most of the API and error checking steps have been adapted to address
+common usability problems.
+
+How to Use
+==========
+
+PLY consists of two files : lex.py and yacc.py. To use the system,
+simply copy these files to your project and import them like standard
+Python modules.
+
+The file doc/ply.html contains complete documentation on how to use
+the system.
+
+The example directory contains several different examples including a
+PLY specification for ANSI C as given in K&R 2nd Ed. Note: To use
+the examples, you will need to copy the lex.py and yacc.py files to
+the example directory.
+
+A simple example is found at the end of this document.
+
+Requirements
+============
+PLY requires the use of Python 2.0 or greater. It should work on
+just about any platform.
+
+Resources
+=========
+
+More information about PLY can be obtained on the PLY webpage at:
+
+ http://systems.cs.uchicago.edu/ply
+
+For a detailed overview of parsing theory, consult the excellent
+book "Compilers : Principles, Techniques, and Tools" by Aho, Sethi, and
+Ullman. The topics found in "Lex & Yacc" by Levine, Mason, and Brown
+may also be useful.
+
+Given that this is the first release, I welcome your comments on how
+to improve the current implementation. See the TODO file for things that
+still need to be done.
+
+Acknowledgments
+===============
+
+A special thanks is in order for all of the students in CS326 who
+suffered through about 25 different versions of these tools :-).
+
+Example
+=======
+
+Here is a simple example showing a PLY implementation of a calculator with variables.
+
+# -----------------------------------------------------------------------------
+# calc.py
+#
+# A simple calculator with variables.
+# -----------------------------------------------------------------------------
+
+tokens = (
+ 'NAME','NUMBER',
+ 'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
+ 'LPAREN','RPAREN',
+ )
+
+# Tokens
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_TIMES = r'\*'
+t_DIVIDE = r'/'
+t_EQUALS = r'='
+t_LPAREN = r'\('
+t_RPAREN = r'\)'
+t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
+
+def t_NUMBER(t):
+ r'\d+'
+ try:
+ t.value = int(t.value)
+ except ValueError:
+ print "Integer value too large", t.value
+ t.value = 0
+ return t
+
+# Ignored characters
+t_ignore = " \t"
+
+def t_newline(t):
+ r'\n+'
+ t.lineno += t.value.count("\n")
+
+def t_error(t):
+ print "Illegal character '%s'" % t.value[0]
+ t.skip(1)
+
+# Build the lexer
+import lex
+lex.lex()
+
+# Precedence rules for the arithmetic operators
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names (for storing variables)
+names = { }
+
+def p_statement_assign(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[2] == '/': t[0] = t[1] / t[3]
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+while 1:
+ try:
+ s = raw_input('calc > ')
+ except EOFError:
+ break
+ yacc.parse(s)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/ply/TODO b/ext/ply/TODO
new file mode 100644
index 000000000..b2978150d
--- /dev/null
+++ b/ext/ply/TODO
@@ -0,0 +1,22 @@
+The PLY to-do list:
+
+$Header: /home/stever/bk/newmem2/ext/ply/TODO 1.1 03/06/06 14:53:34-00:00 stever@ $
+
+1. Create a Python package using distutils
+
+2. More interesting parsing examples.
+
+3. Work on the ANSI C grammar so that it can actually parse C programs. To do this,
+ some extra code needs to be added to the lexer to deal with typedef names and enumeration
+ constants.
+
+4. Get LALR(1) to work. Hard, but not impossible.
+
+5. More tests in the test directory.
+
+6. Performance improvements and cleanup in yacc.py.
+
+7. More documentation.
+
+8. Lots and lots of cleanup.
+
diff --git a/ext/ply/doc/ply.html b/ext/ply/doc/ply.html
new file mode 100644
index 000000000..2596066fe
--- /dev/null
+++ b/ext/ply/doc/ply.html
@@ -0,0 +1,1642 @@
+<html>
+<head>
+<title>PLY (Python Lex-Yacc)</title>
+</head>
+<body bgcolor="#ffffff">
+
+<h1>PLY (Python Lex-Yacc)</h1>
+
+<b>
+David M. Beazley <br>
+Department of Computer Science <br>
+University of Chicago <br>
+Chicago, IL 60637 <br>
+beazley@cs.uchicago.edu <br>
+</b>
+
+<p>
+Documentation version: $Header: /home/stever/bk/newmem2/ext/ply/doc/ply.html 1.1 03/06/06 14:53:34-00:00 stever@ $
+
+<h2>Introduction</h2>
+
+PLY is a Python-only implementation of the popular compiler
+construction tools lex and yacc. The implementation borrows ideas
+from a number of previous efforts; most notably John Aycock's SPARK
+toolkit. However, the overall flavor of the implementation is more
+closely modeled after the C version of lex and yacc. The other
+significant feature of PLY is that it provides extensive input
+validation and error reporting--much more so than other Python parsing
+tools.
+
+<p>
+Early versions of PLY were developed to support the Introduction to
+Compilers Course at the University of Chicago. In this course,
+students built a fully functional compiler for a simple Pascal-like
+language. Their compiler, implemented entirely in Python, had to
+include lexical analysis, parsing, type checking, type inference,
+nested scoping, and code generation for the SPARC processor.
+Approximately 30 different compiler implementations were completed in
+this course. Most of PLY's interface and operation has been motivated by common
+usability problems encountered by students.
+
+<p>
+Because PLY was primarily developed as an instructional tool, you will
+find it to be <em>MUCH</em> more picky about token and grammar rule
+specification than most other Python parsing tools. In part, this
+added formality is meant to catch common programming mistakes made by
+novice users. However, advanced users will also find such features to
+be useful when building complicated grammars for real programming
+languages. It should also be noted that PLY does not provide much in the way
+of bells and whistles (e.g., automatic construction of abstract syntax trees,
+tree traversal, etc.). Instead, you will find a bare-bones, yet
+fully capable lex/yacc implementation written entirely in Python.
+
+<p>
+The rest of this document assumes that you are somewhat familiar with
+parsing theory, syntax directed translation, and automatic tools such
+as lex and yacc. If you are unfamiliar with these topics, you will
+probably want to consult an introductory text such as "Compilers:
+Principles, Techniques, and Tools", by Aho, Sethi, and Ullman. "Lex
+and Yacc" by John Levine may also be handy.
+
+<h2>PLY Overview</h2>
+
+PLY consists of two separate tools: <tt>lex.py</tt> and
+<tt>yacc.py</tt>. <tt>lex.py</tt> is used to break input text into a
+collection of tokens specified by a collection of regular expression
+rules. <tt>yacc.py</tt> is used to recognize language syntax that has
+been specified in the form of a context free grammar. Currently,
+<tt>yacc.py</tt> uses LR parsing and generates its parsing tables
+using the SLR algorithm. LALR(1) parsing may be supported in a future
+release.
+
+<p>
+The two tools are meant to work together. Specifically,
+<tt>lex.py</tt> provides an external interface in the form of a
+<tt>token()</tt> function that returns the next valid token on the
+input stream. <tt>yacc.py</tt> calls this repeatedly to retrieve
+tokens and invoke grammar rules. The output of <tt>yacc.py</tt> is
+often an Abstract Syntax Tree (AST). However, this is entirely up to
+the user. If desired, <tt>yacc.py</tt> can also be used to implement
+simple one-pass compilers.
+
+<p>
+Like its Unix counterpart, <tt>yacc.py</tt> provides most of the
+features you expect including extensive error checking, grammar
+validation, support for empty productions, error tokens, and ambiguity
+resolution via precedence rules. The primary difference between
+<tt>yacc.py</tt> and <tt>yacc</tt> is the use of SLR parsing instead
+of LALR(1). Although this slightly restricts the types of grammars
+that can be successfully parsed, it is sufficiently powerful to handle most
+kinds of normal programming language constructs.
+
+<p>
+Finally, it is important to note that PLY relies on reflection
+(introspection) to build its lexers and parsers. Unlike traditional
+lex/yacc which require a special input file that is converted into a
+separate source file, the specifications given to PLY <em>are</em>
+valid Python programs. This means that there are no extra source
+files nor is there a special compiler construction step (e.g., running
+yacc to generate Python code for the compiler).
+
+<h2>Lex Example</h2>
+
+<tt>lex.py</tt> is used to write tokenizers. To do this, each token
+must be defined by a regular expression rule. The following file
+implements a very simple lexer for tokenizing simple integer expressions:
+
+<blockquote>
+<pre>
+# ------------------------------------------------------------
+# calclex.py
+#
+# tokenizer for a simple expression evaluator for
+# numbers and +,-,*,/
+# ------------------------------------------------------------
+import lex
+
+# List of token names. This is always required
+tokens = (
+ 'NUMBER',
+ 'PLUS',
+ 'MINUS',
+ 'TIMES',
+ 'DIVIDE',
+ 'LPAREN',
+ 'RPAREN',
+)
+
+# Regular expression rules for simple tokens
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_TIMES = r'\*'
+t_DIVIDE = r'/'
+t_LPAREN = r'\('
+t_RPAREN = r'\)'
+
+# A regular expression rule with some action code
+def t_NUMBER(t):
+ r'\d+'
+ try:
+ t.value = int(t.value)
+ except ValueError:
+ print "Line %d: Number %s is too large!" % (t.lineno,t.value)
+ t.value = 0
+ return t
+
+# Define a rule so we can track line numbers
+def t_newline(t):
+ r'\n+'
+ t.lineno += len(t.value)
+
+# A string containing ignored characters (spaces and tabs)
+t_ignore = ' \t'
+
+# Error handling rule
+def t_error(t):
+ print "Illegal character '%s'" % t.value[0]
+ t.skip(1)
+
+# Build the lexer
+lex.lex()
+
+# Test it out
+data = '''
+3 + 4 * 10
+ + -20 *2
+'''
+
+# Give the lexer some input
+lex.input(data)
+
+# Tokenize
+while 1:
+ tok = lex.token()
+ if not tok: break # No more input
+ print tok
+</pre>
+</blockquote>
+
+In the example, the <tt>tokens</tt> list defines all of the possible
+token names that can be produced by the lexer. This list is always required
+and is used to perform a variety of validation checks. Following the <tt>tokens</tt>
+list, regular expressions are written for each token. Each of these
+rules is defined by making a declaration with a special prefix <tt>t_</tt> to indicate that it
+defines a token. For simple tokens, the regular expression can
+be specified as strings such as this (note: Python raw strings are used since they are the
+most convenient way to write regular expression strings):
+
+<blockquote>
+<pre>
+t_PLUS = r'\+'
+</pre>
+</blockquote>
+
+In this case, the name following the <tt>t_</tt> must exactly match one of the
+names supplied in <tt>tokens</tt>. If some kind of action needs to be performed,
+a token rule can be specified as a function. For example:
+
+<blockquote>
+<pre>
+def t_NUMBER(t):
+ r'\d+'
+ try:
+ t.value = int(t.value)
+ except ValueError:
+ print "Number %s is too large!" % t.value
+ t.value = 0
+ return t
+</pre>
+</blockquote>
+
+In this case, the regular expression rule is specified in the function documentation string.
+The function always takes a single argument which is an instance of
+<tt>LexToken</tt>. This object has attributes of <tt>t.type</tt> which is the token type,
+<tt>t.value</tt> which is the lexeme, and <tt>t.lineno</tt> which is the current line number.
+By default, <tt>t.type</tt> is set to the name following the <tt>t_</tt> prefix. The action
+function can modify the contents of the <tt>LexToken</tt> object as appropriate. However,
+when it is done, the resulting token should be returned. If no value is returned by the action
+function, the token is simply discarded and the next token read.
+
+<p>
+The rule <tt>t_newline()</tt> illustrates a regular expression rule
+for a discarded token. In this case, a rule is written to match
+newlines so that proper line number tracking can be performed.
+By returning no value, the function causes the newline character to be
+discarded.
+
+<p>
+The special <tt>t_ignore</tt> rule is reserved by <tt>lex.py</tt> for characters
+that should be completely ignored in the input stream.
+Usually this is used to skip over whitespace and other non-essential characters.
+Although it is possible to define a regular expression rule for whitespace in a manner
+similar to <tt>t_newline()</tt>, the use of <tt>t_ignore</tt> provides substantially better
+lexing performance because it is handled as a special case and is checked in a much
+more efficient manner than the normal regular expression rules.
+
+<p>
+Finally, the <tt>t_error()</tt>
+function is used to handle lexing errors that occur when illegal
+characters are detected. In this case, the <tt>t.value</tt> attribute contains the
+rest of the input string that has not been tokenized. In the example, we simply print
+the offending character and skip ahead one character by calling <tt>t.skip(1)</tt>.
+
+<p>
+To build the lexer, the function <tt>lex.lex()</tt> is used. This function
+uses Python reflection (or introspection) to read the regular expression rules
+out of the calling context and build the lexer. Once the lexer has been built, two functions can
+be used to control the lexer.
+
+<ul>
+<li><tt>lex.input(data)</tt>. Reset the lexer and store a new input string.
+<li><tt>lex.token()</tt>. Return the next token. Returns a special <tt>LexToken</tt> instance on success or
+None if the end of the input text has been reached.
+</ul>
+
+The code at the bottom of the example shows how the lexer is actually used. When executed,
+the following output will be produced:
+
+<blockquote>
+<pre>
+$ python example.py
+LexToken(NUMBER,3,2)
+LexToken(PLUS,'+',2)
+LexToken(NUMBER,4,2)
+LexToken(TIMES,'*',2)
+LexToken(NUMBER,10,2)
+LexToken(PLUS,'+',3)
+LexToken(MINUS,'-',3)
+LexToken(NUMBER,20,3)
+LexToken(TIMES,'*',3)
+LexToken(NUMBER,2,3)
+</pre>
+</blockquote>
+
+<h2>Lex Implementation Notes</h2>
+
+<ul>
+<li><tt>lex.py</tt> uses the <tt>re</tt> module to do its pattern matching. When building the master regular expression,
+rules are added in the following order:
+<p>
+<ol>
+<li>All tokens defined by functions are added in the same order as they appear in the lexer file.
+<li>Tokens defined by strings are added by sorting them in order of decreasing regular expression length (longer expressions
+are added first).
+</ol>
+<p>
+Without this ordering, it can be difficult to correctly match certain types of tokens. For example, if you
+wanted to have separate tokens for "=" and "==", you need to make sure that "==" is checked first. By sorting regular
+expressions in order of decreasing length, this problem is solved for rules defined as strings. For functions,
+the order can be explicitly controlled since rules appearing first are checked first.
+
+<P>
+<li>The lexer requires input to be supplied as a single input string. Since most machines have more than enough memory, this
+rarely presents a performance concern. However, it means that the lexer currently can't be used with streaming data
+such as open files or sockets. This limitation is primarily a side-effect of using the <tt>re</tt> module.
+
+<p>
+<li>
+To handle reserved words, it is usually easier to just match an identifier and do a special name lookup in a function
+like this:
+
+<blockquote>
+<pre>
+reserved = {
+ 'if' : 'IF',
+ 'then' : 'THEN',
+ 'else' : 'ELSE',
+ 'while' : 'WHILE',
+ ...
+}
+
+def t_ID(t):
+ r'[a-zA-Z_][a-zA-Z_0-9]*'
+ t.type = reserved.get(t.value,'ID') # Check for reserved words
+ return t
+</pre>
+</blockquote>
+
+<p>
+<li>The lexer requires tokens to be defined as class instances with <tt>t.type</tt>, <tt>t.value</tt>, and <tt>t.lineno</tt>
+attributes. By default, tokens are created as instances of the <tt>LexToken</tt> class defined internally to <tt>lex.py</tt>.
+If desired, you can create new kinds of tokens provided that they have the three required attributes. However,
+in practice, it is probably safer to stick with the default.
+
+<p>
+<li>The only safe attribute for assigning token properties is <tt>t.value</tt>. In some cases, you may want to attach
+a number of different properties to a token (e.g., symbol table entries for identifiers). To do this, replace <tt>t.value</tt>
+with a tuple or class instance. For example:
+
+<blockquote>
+<pre>
+def t_ID(t):
+ ...
+ # For identifiers, create a (lexeme, symtab) tuple
+ t.value = (t.value, symbol_lookup(t.value))
+ ...
+ return t
+</pre>
+</blockquote>
+
+Although allowed, do NOT assign additional attributes to the token object. For example,
+<blockquote>
+<pre>
+def t_ID(t):
+ ...
+ # Bad implementation of above
+ t.symtab = symbol_lookup(t.value)
+ ...
+</pre>
+</blockquote>
+
+The reason you don't want to do this is that the <tt>yacc.py</tt>
+module only provides public access to the <tt>t.value</tt> attribute of each token.
+Therefore, any other attributes you assign are inaccessible (if you are familiar
+with the internals of C lex/yacc, <tt>t.value</tt> is the same as <tt>yylval.tok</tt>).
+
+<p>
+<li>To track line numbers, the lexer internally maintains a line
+number variable. Each token automatically gets the value of the
+current line number in the <tt>t.lineno</tt> attribute. To modify the
+current line number, simply change the <tt>t.lineno</tt> attribute
+in a function rule (as previously shown for
+<tt>t_newline()</tt>). Even if the resulting token is discarded,
+changes to the line number remain in effect for subsequent tokens.
+
+<p>
+<li>To support multiple scanners in the same application, the <tt>lex.lex()</tt> function
+actually returns a special <tt>Lexer</tt> object. This object has two methods
+<tt>input()</tt> and <tt>token()</tt> that can be used to supply input and get tokens. For example:
+
+<blockquote>
+<pre>
+lexer = lex.lex()
+lexer.input(sometext)
+while 1:
+ tok = lexer.token()
+ if not tok: break
+ print tok
+</pre>
+</blockquote>
+
+The functions <tt>lex.input()</tt> and <tt>lex.token()</tt> are bound to the <tt>input()</tt>
+and <tt>token()</tt> methods of the last lexer created by the lex module.
+
+
+<p>
+<li>To reduce compiler startup time and improve performance, the lexer can be built in optimized mode as follows:
+
+<blockquote>
+<pre>
+lex.lex(optimize=1)
+</pre>
+</blockquote>
+
+When used, most error checking and validation is disabled. This provides a slight performance
+gain while tokenizing and tends to chop a few tenths of a second off startup time. Since it disables
+error checking, this mode is not the default and is not recommended during development. However, once
+you have your compiler fully working, it is usually safe to disable the error checks.
+
+<p>
+<li>You can enable some additional debugging by building the lexer like this:
+
+<blockquote>
+<pre>
+lex.lex(debug=1)
+</pre>
+</blockquote>
+
+<p>
+<li>To help you debug your lexer, <tt>lex.py</tt> comes with a simple main program which will either
+tokenize input read from standard input or from a file. To use it, simply put this in your lexer:
+
+<blockquote>
+<pre>
+if __name__ == '__main__':
+ lex.runmain()
+</pre>
+</blockquote>
+
+Then, run your lexer as a main program such as <tt>python mylex.py</tt>.
+
+<p>
+<li>Since the lexer is written entirely in Python, its performance is
+largely determined by that of the Python <tt>re</tt> module. Although
+the lexer has been written to be as efficient as possible, it's not
+blazingly fast when used on very large input files. Sorry. If
+performance is a concern, you might consider upgrading to the most
+recent version of Python, creating a hand-written lexer, or offloading
+the lexer into a C extension module. In defense of <tt>lex.py</tt>,
+its performance is not <em>that</em> bad when used on reasonably
+sized input files. For instance, lexing a 4700 line C program with
+32000 input tokens takes about 20 seconds on a 200 MHz PC. Obviously,
+it will run much faster on a more speedy machine.
+
+</ul>
+
+<h2>Parsing basics</h2>
+
+<tt>yacc.py</tt> is used to parse language syntax. Before showing an
+example, there are a few important bits of background that must be
+mentioned. First, <tt>syntax</tt> is usually specified in terms of a
+context free grammar (CFG). For example, if you wanted to parse
+simple arithmetic expressions, you might first write an unambiguous
+grammar specification like this:
+
+<blockquote>
+<pre>
+expression : expression + term
+ | expression - term
+ | term
+
+term : term * factor
+ | term / factor
+ | factor
+
+factor : NUMBER
+ | ( expression )
+</pre>
+</blockquote>
+
+Next, the semantic behavior of a language is often specified using a
+technique known as syntax directed translation. In syntax directed
+translation, attributes are attached to each symbol in a given grammar
+rule along with an action. Whenever a particular grammar rule is
+recognized, the action describes what to do. For example, given the
+expression grammar above, you might write the specification for a
+simple calculator like this:
+
+<blockquote>
+<pre>
+Grammar Action
+-------------------------------- --------------------------------------------
+expression0 : expression1 + term expression0.val = expression1.val + term.val
+ | expression1 - term expression0.val = expression1.val - term.val
+ | term expression0.val = term.val
+
+term0 : term1 * factor term0.val = term1.val * factor.val
+ | term1 / factor term0.val = term1.val / factor.val
+ | factor term0.val = factor.val
+
+factor : NUMBER factor.val = int(NUMBER.lexval)
+ | ( expression ) factor.val = expression.val
+</pre>
+</blockquote>
+
+Finally, Yacc uses a parsing technique known as LR-parsing or shift-reduce parsing. LR parsing is a
+bottom up technique that tries to recognize the right-hand-side of various grammar rules.
+Whenever a valid right-hand-side is found in the input, the appropriate action code is triggered and the
+grammar symbols are replaced by the grammar symbol on the left-hand-side.
+
+<p>
+LR parsing is commonly implemented by shifting grammar symbols onto a stack and looking at the stack and the next
+input token for patterns. The details of the algorithm can be found in a compiler text, but the
+following example illustrates the steps that are performed if you wanted to parse the expression
+<tt>3 + 5 * (10 - 20)</tt> using the grammar defined above:
+
+<blockquote>
+<pre>
+Step Symbol Stack                             Input Tokens  Action
+---- -------------------------------  --------------------  -------------------------------
+1    $                                3 + 5 * ( 10 - 20 )$  Shift 3
+2    $ 3                                + 5 * ( 10 - 20 )$  Reduce factor : NUMBER
+3    $ factor                           + 5 * ( 10 - 20 )$  Reduce term : factor
+4    $ term                             + 5 * ( 10 - 20 )$  Reduce expr : term
+5    $ expr                             + 5 * ( 10 - 20 )$  Shift +
+6    $ expr +                             5 * ( 10 - 20 )$  Shift 5
+7    $ expr + 5                             * ( 10 - 20 )$  Reduce factor : NUMBER
+8    $ expr + factor                        * ( 10 - 20 )$  Reduce term : factor
+9    $ expr + term                          * ( 10 - 20 )$  Shift *
+10   $ expr + term *                          ( 10 - 20 )$  Shift (
+11   $ expr + term * (                          10 - 20 )$  Shift 10
+12   $ expr + term * ( 10                          - 20 )$  Reduce factor : NUMBER
+13   $ expr + term * ( factor                      - 20 )$  Reduce term : factor
+14   $ expr + term * ( term                        - 20 )$  Reduce expr : term
+15   $ expr + term * ( expr                        - 20 )$  Shift -
+16   $ expr + term * ( expr -                        20 )$  Shift 20
+17   $ expr + term * ( expr - 20                        )$  Reduce factor : NUMBER
+18   $ expr + term * ( expr - factor                    )$  Reduce term : factor
+19   $ expr + term * ( expr - term                      )$  Reduce expr : expr - term
+20   $ expr + term * ( expr                             )$  Shift )
+21   $ expr + term * ( expr )                            $  Reduce factor : (expr)
+22   $ expr + term * factor                              $  Reduce term : term * factor
+23   $ expr + term                                       $  Reduce expr : expr + term
+24   $ expr                                              $  Reduce expr
+25   $                                                   $  Success!
+</pre>
+</blockquote>
+
+When parsing the expression, an underlying state machine and the current input token determine what to do next.
+If the next token looks like part of a valid grammar rule (based on other items on the stack), it is generally shifted
+onto the stack. If the top of the stack contains a valid right-hand-side of a grammar rule, it is
+usually "reduced" and the symbols replaced with the symbol on the left-hand-side. When this reduction occurs, the
+appropriate action is triggered (if defined). If the input token can't be shifted and the top of stack doesn't match
+any grammar rules, a syntax error has occurred and the parser must take some kind of recovery step (or bail out).
+
+<p>
+It is important to note that the underlying implementation is actually built around a large finite-state machine
+and some tables. The construction of these tables is quite complicated and beyond the scope of this discussion.
+However, subtle details of this process explain why, in the example above, the parser chooses to shift a token
+onto the stack in step 9 rather than reducing the rule <tt>expr : expr + term</tt>.
+
+<h2>Yacc example</h2>
+
+Suppose you wanted to make a grammar for simple arithmetic expressions as previously described. Here is
+how you would do it with <tt>yacc.py</tt>:
+
+<blockquote>
+<pre>
+# Yacc example
+
+import yacc
+
+# Get the token map from the lexer. This is required.
+from calclex import tokens
+
+def p_expression_plus(t):
+ 'expression : expression PLUS term'
+ t[0] = t[1] + t[3]
+
+def p_expression_minus(t):
+ 'expression : expression MINUS term'
+ t[0] = t[1] - t[3]
+
+def p_expression_term(t):
+ 'expression : term'
+ t[0] = t[1]
+
+def p_term_times(t):
+ 'term : term TIMES factor'
+ t[0] = t[1] * t[3]
+
+def p_term_div(t):
+ 'term : term DIVIDE factor'
+ t[0] = t[1] / t[3]
+
+def p_term_factor(t):
+ 'term : factor'
+ t[0] = t[1]
+
+def p_factor_num(t):
+ 'factor : NUMBER'
+ t[0] = t[1]
+
+def p_factor_expr(t):
+ 'factor : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+# Error rule for syntax errors
+def p_error(t):
+ print "Syntax error in input!"
+
+# Build the parser
+yacc.yacc()
+
+while 1:
+ try:
+ s = raw_input('calc > ')
+ except EOFError:
+ break
+ if not s: continue
+ result = yacc.parse(s)
+ print result
+</pre>
+</blockquote>
+
+In this example, each grammar rule is defined by a Python function where the docstring to that function contains the
+appropriate context-free grammar specification (an idea borrowed from John Aycock's SPARK toolkit). Each function accepts a single
+argument <tt>t</tt> that is a sequence containing the values of each grammar symbol in the corresponding rule. The values of
+<tt>t[i]</tt> are mapped to grammar symbols as shown here:
+
+<blockquote>
+<pre>
+def p_expression_plus(t):
+ 'expression : expression PLUS term'
+ # ^ ^ ^ ^
+ # t[0] t[1] t[2] t[3]
+
+ t[0] = t[1] + t[3]
+</pre>
+</blockquote>
+
+For tokens, the "value" in the corresponding <tt>t[i]</tt> is the
+<em>same</em> as the value of the <tt>t.value</tt> attribute assigned
+in the lexer module. For non-terminals, the value is determined by
+whatever is placed in <tt>t[0]</tt> when rules are reduced. This
+value can be anything at all. However, it is probably most common for
+the value to be a simple Python type, a tuple, or an instance. In this example, we
+are relying on the fact that the <tt>NUMBER</tt> token stores an integer value in its value
+field. All of the other rules simply perform various types of integer operations and store
+the result.
+
+<p>
+The first rule defined in the yacc specification determines the starting grammar
+symbol (in this case, a rule for <tt>expression</tt> appears first). Whenever
+the starting rule is reduced by the parser and no more input is available, parsing
+stops and the final value is returned (this value will be whatever the top-most rule
+placed in <tt>t[0]</tt>).
+
+<p>The <tt>p_error(t)</tt> rule is defined to catch syntax errors. See the error handling section
+below for more detail.
+
+<p>
+To build the parser, call the <tt>yacc.yacc()</tt> function. This function
+looks at the module and attempts to construct all of the LR parsing tables for the grammar
+you have specified. The first time <tt>yacc.yacc()</tt> is invoked, you will get a message
+such as this:
+
+<blockquote>
+<pre>
+$ python calcparse.py
+yacc: Generating SLR parsing table...
+calc >
+</pre>
+</blockquote>
+
+Since table construction is relatively expensive (especially for large
+grammars), the resulting parsing table is written to the current
+directory in a file called <tt>parsetab.py</tt>. In addition, a
+debugging file called <tt>parser.out</tt> is created. On subsequent
+executions, <tt>yacc</tt> will reload the table from
+<tt>parsetab.py</tt> unless it has detected a change in the underlying
+grammar (in which case the tables and <tt>parsetab.py</tt> file are
+regenerated).
+
+<p>
+If any errors are detected in your grammar specification, <tt>yacc.py</tt> will produce
+diagnostic messages and possibly raise an exception. Some of the errors that can be detected include:
+
+<ul>
+<li>Duplicated function names (if more than one rule function has the same name in the grammar file).
+<li>Shift/reduce and reduce/reduce conflicts generated by ambiguous grammars.
+<li>Badly specified grammar rules.
+<li>Infinite recursion (rules that can never terminate).
+<li>Unused rules and tokens
+<li>Undefined rules and tokens
+</ul>
+
+The next few sections now discuss a few finer points of grammar construction.
+
+<h2>Combining Grammar Rule Functions</h2>
+
+When grammar rules are similar, they can be combined into a single function.
+For example, consider the two rules in our earlier example:
+
+<blockquote>
+<pre>
+def p_expression_plus(t):
+ 'expression : expression PLUS term'
+ t[0] = t[1] + t[3]
+
+def p_expression_minus(t):
+ 'expression : expression MINUS term'
+ t[0] = t[1] - t[3]
+</pre>
+</blockquote>
+
+Instead of writing two functions, you might write a single function like this:
+
+<blockquote>
+<pre>
+def p_expression(t):
+ '''expression : expression PLUS term
+ | expression MINUS term'''
+ if t[2] == '+':
+ t[0] = t[1] + t[3]
+ elif t[2] == '-':
+ t[0] = t[1] - t[3]
+</pre>
+</blockquote>
+
+In general, the doc string for any given function can contain multiple grammar rules. So, it would
+have also been legal (although possibly confusing) to write this:
+
+<blockquote>
+<pre>
+def p_binary_operators(t):
+ '''expression : expression PLUS term
+ | expression MINUS term
+ term : term TIMES factor
+ | term DIVIDE factor'''
+ if t[2] == '+':
+ t[0] = t[1] + t[3]
+ elif t[2] == '-':
+ t[0] = t[1] - t[3]
+ elif t[2] == '*':
+ t[0] = t[1] * t[3]
+ elif t[2] == '/':
+ t[0] = t[1] / t[3]
+</pre>
+</blockquote>
+
+When combining grammar rules into a single function, it is usually a good idea for all of the rules to have
+a similar structure (e.g., the same number of terms). Otherwise, the corresponding action code may be more
+complicated than necessary.
+
+<h2>Empty Productions</h2>
+
+<tt>yacc.py</tt> can handle empty productions by defining a rule like this:
+
+<blockquote>
+<pre>
+def p_empty(t):
+ 'empty :'
+ pass
+</pre>
+</blockquote>
+
+Now to use the empty production, simply use 'empty' as a symbol. For example:
+
+<blockquote>
+<pre>
+def p_optitem(t):
+ '''optitem : item
+ | empty'''
+ ...
+</pre>
+</blockquote>
+
+<h2>Dealing With Ambiguous Grammars</h2>
+
+The expression grammar given in the earlier example has been written in a special format to eliminate ambiguity.
+However, in many situations, it is extremely difficult or awkward to write grammars in this format. A
+much more natural way to express the grammar is in a more compact form like this:
+
+<blockquote>
+<pre>
+expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression
+ | LPAREN expression RPAREN
+ | NUMBER
+</pre>
+</blockquote>
+
+Unfortunately, this grammar specification is ambiguous. For example, if you are parsing the string
+"3 * 4 + 5", there is no way to tell how the operators are supposed to be grouped.
+For example, does this expression mean "(3 * 4) + 5" or is it "3 * (4+5)"?
+
+<p>
+When an ambiguous grammar is given to <tt>yacc.py</tt> it will print messages about "shift/reduce conflicts"
+or "reduce/reduce conflicts". A shift/reduce conflict is caused when the parser generator can't decide
+whether or not to reduce a rule or shift a symbol on the parsing stack. For example, consider
+the string "3 * 4 + 5" and the internal parsing stack:
+
+<blockquote>
+<pre>
+Step Symbol Stack Input Tokens Action
+---- --------------------- --------------------- -------------------------------
+1 $ 3 * 4 + 5$ Shift 3
+2 $ 3 * 4 + 5$ Reduce : expression : NUMBER
+3 $ expr * 4 + 5$ Shift *
+4 $ expr * 4 + 5$ Shift 4
+5 $ expr * 4 + 5$ Reduce: expression : NUMBER
+6 $ expr * expr + 5$ SHIFT/REDUCE CONFLICT ????
+</pre>
+</blockquote>
+
+In this case, when the parser reaches step 6, it has two options. One is to reduce the
+rule <tt>expr : expr * expr</tt> on the stack. The other option is to shift the
+token <tt>+</tt> on the stack. Both options are perfectly legal from the rules
+of the context-free-grammar.
+
+<p>
+By default, all shift/reduce conflicts are resolved in favor of shifting. Therefore, in the above
+example, the parser will always shift the <tt>+</tt> instead of reducing. Although this
+strategy works in many cases (including the ambiguous if-then-else), it is not enough for arithmetic
+expressions. In fact, in the above example, the decision to shift <tt>+</tt> is completely wrong---we should have
+reduced <tt>expr * expr</tt> since multiplication has higher precedence than addition.
+
+<p>To resolve ambiguity, especially in expression grammars, <tt>yacc.py</tt> allows individual
+tokens to be assigned a precedence level and associativity. This is done by adding a variable
+<tt>precedence</tt> to the grammar file like this:
+
+<blockquote>
+<pre>
+precedence = (
+ ('left', 'PLUS', 'MINUS'),
+ ('left', 'TIMES', 'DIVIDE'),
+)
+</pre>
+</blockquote>
+
+This declaration specifies that <tt>PLUS</tt>/<tt>MINUS</tt> have
+the same precedence level and are left-associative and that
+<tt>TIMES</tt>/<tt>DIVIDE</tt> have the same precedence and are left-associative.
+Furthermore, the declaration specifies that <tt>TIMES</tt>/<tt>DIVIDE</tt> have higher
+precedence than <tt>PLUS</tt>/<tt>MINUS</tt> (since they appear later in the
+precedence specification).
+
+<p>
+The precedence specification is used to attach a numerical precedence value and associativity direction
+to each grammar rule. This is always determined by the precedence of the right-most terminal symbol. Therefore,
+if PLUS/MINUS had a precedence of 1 and TIMES/DIVIDE had a precedence of 2, the grammar rules
+would have precedence values as follows:
+
+<blockquote>
+<pre>
+expression : expression PLUS expression # prec = 1, left
+ | expression MINUS expression # prec = 1, left
+ | expression TIMES expression # prec = 2, left
+ | expression DIVIDE expression # prec = 2, left
+ | LPAREN expression RPAREN # prec = unknown
+ | NUMBER # prec = unknown
+</pre>
+</blockquote>
+
+When shift/reduce conflicts are encountered, the parser generator resolves the conflict by
+looking at the precedence rules and associativity specifiers.
+
+<p>
+<ol>
+<li>If the current token has higher precedence, it is shifted.
+<li>If the grammar rule on the stack has higher precedence, the rule is reduced.
+<li>If the current token and the grammar rule have the same precedence, the
+rule is reduced for left associativity, whereas the token is shifted for right associativity.
+<li>If nothing is known about the precedence, shift/reduce conflicts are resolved in
+favor of shifting (the default).
+</ol>
+
+<p>
+When shift/reduce conflicts are resolved using the first three techniques (with the help of
+precedence rules), <tt>yacc.py</tt> will report no errors or conflicts in the grammar.
+
+<p>
+One problem with the precedence specifier technique is that it is sometimes necessary to
+change the precedence of an operator in certain contexts. For example, consider a unary-minus operator
+in "3 + 4 * -5". Normally, unary minus has a very high precedence--being evaluated before the multiply.
+However, in our precedence specifier, MINUS has a lower precedence than TIMES. To deal with this,
+precedence rules can be given for fictitious tokens like this:
+
+<blockquote>
+<pre>
+precedence = (
+ ('left', 'PLUS', 'MINUS'),
+ ('left', 'TIMES', 'DIVIDE'),
+ ('right', 'UMINUS'), # Unary minus operator
+)
+</pre>
+</blockquote>
+
+Now, in the grammar file, we can write our unary minus rule like this:
+
+<blockquote>
+<pre>
+def p_expr_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+</pre>
+</blockquote>
+
+In this case, <tt>%prec UMINUS</tt> overrides the default rule precedence--setting it to that
+of UMINUS in the precedence specifier.
+
+<p>
+It is also possible to specify non-associativity in the <tt>precedence</tt> table. This would
+be used when you <em>don't</em> want operations to chain together. For example, suppose
+you wanted to support comparison operators like <tt>&lt;</tt> and <tt>&gt;</tt> but you didn't want to allow
+combinations like <tt>a &lt; b &lt; c</tt>. To do this, simply specify a rule like this:
+
+<blockquote>
+<pre>
+precedence = (
+ ('nonassoc', 'LESSTHAN', 'GREATERTHAN'), # Nonassociative operators
+ ('left', 'PLUS', 'MINUS'),
+ ('left', 'TIMES', 'DIVIDE'),
+ ('right', 'UMINUS'), # Unary minus operator
+)
+</pre>
+</blockquote>
+
+<p>
+Reduce/reduce conflicts are caused when there are multiple grammar
+rules that can be applied to a given set of symbols. This kind of
+conflict is almost always bad and is always resolved by picking the
+rule that appears first in the grammar file. Reduce/reduce conflicts
+are almost always caused when different sets of grammar rules somehow
+generate the same set of symbols. For example:
+
+<blockquote>
+<pre>
+assignment : ID EQUALS NUMBER
+ | ID EQUALS expression
+
+expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression
+ | LPAREN expression RPAREN
+ | NUMBER
+</pre>
+</blockquote>
+
+In this case, a reduce/reduce conflict exists between these two rules:
+
+<blockquote>
+<pre>
+assignment : ID EQUALS NUMBER
+expression : NUMBER
+</pre>
+</blockquote>
+
+For example, if you wrote "a = 5", the parser can't figure out if this
+is supposed to be reduced as <tt>assignment : ID EQUALS NUMBER</tt> or
+whether it's supposed to reduce the 5 as an expression and then reduce
+the rule <tt>assignment : ID EQUALS expression</tt>.
+
+<h2>The parser.out file</h2>
+
+Tracking down shift/reduce and reduce/reduce conflicts is one of the finer pleasures of using an LR
+parsing algorithm. To assist in debugging, <tt>yacc.py</tt> creates a debugging file called
+'parser.out' when it generates the parsing table. The contents of this file look like the following:
+
+<blockquote>
+<pre>
+Unused terminals:
+
+
+Grammar
+
+Rule 1 expression -> expression PLUS expression
+Rule 2 expression -> expression MINUS expression
+Rule 3 expression -> expression TIMES expression
+Rule 4 expression -> expression DIVIDE expression
+Rule 5 expression -> NUMBER
+Rule 6 expression -> LPAREN expression RPAREN
+
+Terminals, with rules where they appear
+
+TIMES : 3
+error :
+MINUS : 2
+RPAREN : 6
+LPAREN : 6
+DIVIDE : 4
+PLUS : 1
+NUMBER : 5
+
+Nonterminals, with rules where they appear
+
+expression : 1 1 2 2 3 3 4 4 6 0
+
+
+Parsing method: SLR
+
+
+state 0
+
+ S' -> . expression
+ expression -> . expression PLUS expression
+ expression -> . expression MINUS expression
+ expression -> . expression TIMES expression
+ expression -> . expression DIVIDE expression
+ expression -> . NUMBER
+ expression -> . LPAREN expression RPAREN
+
+ NUMBER shift and go to state 3
+ LPAREN shift and go to state 2
+
+
+state 1
+
+ S' -> expression .
+ expression -> expression . PLUS expression
+ expression -> expression . MINUS expression
+ expression -> expression . TIMES expression
+ expression -> expression . DIVIDE expression
+
+ PLUS shift and go to state 6
+ MINUS shift and go to state 5
+ TIMES shift and go to state 4
+ DIVIDE shift and go to state 7
+
+
+state 2
+
+ expression -> LPAREN . expression RPAREN
+ expression -> . expression PLUS expression
+ expression -> . expression MINUS expression
+ expression -> . expression TIMES expression
+ expression -> . expression DIVIDE expression
+ expression -> . NUMBER
+ expression -> . LPAREN expression RPAREN
+
+ NUMBER shift and go to state 3
+ LPAREN shift and go to state 2
+
+
+state 3
+
+ expression -> NUMBER .
+
+ $ reduce using rule 5
+ PLUS reduce using rule 5
+ MINUS reduce using rule 5
+ TIMES reduce using rule 5
+ DIVIDE reduce using rule 5
+ RPAREN reduce using rule 5
+
+
+state 4
+
+ expression -> expression TIMES . expression
+ expression -> . expression PLUS expression
+ expression -> . expression MINUS expression
+ expression -> . expression TIMES expression
+ expression -> . expression DIVIDE expression
+ expression -> . NUMBER
+ expression -> . LPAREN expression RPAREN
+
+ NUMBER shift and go to state 3
+ LPAREN shift and go to state 2
+
+
+state 5
+
+ expression -> expression MINUS . expression
+ expression -> . expression PLUS expression
+ expression -> . expression MINUS expression
+ expression -> . expression TIMES expression
+ expression -> . expression DIVIDE expression
+ expression -> . NUMBER
+ expression -> . LPAREN expression RPAREN
+
+ NUMBER shift and go to state 3
+ LPAREN shift and go to state 2
+
+
+state 6
+
+ expression -> expression PLUS . expression
+ expression -> . expression PLUS expression
+ expression -> . expression MINUS expression
+ expression -> . expression TIMES expression
+ expression -> . expression DIVIDE expression
+ expression -> . NUMBER
+ expression -> . LPAREN expression RPAREN
+
+ NUMBER shift and go to state 3
+ LPAREN shift and go to state 2
+
+
+state 7
+
+ expression -> expression DIVIDE . expression
+ expression -> . expression PLUS expression
+ expression -> . expression MINUS expression
+ expression -> . expression TIMES expression
+ expression -> . expression DIVIDE expression
+ expression -> . NUMBER
+ expression -> . LPAREN expression RPAREN
+
+ NUMBER shift and go to state 3
+ LPAREN shift and go to state 2
+
+
+state 8
+
+ expression -> LPAREN expression . RPAREN
+ expression -> expression . PLUS expression
+ expression -> expression . MINUS expression
+ expression -> expression . TIMES expression
+ expression -> expression . DIVIDE expression
+
+ RPAREN shift and go to state 13
+ PLUS shift and go to state 6
+ MINUS shift and go to state 5
+ TIMES shift and go to state 4
+ DIVIDE shift and go to state 7
+
+
+state 9
+
+ expression -> expression TIMES expression .
+ expression -> expression . PLUS expression
+ expression -> expression . MINUS expression
+ expression -> expression . TIMES expression
+ expression -> expression . DIVIDE expression
+
+ $ reduce using rule 3
+ PLUS reduce using rule 3
+ MINUS reduce using rule 3
+ TIMES reduce using rule 3
+ DIVIDE reduce using rule 3
+ RPAREN reduce using rule 3
+
+ ! PLUS [ shift and go to state 6 ]
+ ! MINUS [ shift and go to state 5 ]
+ ! TIMES [ shift and go to state 4 ]
+ ! DIVIDE [ shift and go to state 7 ]
+
+state 10
+
+ expression -> expression MINUS expression .
+ expression -> expression . PLUS expression
+ expression -> expression . MINUS expression
+ expression -> expression . TIMES expression
+ expression -> expression . DIVIDE expression
+
+ $ reduce using rule 2
+ PLUS reduce using rule 2
+ MINUS reduce using rule 2
+ RPAREN reduce using rule 2
+ TIMES shift and go to state 4
+ DIVIDE shift and go to state 7
+
+ ! TIMES [ reduce using rule 2 ]
+ ! DIVIDE [ reduce using rule 2 ]
+ ! PLUS [ shift and go to state 6 ]
+ ! MINUS [ shift and go to state 5 ]
+
+state 11
+
+ expression -> expression PLUS expression .
+ expression -> expression . PLUS expression
+ expression -> expression . MINUS expression
+ expression -> expression . TIMES expression
+ expression -> expression . DIVIDE expression
+
+ $ reduce using rule 1
+ PLUS reduce using rule 1
+ MINUS reduce using rule 1
+ RPAREN reduce using rule 1
+ TIMES shift and go to state 4
+ DIVIDE shift and go to state 7
+
+ ! TIMES [ reduce using rule 1 ]
+ ! DIVIDE [ reduce using rule 1 ]
+ ! PLUS [ shift and go to state 6 ]
+ ! MINUS [ shift and go to state 5 ]
+
+state 12
+
+ expression -> expression DIVIDE expression .
+ expression -> expression . PLUS expression
+ expression -> expression . MINUS expression
+ expression -> expression . TIMES expression
+ expression -> expression . DIVIDE expression
+
+ $ reduce using rule 4
+ PLUS reduce using rule 4
+ MINUS reduce using rule 4
+ TIMES reduce using rule 4
+ DIVIDE reduce using rule 4
+ RPAREN reduce using rule 4
+
+ ! PLUS [ shift and go to state 6 ]
+ ! MINUS [ shift and go to state 5 ]
+ ! TIMES [ shift and go to state 4 ]
+ ! DIVIDE [ shift and go to state 7 ]
+
+state 13
+
+ expression -> LPAREN expression RPAREN .
+
+ $ reduce using rule 6
+ PLUS reduce using rule 6
+ MINUS reduce using rule 6
+ TIMES reduce using rule 6
+ DIVIDE reduce using rule 6
+ RPAREN reduce using rule 6
+</pre>
+</blockquote>
+
+In the file, each state of the grammar is described. Within each state the "." indicates the current
+location of the parse within any applicable grammar rules. In addition, the actions for each valid
+input token are listed. When a shift/reduce or reduce/reduce conflict arises, rules <em>not</em> selected
+are prefixed with an !. For example:
+
+<blockquote>
+<pre>
+ ! TIMES [ reduce using rule 2 ]
+ ! DIVIDE [ reduce using rule 2 ]
+ ! PLUS [ shift and go to state 6 ]
+ ! MINUS [ shift and go to state 5 ]
+</pre>
+</blockquote>
+
+By looking at these rules (and with a little practice), you can usually track down the source
+of most parsing conflicts. It should also be stressed that not all shift-reduce conflicts are
+bad. However, the only way to be sure that they are resolved correctly is to look at <tt>parser.out</tt>.
+
+<h2>Syntax Error Handling</h2>
+
+When a syntax error occurs during parsing, the error is immediately
+detected (i.e., the parser does not read any more tokens beyond the
+source of the error). Error recovery in LR parsers is a delicate
+topic that involves ancient rituals and black-magic. The recovery mechanism
+provided by <tt>yacc.py</tt> is comparable to Unix yacc so you may want
+to consult a book like O'Reilly's "Lex and Yacc" for some of the finer details.
+
+<p>
+When a syntax error occurs, <tt>yacc.py</tt> performs the following steps:
+
+<ol>
+<li>On the first occurrence of an error, the user-defined <tt>p_error()</tt> function
+is called with the offending token as an argument. Afterwards, the parser enters
+an "error-recovery" mode in which it will not make future calls to <tt>p_error()</tt> until it
+has successfully shifted at least 3 tokens onto the parsing stack.
+
+<p>
+<li>If no recovery action is taken in <tt>p_error()</tt>, the offending lookahead token is replaced
+with a special <tt>error</tt> token.
+
+<p>
+<li>If the offending lookahead token is already set to <tt>error</tt>, the top item of the parsing stack is
+deleted.
+
+<p>
+<li>If the entire parsing stack is unwound, the parser enters a restart state and attempts to start
+parsing from its initial state.
+
+<p>
+<li>If a grammar rule accepts <tt>error</tt> as a token, it will be
+shifted onto the parsing stack.
+
+<p>
+<li>If the top item of the parsing stack is <tt>error</tt>, lookahead tokens will be discarded until the
+parser can successfully shift a new symbol or reduce a rule involving <tt>error</tt>.
+</ol>
+
+<h4>Recovery and resynchronization with error rules</h4>
+
+The most well-behaved approach for handling syntax errors is to write grammar rules that include the <tt>error</tt>
+token. For example, suppose your language had a grammar rule for a print statement like this:
+
+<blockquote>
+<pre>
+def p_statement_print(t):
+ 'statement : PRINT expr SEMI'
+ ...
+</pre>
+</blockquote>
+
+To account for the possibility of a bad expression, you might write an additional grammar rule like this:
+
+<blockquote>
+<pre>
+def p_statement_print_error(t):
+ 'statement : PRINT error SEMI'
+ print "Syntax error in print statement. Bad expression"
+
+</pre>
+</blockquote>
+
+In this case, the <tt>error</tt> token will match any sequence of
+tokens that might appear up to the first semicolon that is
+encountered. Once the semicolon is reached, the rule will be
+invoked and the <tt>error</tt> token will go away.
+
+<p>
+This type of recovery is sometimes known as parser resynchronization.
+The <tt>error</tt> token acts as a wildcard for any bad input text and
+the token immediately following <tt>error</tt> acts as a
+synchronization token.
+
+<p>
+It is important to note that the <tt>error</tt> token usually does not appear as the last token
+on the right in an error rule. For example, a rule like this is usually a mistake:
+
+<blockquote>
+<pre>
+def p_statement_print_error(t):
+ 'statement : PRINT error'
+ print "Syntax error in print statement. Bad expression"
+</pre>
+</blockquote>
+
+This is because the first bad token encountered will cause the rule to
+be reduced--which may make it difficult to recover if more bad tokens
+immediately follow.
+
+<h4>Panic mode recovery</h4>
+
+An alternative error recovery scheme is to enter a panic mode recovery in which tokens are
+discarded to a point where the parser might be able to recover in some sensible manner.
+
+<p>
+Panic mode recovery is implemented entirely in the <tt>p_error()</tt> function. For example, this
+function starts discarding tokens until it reaches a closing '}'. Then, it restarts the
+parser in its initial state.
+
+<blockquote>
+<pre>
+def p_error(t):
+ print "Whoa. You are seriously hosed."
+ # Read ahead looking for a closing '}'
+ while 1:
+ tok = yacc.token() # Get the next token
+ if not tok or tok.type == 'RBRACE': break
+ yacc.restart()
+</pre>
+</blockquote>
+
+<p>
+This function simply discards the bad token and tells the parser that the error was ok.
+
+<blockquote>
+<pre>
+def p_error(t):
+ print "Syntax error at token", t.type
+ # Just discard the token and tell the parser it's okay.
+ yacc.errok()
+</pre>
+</blockquote>
+
+<P>
+Within the <tt>p_error()</tt> function, three functions are available to control the behavior
+of the parser:
+<p>
+<ul>
+<li><tt>yacc.errok()</tt>. This resets the parser state so it doesn't think it's in error-recovery
+mode. This will prevent an <tt>error</tt> token from being generated and will reset the internal
+error counters so that the next syntax error will call <tt>p_error()</tt> again.
+
+<p>
+<li><tt>yacc.token()</tt>. This returns the next token on the input stream.
+
+<p>
+<li><tt>yacc.restart()</tt>. This discards the entire parsing stack and resets the parser
+to its initial state.
+</ul>
+
+Note: these functions are only available when invoking <tt>p_error()</tt> and are not available
+at any other time.
+
+<p>
+To supply the next lookahead token to the parser, <tt>p_error()</tt> can return a token. This might be
+useful if trying to synchronize on special characters. For example:
+
+<blockquote>
+<pre>
+def p_error(t):
+ # Read ahead looking for a terminating ";"
+ while 1:
+ tok = yacc.token() # Get the next token
+ if not tok or tok.type == 'SEMI': break
+ yacc.errok()
+
+ # Return SEMI to the parser as the next lookahead token
+ return tok
+</pre>
+</blockquote>
+
+<h4>General comments on error handling</h4>
+
+For normal types of languages, error recovery with error rules and resynchronization characters is probably the most reliable
+technique. This is because you can instrument the grammar to catch errors at selected places where it is relatively easy
+to recover and continue parsing. Panic mode recovery is really only useful in certain specialized applications where you might want
+to discard huge portions of the input text to find a valid restart point.
+
+<h2>Line Number Tracking</h2>
+
+<tt>yacc.py</tt> automatically tracks line numbers for all of the grammar symbols and tokens it processes. To retrieve the line
+numbers, two functions are used in grammar rules:
+
+<ul>
+<li><tt>t.lineno(num)</tt>. Return the starting line number for symbol <em>num</em>
+<li><tt>t.linespan(num)</tt>. Return a tuple (startline,endline) with the starting and ending line number for symbol <em>num</em>.
+</ul>
+
+For example:
+
+<blockquote>
+<pre>
+def p_expression(t):
+ 'expression : expression PLUS expression'
+ t.lineno(1) # Line number of the left expression
+ t.lineno(2) # line number of the PLUS operator
+ t.lineno(3) # line number of the right expression
+ ...
+ start,end = t.linespan(3) # Start,end lines of the right expression
+
+</pre>
+</blockquote>
+
+Since line numbers are managed internally by the parser, there is usually no need to modify the line
+numbers. However, if you want to save the line numbers in a parse-tree node, you will need to make your own
+private copy.
+
+<h2>AST Construction</h2>
+
+<tt>yacc.py</tt> provides no special functions for constructing an abstract syntax tree. However, such
+construction is easy enough to do on your own. Simply create a data structure for abstract syntax tree nodes
+and assign nodes to <tt>t[0]</tt> in each rule.
+
+For example:
+
+<blockquote>
+<pre>
+class Expr: pass
+
+class BinOp(Expr):
+ def __init__(self,left,op,right):
+ self.type = "binop"
+ self.left = left
+ self.right = right
+ self.op = op
+
+class Number(Expr):
+ def __init__(self,value):
+ self.type = "number"
+ self.value = value
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+
+ t[0] = BinOp(t[1],t[2],t[3])
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = Number(t[1])
+</pre>
+</blockquote>
+
+To simplify tree traversal, it may make sense to pick a very generic tree structure for your parse tree nodes.
+For example:
+
+<blockquote>
+<pre>
+class Node:
+ def __init__(self,type,children=None,leaf=None):
+ self.type = type
+ if children:
+ self.children = children
+ else:
+ self.children = [ ]
+ self.leaf = leaf
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+
+ t[0] = Node("binop", [t[1],t[3]], t[2])
+</pre>
+</blockquote>
+
+<h2>Yacc implementation notes</h2>
+
+<ul>
+<li>By default, <tt>yacc.py</tt> relies on <tt>lex.py</tt> for tokenizing. However, an alternative tokenizer
+can be supplied as follows:
+
+<blockquote>
+<pre>
+yacc.parse(lexer=x)
+</pre>
+</blockquote>
+In this case, <tt>x</tt> must be a Lexer object that minimally has an <tt>x.token()</tt> method for retrieving the next
+token. If an input string is given to <tt>yacc.parse()</tt>, the lexer must also have an <tt>x.input()</tt> method.
+
+<p>
+<li>By default, <tt>yacc.py</tt> generates tables in debugging mode (which produces the parser.out file and other output).
+To disable this, use
+
+<blockquote>
+<pre>
+yacc.yacc(debug=0)
+</pre>
+</blockquote>
+
+<p>
+<li>To change the name of the <tt>parsetab.py</tt> file, use:
+
+<blockquote>
+<pre>
+yacc.yacc(tabmodule="foo")
+</pre>
+</blockquote>
+
+<P>
+<li>To print copious amounts of debugging during parsing, use:
+
+<blockquote>
+<pre>
+yacc.parse(debug=1)
+</pre>
+</blockquote>
+
+<p>
+<li>The <tt>yacc.yacc()</tt> function really returns a parser object. If you want to support multiple
+parsers in the same application, do this:
+
+<blockquote>
+<pre>
+p = yacc.yacc()
+...
+p.parse()
+</pre>
+</blockquote>
+
+Note: The function <tt>yacc.parse()</tt> is bound to the last parser that was generated.
+
+<p>
+<li>Since the generation of the SLR tables is relatively expensive, previously generated tables are
+cached and reused if possible. The decision to regenerate the tables is determined by taking an MD5
+checksum of all grammar rules and precedence rules. Only in the event of a mismatch are the tables regenerated.
+
+<p>
+It should be noted that table generation is reasonably efficient, even for grammars that involve around 100 rules
+and several hundred states. For more complex languages such as C, table generation may take 30-60 seconds on a slow
+machine. Please be patient.
+
+<p>
+<li>Since LR parsing is mostly driven by tables, the performance of the parser is largely independent of the
+size of the grammar. The biggest bottlenecks will be the lexer and the complexity of your grammar rules.
+</ul>
+
+<h2>Parser and Lexer State Management</h2>
+
+In advanced parsing applications, you may want to have multiple
+parsers and lexers. Furthermore, the parser may want to control the
+behavior of the lexer in some way.
+
+<p>
+To do this, it is important to note that both the lexer and parser are
+actually implemented as objects. These objects are returned by the
+<tt>lex()</tt> and <tt>yacc()</tt> functions respectively. For example:
+
+<blockquote>
+<pre>
+lexer = lex.lex() # Return lexer object
+parser = yacc.yacc() # Return parser object
+</pre>
+</blockquote>
+
+Within lexer and parser rules, these objects are also available. In the lexer,
+the "lexer" attribute of a token refers to the lexer object in use. For example:
+
+<blockquote>
+<pre>
+def t_NUMBER(t):
+ r'\d+'
+ ...
+ print t.lexer # Show lexer object
+</pre>
+</blockquote>
+
+In the parser, the "lexer" and "parser" attributes refer to the lexer
+and parser objects respectively.
+
+<blockquote>
+<pre>
+def p_expr_plus(t):
+ 'expr : expr PLUS expr'
+ ...
+ print t.parser # Show parser object
+ print t.lexer # Show lexer object
+</pre>
+</blockquote>
+
+If necessary, arbitrary attributes can be attached to the lexer or parser object.
+For example, if you wanted to have different parsing modes, you could attach a mode
+attribute to the parser object and look at it later.
+
+<h2>Using Python's Optimized Mode</h2>
+
+Because PLY uses information from doc-strings, parsing and lexing
+information must be gathered while running the Python interpreter in
+normal mode (i.e., not with the -O or -OO options). However, if you
+specify optimized mode like this:
+
+<blockquote>
+<pre>
+lex.lex(optimize=1)
+yacc.yacc(optimize=1)
+</pre>
+</blockquote>
+
+then PLY can later be used when Python runs in optimized mode. To make this work,
+make sure you first run Python in normal mode. Once the lexing and parsing tables
+have been generated the first time, run Python in optimized mode. PLY will use
+the tables without the need for doc strings.
+
+<p>
+Beware: running PLY in optimized mode disables a lot of error
+checking. You should only do this when your project has stabilized
+and you don't need to do any debugging.
+
+<h2>Where to go from here?</h2>
+
+The <tt>examples</tt> directory of the PLY distribution contains several simple examples. Please consult a
+compilers textbook for the theory and underlying implementation details of LR parsing.
+
+</body>
+</html>
+
+
+
+
+
+
+
diff --git a/ext/ply/example/ansic/README b/ext/ply/example/ansic/README
new file mode 100644
index 000000000..e049d3b4e
--- /dev/null
+++ b/ext/ply/example/ansic/README
@@ -0,0 +1,2 @@
+This example is incomplete. Was going to specify an ANSI C parser.
+This is part of it.
diff --git a/ext/ply/example/ansic/clex.py b/ext/ply/example/ansic/clex.py
new file mode 100644
index 000000000..afd995208
--- /dev/null
+++ b/ext/ply/example/ansic/clex.py
@@ -0,0 +1,161 @@
+# ----------------------------------------------------------------------
+# clex.py
+#
+# A lexer for ANSI C.
+# ----------------------------------------------------------------------
+
+import lex
+
+# Reserved words
+reserved = (
+ 'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE',
+ 'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER',
+ 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF',
+ 'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE',
+ )
+
+tokens = reserved + (
+ # Literals (identifier, integer constant, float constant, string constant, char const)
+ 'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST',
+
+ # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
+ 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
+ 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
+ 'LOR', 'LAND', 'LNOT',
+ 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',
+
+ # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
+ 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
+ 'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',
+
+ # Increment/decrement (++,--)
+ 'PLUSPLUS', 'MINUSMINUS',
+
+ # Structure dereference (->)
+ 'ARROW',
+
+ # Conditional operator (?)
+ 'CONDOP',
+
+ # Delimiters ( ) [ ] { } , . ; :
+ 'LPAREN', 'RPAREN',
+ 'LBRACKET', 'RBRACKET',
+ 'LBRACE', 'RBRACE',
+ 'COMMA', 'PERIOD', 'SEMI', 'COLON',
+
+ # Ellipsis (...)
+ 'ELLIPSIS',
+ )
+
+# Completely ignored characters
+t_ignore = ' \t\x0c'
+
+# Newlines
+def t_NEWLINE(t):
+ r'\n+'
+ t.lineno += t.value.count("\n")
+
+# Operators (regex metacharacters such as + * | ^ are backslash-escaped to match literally)
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_TIMES = r'\*'
+t_DIVIDE = r'/'
+t_MOD = r'%'
+t_OR = r'\|'
+t_AND = r'&'
+t_NOT = r'~'
+t_XOR = r'\^'  # escaped: a bare ^ is the start-of-string anchor, not a literal caret
+t_LSHIFT = r'<<'
+t_RSHIFT = r'>>'
+t_LOR = r'\|\|'
+t_LAND = r'&&'
+t_LNOT = r'!'
+t_LT = r'<'
+t_GT = r'>'
+t_LE = r'<='
+t_GE = r'>='
+t_EQ = r'=='
+t_NE = r'!='
+
+# Assignment operators (each pattern matches the literal operator text)
+
+t_EQUALS = r'='
+t_TIMESEQUAL = r'\*='
+t_DIVEQUAL = r'/='
+t_MODEQUAL = r'%='
+t_PLUSEQUAL = r'\+='
+t_MINUSEQUAL = r'-='
+t_LSHIFTEQUAL = r'<<='
+t_RSHIFTEQUAL = r'>>='
+t_ANDEQUAL = r'&='
+t_OREQUAL = r'\|='
+t_XOREQUAL = r'\^='  # escaped: a bare ^ would anchor at start of input and match only '='
+
+# Increment/decrement
+t_PLUSPLUS = r'\+\+'
+t_MINUSMINUS = r'--'
+
+# ->
+t_ARROW = r'->'
+
+# ?
+t_CONDOP = r'\?'
+
+# Delimiters
+t_LPAREN = r'\('
+t_RPAREN = r'\)'
+t_LBRACKET = r'\['
+t_RBRACKET = r'\]'
+t_LBRACE = r'\{'
+t_RBRACE = r'\}'
+t_COMMA = r','
+t_PERIOD = r'\.'
+t_SEMI = r';'
+t_COLON = r':'
+t_ELLIPSIS = r'\.\.\.'
+
+# Identifiers and reserved words
+
+reserved_map = { }
+for r in reserved:
+ reserved_map[r.lower()] = r
+
+def t_ID(t):
+ r'[A-Za-z_][\w_]*'
+ t.type = reserved_map.get(t.value,"ID")
+ return t
+
+# Integer literal
+t_ICONST = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'
+
+# Floating literal
+t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
+
+# String literal
+t_SCONST = r'\"([^\\\n]|(\\.))*?\"'
+
+# Character constant 'c' or L'c'
+t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''
+
+# Comments
+def t_comment(t):
+ r' /\*(.|\n)*?\*/'
+ t.lineno += t.value.count('\n')
+
+# Preprocessor directive (ignored)
+def t_preprocessor(t):
+ r'\#(.)*?\n'
+ t.lineno += 1
+
+def t_error(t):
+ print "Illegal character %s" % repr(t.value[0])
+ t.skip(1)
+
+lexer = lex.lex(optimize=1)
+if __name__ == "__main__":
+ lex.runmain(lexer)
+
+
+
+
+
diff --git a/ext/ply/example/ansic/cparse.py b/ext/ply/example/ansic/cparse.py
new file mode 100644
index 000000000..ddfd5c72b
--- /dev/null
+++ b/ext/ply/example/ansic/cparse.py
@@ -0,0 +1,859 @@
+# -----------------------------------------------------------------------------
+# cparse.py
+#
+# Simple parser for ANSI C. Based on the grammar in K&R, 2nd Ed.
+# -----------------------------------------------------------------------------
+
+import yacc
+import clex
+
+# Get the token map
+tokens = clex.tokens
+
+# translation-unit:
+
+def p_translation_unit_1(t):
+ 'translation_unit : external_declaration'
+ pass
+
+def p_translation_unit_2(t):
+ 'translation_unit : translation_unit external_declaration'
+ pass
+
+# external-declaration:
+
+def p_external_declaration_1(t):
+ 'external_declaration : function_definition'
+ pass
+
+def p_external_declaration_2(t):
+ 'external_declaration : declaration'
+ pass
+
+# function-definition:
+
+def p_function_definition_1(t):
+ 'function_definition : declaration_specifiers declarator declaration_list compound_statement'
+ pass
+
+def p_function_definition_2(t):
+ 'function_definition : declarator declaration_list compound_statement'
+ pass
+
+def p_function_definition_3(t):
+ 'function_definition : declarator compound_statement'
+ pass
+
+def p_function_definition_4(t):
+ 'function_definition : declaration_specifiers declarator compound_statement'
+ pass
+
+# declaration:
+
+def p_declaration_1(t):
+ 'declaration : declaration_specifiers init_declarator_list SEMI'
+ pass
+
+def p_declaration_2(t):
+ 'declaration : declaration_specifiers SEMI'
+ pass
+
+# declaration-list:
+
+def p_declaration_list_1(t):
+ 'declaration_list : declaration'
+ pass
+
+def p_declaration_list_2(t):
+ 'declaration_list : declaration_list declaration '
+ pass
+
+# declaration-specifiers
+def p_declaration_specifiers_1(t):
+ 'declaration_specifiers : storage_class_specifier declaration_specifiers'
+ pass
+
+def p_declaration_specifiers_2(t):
+ 'declaration_specifiers : type_specifier declaration_specifiers'
+ pass
+
+def p_declaration_specifiers_3(t):
+ 'declaration_specifiers : type_qualifier declaration_specifiers'
+ pass
+
+def p_declaration_specifiers_4(t):
+ 'declaration_specifiers : storage_class_specifier'
+ pass
+
+def p_declaration_specifiers_5(t):
+ 'declaration_specifiers : type_specifier'
+ pass
+
+def p_declaration_specifiers_6(t):
+ 'declaration_specifiers : type_qualifier'
+ pass
+
+# storage-class-specifier
+def p_storage_class_specifier(t):
+ '''storage_class_specifier : AUTO
+ | REGISTER
+ | STATIC
+ | EXTERN
+ | TYPEDEF
+ '''
+ pass
+
+# type-specifier:
+def p_type_specifier(t):
+ '''type_specifier : VOID
+ | CHAR
+ | SHORT
+ | INT
+ | LONG
+ | FLOAT
+ | DOUBLE
+ | SIGNED
+ | UNSIGNED
+ | struct_or_union_specifier
+ | enum_specifier
+ | TYPEID
+ '''
+ pass
+
+# type-qualifier:
+def p_type_qualifier(t):
+ '''type_qualifier : CONST
+ | VOLATILE'''
+ pass
+
+# struct-or-union-specifier
+
+def p_struct_or_union_specifier_1(t):
+ 'struct_or_union_specifier : struct_or_union ID LBRACE struct_declaration_list RBRACE'
+ pass
+
+def p_struct_or_union_specifier_2(t):
+ 'struct_or_union_specifier : struct_or_union LBRACE struct_declaration_list RBRACE'
+ pass
+
+def p_struct_or_union_specifier_3(t):
+ 'struct_or_union_specifier : struct_or_union ID'
+ pass
+
+# struct-or-union:
+def p_struct_or_union(t):
+ '''struct_or_union : STRUCT
+ | UNION
+ '''
+ pass
+
+# struct-declaration-list:
+
+def p_struct_declaration_list_1(t):
+ 'struct_declaration_list : struct_declaration'
+ pass
+
+def p_struct_declaration_list_2(t):  # recursive case: an existing list followed by one more struct_declaration
+    'struct_declaration_list : struct_declaration_list struct_declaration'
+    pass
+
+# init-declarator-list:
+
+def p_init_declarator_list_1(t):
+ 'init_declarator_list : init_declarator'
+ pass
+
+def p_init_declarator_list_2(t):
+ 'init_declarator_list : init_declarator_list COMMA init_declarator'
+ pass
+
+# init-declarator
+
+def p_init_declarator_1(t):
+ 'init_declarator : declarator'
+ pass
+
+def p_init_declarator_2(t):
+ 'init_declarator : declarator EQUALS initializer'
+ pass
+
+# struct-declaration:
+
+def p_struct_declaration(t):
+ 'struct_declaration : specifier_qualifier_list struct_declarator_list SEMI'
+ pass
+
+# specifier-qualifier-list:
+
+def p_specifier_qualifier_list_1(t):
+ 'specifier_qualifier_list : type_specifier specifier_qualifier_list'
+ pass
+
+def p_specifier_qualifier_list_2(t):
+ 'specifier_qualifier_list : type_specifier'
+ pass
+
+def p_specifier_qualifier_list_3(t):
+ 'specifier_qualifier_list : type_qualifier specifier_qualifier_list'
+ pass
+
+def p_specifier_qualifier_list_4(t):
+ 'specifier_qualifier_list : type_qualifier'
+ pass
+
+# struct-declarator-list:
+
+def p_struct_declarator_list_1(t):
+ 'struct_declarator_list : struct_declarator'
+ pass
+
+def p_struct_declarator_list_2(t):
+ 'struct_declarator_list : struct_declarator_list COMMA struct_declarator'
+ pass
+
+# struct-declarator:
+
+def p_struct_declarator_1(t):
+ 'struct_declarator : declarator'
+ pass
+
+def p_struct_declarator_2(t):
+ 'struct_declarator : declarator COLON constant_expression'
+ pass
+
+def p_struct_declarator_3(t):
+ 'struct_declarator : COLON constant_expression'
+ pass
+
+# enum-specifier:
+
+def p_enum_specifier_1(t):
+ 'enum_specifier : ENUM ID LBRACE enumerator_list RBRACE'
+ pass
+
+def p_enum_specifier_2(t):
+ 'enum_specifier : ENUM LBRACE enumerator_list RBRACE'
+ pass
+
+def p_enum_specifier_3(t):
+ 'enum_specifier : ENUM ID'
+ pass
+
+# enumerator_list:
+def p_enumerator_list_1(t):
+ 'enumerator_list : enumerator'
+ pass
+
+def p_enumerator_list_2(t):
+ 'enumerator_list : enumerator_list COMMA enumerator'
+ pass
+
+# enumerator:
+def p_enumerator_1(t):
+ 'enumerator : ID'
+ pass
+
+def p_enumerator_2(t):
+ 'enumerator : ID EQUALS constant_expression'
+ pass
+
+# declarator:
+
+def p_declarator_1(t):
+ 'declarator : pointer direct_declarator'
+ pass
+
+def p_declarator_2(t):
+ 'declarator : direct_declarator'
+ pass
+
+# direct-declarator:
+
+def p_direct_declarator_1(t):
+ 'direct_declarator : ID'
+ pass
+
+def p_direct_declarator_2(t):
+ 'direct_declarator : LPAREN declarator RPAREN'
+ pass
+
+def p_direct_declarator_3(t):
+ 'direct_declarator : direct_declarator LBRACKET constant_expression_opt RBRACKET'
+ pass
+
+def p_direct_declarator_4(t):
+ 'direct_declarator : direct_declarator LPAREN parameter_type_list RPAREN '
+ pass
+
+def p_direct_declarator_5(t):
+ 'direct_declarator : direct_declarator LPAREN identifier_list RPAREN '
+ pass
+
+def p_direct_declarator_6(t):
+ 'direct_declarator : direct_declarator LPAREN RPAREN '
+ pass
+
+# pointer:
+def p_pointer_1(t):
+ 'pointer : TIMES type_qualifier_list'
+ pass
+
+def p_pointer_2(t):
+ 'pointer : TIMES'
+ pass
+
+def p_pointer_3(t):
+ 'pointer : TIMES type_qualifier_list pointer'
+ pass
+
+def p_pointer_4(t):
+ 'pointer : TIMES pointer'
+ pass
+
+# type-qualifier-list:
+
+def p_type_qualifier_list_1(t):
+ 'type_qualifier_list : type_qualifier'
+ pass
+
+def p_type_qualifier_list_2(t):
+ 'type_qualifier_list : type_qualifier_list type_qualifier'
+ pass
+
+# parameter-type-list:
+
+def p_parameter_type_list_1(t):
+ 'parameter_type_list : parameter_list'
+ pass
+
+def p_parameter_type_list_2(t):
+ 'parameter_type_list : parameter_list COMMA ELLIPSIS'
+ pass
+
+# parameter-list:
+
+def p_parameter_list_1(t):
+ 'parameter_list : parameter_declaration'
+ pass
+
+def p_parameter_list_2(t):
+ 'parameter_list : parameter_list COMMA parameter_declaration'
+ pass
+
+# parameter-declaration:
+def p_parameter_declaration_1(t):
+ 'parameter_declaration : declaration_specifiers declarator'
+ pass
+
+def p_parameter_declaration_2(t):
+ 'parameter_declaration : declaration_specifiers abstract_declarator_opt'
+ pass
+
+# identifier-list:
+def p_identifier_list_1(t):
+ 'identifier_list : ID'
+ pass
+
+def p_identifier_list_2(t):
+ 'identifier_list : identifier_list COMMA ID'
+ pass
+
+# initializer:
+
+def p_initializer_1(t):
+ 'initializer : assignment_expression'
+ pass
+
+def p_initializer_2(t):
+ '''initializer : LBRACE initializer_list RBRACE
+ | LBRACE initializer_list COMMA RBRACE'''
+ pass
+
+# initializer-list:
+
+def p_initializer_list_1(t):
+ 'initializer_list : initializer'
+ pass
+
+def p_initializer_list_2(t):
+ 'initializer_list : initializer_list COMMA initializer'
+ pass
+
+# type-name:
+
+def p_type_name(t):
+ 'type_name : specifier_qualifier_list abstract_declarator_opt'
+ pass
+
+def p_abstract_declarator_opt_1(t):
+ 'abstract_declarator_opt : empty'
+ pass
+
+def p_abstract_declarator_opt_2(t):
+ 'abstract_declarator_opt : abstract_declarator'
+ pass
+
+# abstract-declarator:
+
+def p_abstract_declarator_1(t):
+ 'abstract_declarator : pointer '
+ pass
+
+def p_abstract_declarator_2(t):
+ 'abstract_declarator : pointer direct_abstract_declarator'
+ pass
+
+def p_abstract_declarator_3(t):
+ 'abstract_declarator : direct_abstract_declarator'
+ pass
+
+# direct-abstract-declarator:
+
+def p_direct_abstract_declarator_1(t):
+ 'direct_abstract_declarator : LPAREN abstract_declarator RPAREN'
+ pass
+
+def p_direct_abstract_declarator_2(t):
+ 'direct_abstract_declarator : direct_abstract_declarator LBRACKET constant_expression_opt RBRACKET'
+ pass
+
+def p_direct_abstract_declarator_3(t):
+ 'direct_abstract_declarator : LBRACKET constant_expression_opt RBRACKET'
+ pass
+
+def p_direct_abstract_declarator_4(t):
+ 'direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN'
+ pass
+
+def p_direct_abstract_declarator_5(t):
+ 'direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN'
+ pass
+
+# Optional fields in abstract declarators
+
+def p_constant_expression_opt_1(t):
+ 'constant_expression_opt : empty'
+ pass
+
+def p_constant_expression_opt_2(t):
+ 'constant_expression_opt : constant_expression'
+ pass
+
+def p_parameter_type_list_opt_1(t):
+ 'parameter_type_list_opt : empty'
+ pass
+
+def p_parameter_type_list_opt_2(t):
+ 'parameter_type_list_opt : parameter_type_list'
+ pass
+
+# statement:
+
+def p_statement(t):
+ '''
+ statement : labeled_statement
+ | expression_statement
+ | compound_statement
+ | selection_statement
+ | iteration_statement
+ | jump_statement
+ '''
+ pass
+
+# labeled-statement:
+
+def p_labeled_statement_1(t):
+ 'labeled_statement : ID COLON statement'
+ pass
+
+def p_labeled_statement_2(t):
+ 'labeled_statement : CASE constant_expression COLON statement'
+ pass
+
+def p_labeled_statement_3(t):
+ 'labeled_statement : DEFAULT COLON statement'
+ pass
+
+# expression-statement:
+def p_expression_statement(t):
+ 'expression_statement : expression_opt SEMI'
+ pass
+
+# compound-statement:
+
+def p_compound_statement_1(t):
+ 'compound_statement : LBRACE declaration_list statement_list RBRACE'
+ pass
+
+def p_compound_statement_2(t):
+ 'compound_statement : LBRACE statement_list RBRACE'
+ pass
+
+def p_compound_statement_3(t):
+ 'compound_statement : LBRACE declaration_list RBRACE'
+ pass
+
+def p_compound_statement_4(t):
+ 'compound_statement : LBRACE RBRACE'
+ pass
+
+# statement-list:
+
+def p_statement_list_1(t):
+ 'statement_list : statement'
+ pass
+
+def p_statement_list_2(t):
+ 'statement_list : statement_list statement'
+ pass
+
+# selection-statement
+
+def p_selection_statement_1(t):
+ 'selection_statement : IF LPAREN expression RPAREN statement'
+ pass
+
+def p_selection_statement_2(t):
+ 'selection_statement : IF LPAREN expression RPAREN statement ELSE statement '
+ pass
+
+def p_selection_statement_3(t):
+ 'selection_statement : SWITCH LPAREN expression RPAREN statement '
+ pass
+
+# iteration_statement:
+
+def p_iteration_statement_1(t):
+ 'iteration_statement : WHILE LPAREN expression RPAREN statement'
+ pass
+
+def p_iteration_statement_2(t):
+ 'iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN statement '
+ pass
+
+def p_iteration_statement_3(t):
+ 'iteration_statement : DO statement WHILE LPAREN expression RPAREN SEMI'
+ pass
+
+# jump_statement:
+
+def p_jump_statement_1(t):
+ 'jump_statement : GOTO ID SEMI'
+ pass
+
+def p_jump_statement_2(t):
+ 'jump_statement : CONTINUE SEMI'
+ pass
+
+def p_jump_statement_3(t):
+ 'jump_statement : BREAK SEMI'
+ pass
+
+def p_jump_statement_4(t):
+ 'jump_statement : RETURN expression_opt SEMI'
+ pass
+
+def p_expression_opt_1(t):
+ 'expression_opt : empty'
+ pass
+
+def p_expression_opt_2(t):
+ 'expression_opt : expression'
+ pass
+
+# expression:
+def p_expression_1(t):
+ 'expression : assignment_expression'
+ pass
+
+def p_expression_2(t):
+ 'expression : expression COMMA assignment_expression'
+ pass
+
+# assignment_expression:
+def p_assignment_expression_1(t):
+ 'assignment_expression : conditional_expression'
+ pass
+
+def p_assignment_expression_2(t):
+ 'assignment_expression : unary_expression assignment_operator assignment_expression'
+ pass
+
+# assignment_operator:
+def p_assignment_operator(t):
+ '''
+ assignment_operator : EQUALS
+ | TIMESEQUAL
+ | DIVEQUAL
+ | MODEQUAL
+ | PLUSEQUAL
+ | MINUSEQUAL
+ | LSHIFTEQUAL
+ | RSHIFTEQUAL
+ | ANDEQUAL
+ | OREQUAL
+ | XOREQUAL
+ '''
+ pass
+
+# conditional-expression
+def p_conditional_expression_1(t):
+ 'conditional_expression : logical_or_expression'
+ pass
+
+def p_conditional_expression_2(t):
+ 'conditional_expression : logical_or_expression CONDOP expression COLON conditional_expression '
+ pass
+
+# constant-expression
+
+def p_constant_expression(t):
+ 'constant_expression : conditional_expression'
+ pass
+
+# logical-or-expression
+
+def p_logical_or_expression_1(t):
+ 'logical_or_expression : logical_and_expression'
+ pass
+
+def p_logical_or_expression_2(t):
+ 'logical_or_expression : logical_or_expression LOR logical_and_expression'
+ pass
+
+# logical-and-expression
+
+def p_logical_and_expression_1(t):
+ 'logical_and_expression : inclusive_or_expression'
+ pass
+
+def p_logical_and_expression_2(t):
+ 'logical_and_expression : logical_and_expression LAND inclusive_or_expression'
+ pass
+
+# inclusive-or-expression:
+
+def p_inclusive_or_expression_1(t):
+ 'inclusive_or_expression : exclusive_or_expression'
+ pass
+
+def p_inclusive_or_expression_2(t):
+ 'inclusive_or_expression : inclusive_or_expression OR exclusive_or_expression'
+ pass
+
+# exclusive-or-expression:
+
+def p_exclusive_or_expression_1(t):
+ 'exclusive_or_expression : and_expression'
+ pass
+
+def p_exclusive_or_expression_2(t):
+ 'exclusive_or_expression : exclusive_or_expression XOR and_expression'
+ pass
+
+# AND-expression
+
+def p_and_expression_1(t):
+ 'and_expression : equality_expression'
+ pass
+
+def p_and_expression_2(t):
+ 'and_expression : and_expression AND equality_expression'
+ pass
+
+
+# equality-expression:
+def p_equality_expression_1(t):
+ 'equality_expression : relational_expression'
+ pass
+
+def p_equality_expression_2(t):
+ 'equality_expression : equality_expression EQ relational_expression'
+ pass
+
+def p_equality_expression_3(t):
+ 'equality_expression : equality_expression NE relational_expression'
+ pass
+
+
+# relational-expression:
+def p_relational_expression_1(t):
+ 'relational_expression : shift_expression'
+ pass
+
+def p_relational_expression_2(t):
+ 'relational_expression : relational_expression LT shift_expression'
+ pass
+
+def p_relational_expression_3(t):
+ 'relational_expression : relational_expression GT shift_expression'
+ pass
+
+def p_relational_expression_4(t):
+ 'relational_expression : relational_expression LE shift_expression'
+ pass
+
+def p_relational_expression_5(t):
+ 'relational_expression : relational_expression GE shift_expression'
+ pass
+
+# shift-expression
+
+def p_shift_expression_1(t):
+ 'shift_expression : additive_expression'
+ pass
+
+def p_shift_expression_2(t):
+ 'shift_expression : shift_expression LSHIFT additive_expression'
+ pass
+
+def p_shift_expression_3(t):
+ 'shift_expression : shift_expression RSHIFT additive_expression'
+ pass
+
+# additive-expression
+
+def p_additive_expression_1(t):
+ 'additive_expression : multiplicative_expression'
+ pass
+
+def p_additive_expression_2(t):
+ 'additive_expression : additive_expression PLUS multiplicative_expression'
+ pass
+
+def p_additive_expression_3(t):
+ 'additive_expression : additive_expression MINUS multiplicative_expression'
+ pass
+
+# multiplicative-expression
+
+def p_multiplicative_expression_1(t):
+ 'multiplicative_expression : cast_expression'
+ pass
+
+def p_multiplicative_expression_2(t):
+ 'multiplicative_expression : multiplicative_expression TIMES cast_expression'
+ pass
+
+def p_multiplicative_expression_3(t):
+ 'multiplicative_expression : multiplicative_expression DIVIDE cast_expression'
+ pass
+
+def p_multiplicative_expression_4(t):
+ 'multiplicative_expression : multiplicative_expression MOD cast_expression'
+ pass
+
+# cast-expression:
+
+def p_cast_expression_1(t):
+ 'cast_expression : unary_expression'
+ pass
+
+def p_cast_expression_2(t):
+ 'cast_expression : LPAREN type_name RPAREN cast_expression'
+ pass
+
+# unary-expression:
+def p_unary_expression_1(t):
+ 'unary_expression : postfix_expression'
+ pass
+
+def p_unary_expression_2(t):
+ 'unary_expression : PLUSPLUS unary_expression'
+ pass
+
+def p_unary_expression_3(t):
+ 'unary_expression : MINUSMINUS unary_expression'
+ pass
+
+def p_unary_expression_4(t):
+ 'unary_expression : unary_operator cast_expression'
+ pass
+
+def p_unary_expression_5(t):
+ 'unary_expression : SIZEOF unary_expression'
+ pass
+
+def p_unary_expression_6(t):
+ 'unary_expression : SIZEOF LPAREN type_name RPAREN'
+ pass
+
+#unary-operator
+def p_unary_operator(t):
+ '''unary_operator : AND
+ | TIMES
+ | PLUS
+ | MINUS
+ | NOT
+ | LNOT '''
+ pass
+
+# postfix-expression:
+def p_postfix_expression_1(t):
+ 'postfix_expression : primary_expression'
+ pass
+
+def p_postfix_expression_2(t):
+ 'postfix_expression : postfix_expression LBRACKET expression RBRACKET'
+ pass
+
+def p_postfix_expression_3(t):
+ 'postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN'
+ pass
+
+def p_postfix_expression_4(t):
+ 'postfix_expression : postfix_expression LPAREN RPAREN'
+ pass
+
+def p_postfix_expression_5(t):
+ 'postfix_expression : postfix_expression PERIOD ID'
+ pass
+
+def p_postfix_expression_6(t):
+ 'postfix_expression : postfix_expression ARROW ID'
+ pass
+
+def p_postfix_expression_7(t):
+ 'postfix_expression : postfix_expression PLUSPLUS'
+ pass
+
+def p_postfix_expression_8(t):
+ 'postfix_expression : postfix_expression MINUSMINUS'
+ pass
+
+# primary-expression:
+def p_primary_expression(t):
+ '''primary_expression : ID
+ | constant
+ | SCONST
+ | LPAREN expression RPAREN'''
+ pass
+
+# argument-expression-list:
+def p_argument_expression_list(t):
+ '''argument_expression_list : assignment_expression
+ | argument_expression_list COMMA assignment_expression'''
+ pass
+
+# constant:
+def p_constant(t):
+ '''constant : ICONST
+ | FCONST
+ | CCONST'''
+ pass
+
+
+def p_empty(t):
+ 'empty : '
+ pass
+
+def p_error(t):
+ print "Whoa. We're hosed"
+
+import profile
+# Build the grammar
+profile.run("yacc.yacc()")
+
+
+
+
diff --git a/ext/ply/example/calc/calc.py b/ext/ply/example/calc/calc.py
new file mode 100644
index 000000000..aeb23c246
--- /dev/null
+++ b/ext/ply/example/calc/calc.py
@@ -0,0 +1,108 @@
+# -----------------------------------------------------------------------------
+# calc.py
+#
+# A simple calculator with variables. This is from O'Reilly's
+# "Lex and Yacc", p. 63.
+# -----------------------------------------------------------------------------
+
+tokens = (
+ 'NAME','NUMBER',
+ 'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
+ 'LPAREN','RPAREN',
+ )
+
+# Tokens
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_TIMES = r'\*'
+t_DIVIDE = r'/'
+t_EQUALS = r'='
+t_LPAREN = r'\('
+t_RPAREN = r'\)'
+t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
+
+def t_NUMBER(t):
+ r'\d+'
+ try:
+ t.value = int(t.value)
+ except ValueError:
+ print "Integer value too large", t.value
+ t.value = 0
+ return t
+
+t_ignore = " \t"
+
+def t_newline(t):
+ r'\n+'
+ t.lineno += t.value.count("\n")
+
+def t_error(t):
+ print "Illegal character '%s'" % t.value[0]
+ t.skip(1)
+
+# Build the lexer
+import lex
+lex.lex()
+
+# Parsing rules
+
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[2] == '/': t[0] = t[1] / t[3]
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+while 1:
+ try:
+ s = raw_input('calc > ')
+ except EOFError:
+ break
+ yacc.parse(s)
diff --git a/ext/ply/example/hedit/hedit.py b/ext/ply/example/hedit/hedit.py
new file mode 100644
index 000000000..f00427bf5
--- /dev/null
+++ b/ext/ply/example/hedit/hedit.py
@@ -0,0 +1,44 @@
+# -----------------------------------------------------------------------------
+# hedit.py
+#
+# Parsing of Fortran H Edit descriptors (Contributed by Pearu Peterson)
+#
+# These tokens can't be easily tokenized because they are of the following
+# form:
+#
+# nHc1...cn
+#
+# where n is a positive integer and c1 ... cn are characters.
+#
+# This example shows how to modify the state of the lexer to parse
+# such tokens
+# -----------------------------------------------------------------------------
+
+tokens = (
+ 'H_EDIT_DESCRIPTOR',
+ )
+
+# Tokens
+t_ignore = " \t\n"
+
+def t_H_EDIT_DESCRIPTOR(t):
+ r"\d+H.*" # This grabs all of the remaining text
+ i = t.value.index('H')
+ n = eval(t.value[:i])
+
+ # Adjust the tokenizing position
+ t.lexer.lexpos -= len(t.value) - (i+1+n)
+
+ t.value = t.value[i+1:i+1+n]
+ return t
+
+def t_error(t):
+ print "Illegal character '%s'" % t.value[0]
+ t.skip(1)
+
+# Build the lexer
+import lex
+lex.lex()
+lex.runmain()
+
+
diff --git a/ext/ply/example/optcalc/README b/ext/ply/example/optcalc/README
new file mode 100644
index 000000000..6d196f0ee
--- /dev/null
+++ b/ext/ply/example/optcalc/README
@@ -0,0 +1,9 @@
+An example showing how to use Python optimized mode.
+To run:
+
+ - First run 'python calc.py'
+
+ - Then run 'python -OO calc.py'
+
+If working correctly, the second version should run the
+same way.
diff --git a/ext/ply/example/optcalc/calc.py b/ext/ply/example/optcalc/calc.py
new file mode 100644
index 000000000..fa66cda5b
--- /dev/null
+++ b/ext/ply/example/optcalc/calc.py
@@ -0,0 +1,110 @@
+# -----------------------------------------------------------------------------
+# calc.py
+#
+# A simple calculator with variables. This is from O'Reilly's
+# "Lex and Yacc", p. 63.
+# -----------------------------------------------------------------------------
+
+tokens = (
+ 'NAME','NUMBER',
+ 'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
+ 'LPAREN','RPAREN',
+ )
+
+# Tokens
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_TIMES = r'\*'
+t_DIVIDE = r'/'
+t_EQUALS = r'='
+t_LPAREN = r'\('
+t_RPAREN = r'\)'
+t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
+
+def t_NUMBER(t):
+ r'\d+'
+ try:
+ t.value = int(t.value)
+ except ValueError:
+ print "Integer value too large", t.value
+ t.value = 0
+ return t
+
+t_ignore = " \t"
+
+def t_newline(t):
+ r'\n+'
+ t.lineno += t.value.count("\n")
+
+def t_error(t):
+ print "Illegal character '%s'" % t.value[0]
+ t.skip(1)
+
+# Build the lexer
+import lex
+lex.lex(optimize=1)
+
+# Parsing rules
+
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[2] == '/': t[0] = t[1] / t[3]
+ elif t[2] == '<': t[0] = t[1] < t[3]
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc(optimize=1)
+
+while 1:
+ try:
+ s = raw_input('calc > ')
+ except EOFError:
+ break
+ yacc.parse(s)
+
diff --git a/ext/ply/lex.py b/ext/ply/lex.py
new file mode 100644
index 000000000..7ad7a394b
--- /dev/null
+++ b/ext/ply/lex.py
@@ -0,0 +1,681 @@
+#-----------------------------------------------------------------------------
+# ply: lex.py
+#
+# Author: David M. Beazley (beazley@cs.uchicago.edu)
+# Department of Computer Science
+# University of Chicago
+# Chicago, IL 60637
+#
+# Copyright (C) 2001, David M. Beazley
+#
+# $Header: /home/stever/bk/newmem2/ext/ply/lex.py 1.1 03/06/06 14:53:34-00:00 stever@ $
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# See the file COPYING for a complete copy of the LGPL.
+#
+#
+# This module automatically constructs a lexical analysis module from regular
+# expression rules defined in a user-defined module. The idea is essentially the same
+# as that used in John Aycock's Spark framework, but the implementation works
+# at the module level rather than requiring the use of classes.
+#
+# This module tries to provide an interface that is closely modeled after
+# the traditional lex interface in Unix. It also differs from Spark
+# in that:
+#
+# - It provides more extensive error checking and reporting if
+# the user supplies a set of regular expressions that can't
+# be compiled or if there is any other kind of a problem in
+# the specification.
+#
+# - The interface is geared towards LALR(1) and LR(1) parser
+# generators. That is, tokens are generated one at a time
+# rather than being generated in advance all in one step.
+#
+# There are a few limitations of this module
+#
+# - The module interface makes it somewhat awkward to support more
+# than one lexer at a time. Although somewhat inelegant from a
+# design perspective, this is rarely a practical concern for
+# most compiler projects.
+#
+# - The lexer requires that the entire input text be read into
+# a string before scanning. I suppose that most machines have
+# enough memory to make this a minor issue, but it makes
+# the lexer somewhat difficult to use in interactive sessions
+# or with streaming data.
+#
+#-----------------------------------------------------------------------------
+
+r"""
+lex.py
+
+This module builds lex-like scanners based on regular expression rules.
+To use the module, simply write a collection of regular expression rules
+and actions like this:
+
+# lexer.py
+import lex
+
+# Define a list of valid tokens
+tokens = (
+ 'IDENTIFIER', 'NUMBER', 'PLUS', 'MINUS'
+ )
+
+# Define tokens as functions
+def t_IDENTIFIER(t):
+ r' [a-zA-Z_](\w|_)* '
+ return t
+
+def t_NUMBER(t):
+ r' \d+ '
+ return t
+
+# Some simple tokens with no actions
+t_PLUS = r'\+'
+t_MINUS = r'-'
+
+# Initialize the lexer
+lex.lex()
+
+The tokens list is required and contains a complete list of all valid
+token types that the lexer is allowed to produce. Token types are
+restricted to be valid identifiers. This means that 'MINUS' is a valid
+token type whereas '-' is not.
+
+Rules are defined by writing a function with a name of the form
+t_rulename. Each rule must accept a single argument which is
+a token object generated by the lexer. This token has the following
+attributes:
+
+ t.type = type string of the token. This is initially set to the
+ name of the rule without the leading t_
+ t.value = The value of the lexeme.
+ t.lineno = The value of the line number where the token was encountered
+
+For example, the t_NUMBER() rule above might be called with the following:
+
+ t.type = 'NUMBER'
+ t.value = '42'
+ t.lineno = 3
+
+Each rule returns the token object it would like to supply to the
+parser. In most cases, the token t is returned with few, if any
+modifications. To discard a token for things like whitespace or
+comments, simply return nothing. For instance:
+
+def t_whitespace(t):
+ r' \s+ '
+ pass
+
+For faster lexing, you can also define this in terms of the ignore set like this:
+
+t_ignore = ' \t'
+
+The characters in this string are ignored by the lexer. Use of this feature can speed
+up parsing significantly since scanning will immediately proceed to the next token.
+
+lex requires that the token returned by each rule has an attribute
+t.type. Other than this, rules are free to return any kind of token
+object that they wish and may construct a new type of token object
+from the attributes of t (provided the new object has the required
+type attribute).
+
+If illegal characters are encountered, the scanner executes the
+function t_error(t) where t is a token representing the rest of the
+string that hasn't been matched. If this function isn't defined, a
+LexError exception is raised. The .text attribute of this exception
+object contains the part of the string that wasn't matched.
+
+The t.skip(n) method can be used to skip ahead n characters in the
+input stream. This is usually only used in the error handling rule.
+For instance, the following rule would print an error message and
+continue:
+
+def t_error(t):
+ print "Illegal character in input %s" % t.value[0]
+ t.skip(1)
+
+Of course, a nice scanner might wish to skip more than one character
+if the input looks very corrupted.
+
+The lex module defines a t.lineno attribute on each token that can be used
+to track the current line number in the input. The value of this
+variable is not modified by lex so it is up to your lexer module
+to correctly update its value depending on the lexical properties
+of the input language. To do this, you might write rules such as
+the following:
+
+def t_newline(t):
+ r' \n+ '
+ t.lineno += t.value.count("\n")
+
+To initialize your lexer so that it can be used, simply call the lex.lex()
+function in your rule file. If there are any errors in your
+specification, warning messages or an exception will be generated to
+alert you to the problem.
+
+(dave: this needs to be rewritten)
+To use the newly constructed lexer from another module, simply do
+this:
+
+ import lex
+ import lexer
+ plex.input("position = initial + rate*60")
+
+ while 1:
+ token = plex.token() # Get a token
+ if not token: break # No more tokens
+ ... do whatever ...
+
+Assuming that the module 'lexer' has initialized plex as shown
+above, parsing modules can safely import 'plex' without having
+to import the rule file or any additional information about the
+scanner you have defined.
+"""
+
+# -----------------------------------------------------------------------------
+
+
+__version__ = "1.3"
+
+import re, types, sys, copy
+
+# Exception thrown when invalid token encountered and no default
+class LexError(Exception):
+ def __init__(self,message,s):
+ self.args = (message,)
+ self.text = s
+
+# Token class
+class LexToken:
+ def __str__(self):
+ return "LexToken(%s,%r,%d)" % (self.type,self.value,self.lineno)
+ def __repr__(self):
+ return str(self)
+ def skip(self,n):
+ try:
+ self._skipn += n
+ except AttributeError:
+ self._skipn = n
+
+# -----------------------------------------------------------------------------
+# Lexer class
+#
+# input() - Store a new string in the lexer
+# token() - Get the next token
+# -----------------------------------------------------------------------------
+
+class Lexer:
+ def __init__(self):
+ self.lexre = None # Master regular expression
+ self.lexdata = None # Actual input data (as a string)
+ self.lexpos = 0 # Current position in input text
+ self.lexlen = 0 # Length of the input text
+ self.lexindexfunc = [ ] # Reverse mapping of groups to functions and types
+ self.lexerrorf = None # Error rule (if any)
+ self.lextokens = None # List of valid tokens
+ self.lexignore = None # Ignored characters
+ self.lineno = 1 # Current line number
+ self.debug = 0 # Debugging mode
+ self.optimize = 0 # Optimized mode
+ self.token = self.errtoken
+
+ def __copy__(self):
+ c = Lexer()
+ c.lexre = self.lexre
+ c.lexdata = self.lexdata
+ c.lexpos = self.lexpos
+ c.lexlen = self.lexlen
+ c.lenindexfunc = self.lexindexfunc
+ c.lexerrorf = self.lexerrorf
+ c.lextokens = self.lextokens
+ c.lexignore = self.lexignore
+ c.lineno = self.lineno
+ c.optimize = self.optimize
+ c.token = c.realtoken
+
+ # ------------------------------------------------------------
+ # input() - Push a new string into the lexer
+ # ------------------------------------------------------------
+ def input(self,s):
+ if not isinstance(s,types.StringType):
+ raise ValueError, "Expected a string"
+ self.lexdata = s
+ self.lexpos = 0
+ self.lexlen = len(s)
+ self.token = self.realtoken
+
+ # Change the token routine to point to realtoken()
+ global token
+ if token == self.errtoken:
+ token = self.token
+
+ # ------------------------------------------------------------
+ # errtoken() - Return error if token is called with no data
+ # ------------------------------------------------------------
+ def errtoken(self):
+ raise RuntimeError, "No input string given with input()"
+
+ # ------------------------------------------------------------
+ # token() - Return the next token from the Lexer
+ #
+ # Note: This function has been carefully implemented to be as fast
+ # as possible. Don't make changes unless you really know what
+ # you are doing
+ # ------------------------------------------------------------
+ def realtoken(self):
+ # Make local copies of frequently referenced attributes
+ lexpos = self.lexpos
+ lexlen = self.lexlen
+ lexignore = self.lexignore
+ lexdata = self.lexdata
+
+ while lexpos < lexlen:
+ # This code provides some short-circuit code for whitespace, tabs, and other ignored characters
+ if lexdata[lexpos] in lexignore:
+ lexpos += 1
+ continue
+
+ # Look for a regular expression match
+ m = self.lexre.match(lexdata,lexpos)
+ if m:
+ i = m.lastindex
+ lexpos = m.end()
+ tok = LexToken()
+ tok.value = m.group()
+ tok.lineno = self.lineno
+ tok.lexer = self
+ func,tok.type = self.lexindexfunc[i]
+ if not func:
+ self.lexpos = lexpos
+ return tok
+
+ # If token is processed by a function, call it
+ self.lexpos = lexpos
+ newtok = func(tok)
+ self.lineno = tok.lineno # Update line number
+
+ # Every function must return a token, if nothing, we just move to next token
+ if not newtok: continue
+
+ # Verify type of the token. If not in the token map, raise an error
+ if not self.optimize:
+ if not self.lextokens.has_key(newtok.type):
+ raise LexError, ("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
+ func.func_code.co_filename, func.func_code.co_firstlineno,
+ func.__name__, newtok.type),lexdata[lexpos:])
+
+ return newtok
+
+ # No match. Call t_error() if defined.
+ if self.lexerrorf:
+ tok = LexToken()
+ tok.value = self.lexdata[lexpos:]
+ tok.lineno = self.lineno
+ tok.type = "error"
+ tok.lexer = self
+ oldpos = lexpos
+ newtok = self.lexerrorf(tok)
+ lexpos += getattr(tok,"_skipn",0)
+ if oldpos == lexpos:
+ # Error method didn't change text position at all. This is an error.
+ self.lexpos = lexpos
+ raise LexError, ("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
+ if not newtok: continue
+ self.lexpos = lexpos
+ return newtok
+
+ self.lexpos = lexpos
+ raise LexError, ("No match found", lexdata[lexpos:])
+
+ # No more input data
+ self.lexpos = lexpos + 1
+ return None
+
+
+# -----------------------------------------------------------------------------
+# validate_file()
+#
+# This checks to see if there are duplicated t_rulename() functions or strings
+# in the parser input file. This is done using a simple regular expression
+# match on each line in the filename.
+# -----------------------------------------------------------------------------
+
+def validate_file(filename):
+ import os.path
+ base,ext = os.path.splitext(filename)
+ if ext != '.py': return 1 # No idea what the file is. Return OK
+
+ try:
+ f = open(filename)
+ lines = f.readlines()
+ f.close()
+ except IOError:
+ return 1 # Oh well
+
+ fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
+ sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')
+ counthash = { }
+ linen = 1
+ noerror = 1
+ for l in lines:
+ m = fre.match(l)
+ if not m:
+ m = sre.match(l)
+ if m:
+ name = m.group(1)
+ prev = counthash.get(name)
+ if not prev:
+ counthash[name] = linen
+ else:
+ print "%s:%d: Rule %s redefined. Previously defined on line %d" % (filename,linen,name,prev)
+ noerror = 0
+ linen += 1
+ return noerror
+
+# -----------------------------------------------------------------------------
+# _read_lextab(module)
+#
+# Reads lexer table from a lextab file instead of using introspection.
+# -----------------------------------------------------------------------------
+
+def _read_lextab(lexer, fdict, module):
+ exec "import %s as lextab" % module
+ lexer.lexre = re.compile(lextab._lexre, re.VERBOSE)
+ lexer.lexindexfunc = lextab._lextab
+ for i in range(len(lextab._lextab)):
+ t = lexer.lexindexfunc[i]
+ if t:
+ if t[0]:
+ lexer.lexindexfunc[i] = (fdict[t[0]],t[1])
+ lexer.lextokens = lextab._lextokens
+ lexer.lexignore = lextab._lexignore
+ if lextab._lexerrorf:
+ lexer.lexerrorf = fdict[lextab._lexerrorf]
+
+# -----------------------------------------------------------------------------
+# lex(module=None,debug=0,optimize=0,lextab="lextab")
+#
+# Build all of the regular expression rules from definitions in the supplied module
+#
+# The rules are the names starting with t_ (functions or strings) and the
+# 'tokens' list found in the module's dictionary or, when no module is given,
+# in the globals of the calling frame.  Returns the new Lexer and also rebinds
+# the module-level names 'token' and 'input' to that lexer's methods.
+#
+# debug    - print the token list, each rule added and the master regex
+# optimize - skip the validation of the specification; with a lextab name,
+#            first try to load precomputed tables from that module and,
+#            after a full build, write them to <lextab>.py
+#
+# Raises ValueError if 'module' is given but is not a module, and SyntaxError
+# if the specification is missing, malformed or fails validation.
+# -----------------------------------------------------------------------------
+def lex(module=None,debug=0,optimize=0,lextab="lextab"):
+ ldict = None
+ regex = ""
+ error = 0
+ files = { }
+ lexer = Lexer()
+ lexer.debug = debug
+ lexer.optimize = optimize
+ global token,input
+
+ if module:
+ if not isinstance(module, types.ModuleType):
+ raise ValueError,"Expected a module"
+
+ ldict = module.__dict__
+
+ else:
+ # No module given. We might be able to get information from the caller.
+ # (An exception is raised and caught only to obtain a traceback, whose
+ # frame gives access to the caller's frame.)
+ try:
+ raise RuntimeError
+ except RuntimeError:
+ e,b,t = sys.exc_info()
+ f = t.tb_frame
+ f = f.f_back # Walk out to our calling function
+ ldict = f.f_globals # Grab its globals dictionary
+
+ # Fast path: load the precomputed tables.  If the lextab module cannot be
+ # imported, fall through and build everything from the rules.
+ if optimize and lextab:
+ try:
+ _read_lextab(lexer,ldict, lextab)
+ if not lexer.lexignore: lexer.lexignore = ""
+ token = lexer.token
+ input = lexer.input
+ return lexer
+
+ except ImportError:
+ pass
+
+ # Get the tokens map
+ # (a missing 'tokens' and an empty one are reported the same way)
+ tokens = ldict.get("tokens",None)
+ if not tokens:
+ raise SyntaxError,"lex: module does not define 'tokens'"
+ if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)):
+ raise SyntaxError,"lex: tokens must be a list or tuple."
+
+ # Build a dictionary of valid token names
+ lexer.lextokens = { }
+ if not optimize:
+
+ # Utility function for verifying tokens
+ def is_identifier(s):
+ for c in s:
+ if not (c.isalnum() or c == '_'): return 0
+ return 1
+
+ for n in tokens:
+ if not is_identifier(n):
+ print "lex: Bad token name '%s'" % n
+ error = 1
+ if lexer.lextokens.has_key(n):
+ print "lex: Warning. Token '%s' multiply defined." % n
+ lexer.lextokens[n] = None
+ else:
+ for n in tokens: lexer.lextokens[n] = None
+
+
+ if debug:
+ print "lex: tokens = '%s'" % lexer.lextokens.keys()
+
+ # Get a list of symbols with the t_ prefix
+ tsymbols = [f for f in ldict.keys() if f[:2] == 't_']
+
+ # Now build up a list of functions and a list of strings
+ fsymbols = [ ]
+ ssymbols = [ ]
+ for f in tsymbols:
+ if isinstance(ldict[f],types.FunctionType):
+ fsymbols.append(ldict[f])
+ elif isinstance(ldict[f],types.StringType):
+ ssymbols.append((f,ldict[f]))
+ else:
+ print "lex: %s not defined as a function or string" % f
+ error = 1
+
+ # Sort the functions by line number
+ fsymbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno))
+
+ # Sort the strings by regular expression length
+ # (the comparison puts the longest expressions first)
+ ssymbols.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1])))
+
+ # Check for non-empty symbols
+ if len(fsymbols) == 0 and len(ssymbols) == 0:
+ raise SyntaxError,"lex: no rules of the form t_rulename are defined."
+
+ # Add all of the rules defined with actions first
+ # (so they come before every string rule in the master regular expression)
+ for f in fsymbols:
+
+ line = f.func_code.co_firstlineno
+ file = f.func_code.co_filename
+ files[file] = None
+
+ if not optimize:
+ if f.func_code.co_argcount > 1:
+ print "%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__)
+ error = 1
+ continue
+
+ if f.func_code.co_argcount < 1:
+ print "%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__)
+ error = 1
+ continue
+
+ if f.__name__ == 't_ignore':
+ print "%s:%d: Rule '%s' must be defined as a string." % (file,line,f.__name__)
+ error = 1
+ continue
+
+ # t_error is the error handler, not a pattern: it is not added to the regex
+ if f.__name__ == 't_error':
+ lexer.lexerrorf = f
+ continue
+
+ # The docstring of a rule function is its regular expression
+ if f.__doc__:
+ if not optimize:
+ try:
+ c = re.compile(f.__doc__, re.VERBOSE)
+ except re.error,e:
+ print "%s:%d: Invalid regular expression for rule '%s'. %s" % (file,line,f.__name__,e)
+ error = 1
+ continue
+
+ if debug:
+ print "lex: Adding rule %s -> '%s'" % (f.__name__,f.__doc__)
+
+ # Okay. The regular expression seemed okay. Let's append it to the master regular
+ # expression we're building
+
+ if (regex): regex += "|"
+ regex += "(?P<%s>%s)" % (f.__name__,f.__doc__)
+ else:
+ # NOTE(review): only a message is printed here; 'error' is not set, so a
+ # rule function without a docstring does not by itself fail the build.
+ print "%s:%d: No regular expression defined for rule '%s'" % (file,line,f.__name__)
+
+ # Now add all of the simple rules
+ for name,r in ssymbols:
+
+ # t_ignore is a set of characters to skip, not a pattern
+ if name == 't_ignore':
+ lexer.lexignore = r
+ continue
+
+ if not optimize:
+ if name == 't_error':
+ raise SyntaxError,"lex: Rule 't_error' must be defined as a function"
+ # NOTE(review): the next two statements follow a raise and can never run.
+ error = 1
+ continue
+
+ if not lexer.lextokens.has_key(name[2:]):
+ print "lex: Rule '%s' defined for an unspecified token %s." % (name,name[2:])
+ error = 1
+ continue
+ try:
+ c = re.compile(r,re.VERBOSE)
+ except re.error,e:
+ print "lex: Invalid regular expression for rule '%s'. %s" % (name,e)
+ error = 1
+ continue
+ if debug:
+ print "lex: Adding rule %s -> '%s'" % (name,r)
+
+ if regex: regex += "|"
+ regex += "(?P<%s>%s)" % (name,r)
+
+ # Check every file that contributed a rule function for duplicated rule names
+ if not optimize:
+ for f in files.keys():
+ if not validate_file(f):
+ error = 1
+ try:
+ if debug:
+ print "lex: regex = '%s'" % regex
+ lexer.lexre = re.compile(regex, re.VERBOSE)
+
+ # Build the index to function map for the matching engine
+ # NOTE(review): max() raises ValueError when the regex has no named groups
+ # (for instance when only t_ignore is defined); only re.error is caught below.
+ lexer.lexindexfunc = [ None ] * (max(lexer.lexre.groupindex.values())+1)
+ for f,i in lexer.lexre.groupindex.items():
+ handle = ldict[f]
+ if isinstance(handle,types.FunctionType):
+ lexer.lexindexfunc[i] = (handle,handle.__name__[2:])
+ else:
+ # Rule was specified as a string: there is no function to call, so
+ # None is stored and only the token type is recorded
+ lexer.lexindexfunc[i] = (None,f[2:])
+
+ # If a lextab was specified, we create a file containing the precomputed
+ # regular expression and index table
+ # (functions are written out by name so they can be looked up again on load)
+
+ if lextab and optimize:
+ lt = open(lextab+".py","w")
+ lt.write("# %s.py. This file automatically created by PLY. Don't edit.\n" % lextab)
+ lt.write("_lexre = %s\n" % repr(regex))
+ lt.write("_lextab = [\n");
+ for i in range(0,len(lexer.lexindexfunc)):
+ t = lexer.lexindexfunc[i]
+ if t:
+ if t[0]:
+ lt.write(" ('%s',%s),\n"% (t[0].__name__, repr(t[1])))
+ else:
+ lt.write(" (None,%s),\n" % repr(t[1]))
+ else:
+ lt.write(" None,\n")
+
+ lt.write("]\n");
+ lt.write("_lextokens = %s\n" % repr(lexer.lextokens))
+ lt.write("_lexignore = %s\n" % repr(lexer.lexignore))
+ if (lexer.lexerrorf):
+ lt.write("_lexerrorf = %s\n" % repr(lexer.lexerrorf.__name__))
+ else:
+ lt.write("_lexerrorf = None\n")
+ lt.close()
+
+ except re.error,e:
+ print "lex: Fatal error. Unable to compile regular expression rules. %s" % e
+ error = 1
+ # Any problem recorded above is fatal; a missing t_error rule is only a warning
+ if error:
+ raise SyntaxError,"lex: Unable to build lexer."
+ if not lexer.lexerrorf:
+ print "lex: Warning. no t_error rule is defined."
+
+ if not lexer.lexignore: lexer.lexignore = ""
+
+ # Create global versions of the token() and input() functions
+ token = lexer.token
+ input = lexer.input
+
+ return lexer
+
+# -----------------------------------------------------------------------------
+# run()
+#
+# This runs the lexer as a main program
+# -----------------------------------------------------------------------------
+
+def runmain(lexer=None,data=None):
+ if not data:
+ try:
+ filename = sys.argv[1]
+ f = open(filename)
+ data = f.read()
+ f.close()
+ except IndexError:
+ print "Reading from standard input (type EOF to end):"
+ data = sys.stdin.read()
+
+ if lexer:
+ _input = lexer.input
+ else:
+ _input = input
+ _input(data)
+ if lexer:
+ _token = lexer.token
+ else:
+ _token = token
+
+ while 1:
+ tok = _token()
+ if not tok: break
+ print "(%s,'%s',%d)" % (tok.type, tok.value, tok.lineno)
+
+
+
+
diff --git a/ext/ply/test/README b/ext/ply/test/README
new file mode 100644
index 000000000..bca748497
--- /dev/null
+++ b/ext/ply/test/README
@@ -0,0 +1,9 @@
+This directory mostly contains tests for various types of error
+conditions. To run:
+
+ $ python testlex.py .
+ $ python testyacc.py .
+
+(make sure lex.py and yacc.py exist in this directory before
+running the tests).
+
diff --git a/ext/ply/test/calclex.py b/ext/ply/test/calclex.py
new file mode 100644
index 000000000..f8eb91a09
--- /dev/null
+++ b/ext/ply/test/calclex.py
@@ -0,0 +1,46 @@
+# -----------------------------------------------------------------------------
+# calclex.py
+# -----------------------------------------------------------------------------
+
+tokens = (
+ 'NAME','NUMBER',
+ 'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
+ 'LPAREN','RPAREN',
+ )
+
+# Tokens
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_TIMES = r'\*'
+t_DIVIDE = r'/'
+t_EQUALS = r'='
+t_LPAREN = r'\('
+t_RPAREN = r'\)'
+t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
+
+def t_NUMBER(t):
+ r'\d+'
+ try:
+ t.value = int(t.value)
+ except ValueError:
+ print "Integer value too large", t.value
+ t.value = 0
+ return t
+
+t_ignore = " \t"
+
+def t_newline(t):
+ r'\n+'
+ t.lineno += t.value.count("\n")
+
+def t_error(t):
+ print "Illegal character '%s'" % t.value[0]
+ t.skip(1)
+
+# Build the lexer
+import lex
+lex.lex()
+
+
+
diff --git a/ext/ply/test/lex_doc1.exp b/ext/ply/test/lex_doc1.exp
new file mode 100644
index 000000000..29381911d
--- /dev/null
+++ b/ext/ply/test/lex_doc1.exp
@@ -0,0 +1 @@
+./lex_doc1.py:15: No regular expression defined for rule 't_NUMBER'
diff --git a/ext/ply/test/lex_doc1.py b/ext/ply/test/lex_doc1.py
new file mode 100644
index 000000000..fb0fb885e
--- /dev/null
+++ b/ext/ply/test/lex_doc1.py
@@ -0,0 +1,27 @@
+# lex_doc1.py
+#
+# Missing documentation string
+
+import lex
+
+tokens = [
+ "PLUS",
+ "MINUS",
+ "NUMBER",
+ ]
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+def t_NUMBER(t):
+ pass
+
+def t_error(t):
+ pass
+
+
+import sys
+sys.tracebacklimit = 0
+
+lex.lex()
+
+
diff --git a/ext/ply/test/lex_dup1.exp b/ext/ply/test/lex_dup1.exp
new file mode 100644
index 000000000..22bca3190
--- /dev/null
+++ b/ext/ply/test/lex_dup1.exp
@@ -0,0 +1,2 @@
+./lex_dup1.py:17: Rule t_NUMBER redefined. Previously defined on line 15
+SyntaxError: lex: Unable to build lexer.
diff --git a/ext/ply/test/lex_dup1.py b/ext/ply/test/lex_dup1.py
new file mode 100644
index 000000000..88bbe00e9
--- /dev/null
+++ b/ext/ply/test/lex_dup1.py
@@ -0,0 +1,27 @@
+# lex_dup1.py
+#
+# Duplicated rule specifiers
+
+import lex
+
+tokens = [
+ "PLUS",
+ "MINUS",
+ "NUMBER",
+ ]
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_NUMBER = r'\d+'
+
+t_NUMBER = r'\d+'
+
+def t_error(t):
+ pass
+
+import sys
+sys.tracebacklimit = 0
+
+lex.lex()
+
+
diff --git a/ext/ply/test/lex_dup2.exp b/ext/ply/test/lex_dup2.exp
new file mode 100644
index 000000000..883bdad46
--- /dev/null
+++ b/ext/ply/test/lex_dup2.exp
@@ -0,0 +1,2 @@
+./lex_dup2.py:19: Rule t_NUMBER redefined. Previously defined on line 15
+SyntaxError: lex: Unable to build lexer.
diff --git a/ext/ply/test/lex_dup2.py b/ext/ply/test/lex_dup2.py
new file mode 100644
index 000000000..65e0b21a2
--- /dev/null
+++ b/ext/ply/test/lex_dup2.py
@@ -0,0 +1,31 @@
+# lex_dup2.py
+#
+# Duplicated rule specifiers
+
+import lex
+
+tokens = [
+ "PLUS",
+ "MINUS",
+ "NUMBER",
+ ]
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+def t_NUMBER(t):
+ r'\d+'
+ pass
+
+def t_NUMBER(t):
+ r'\d+'
+ pass
+
+def t_error(t):
+ pass
+
+import sys
+sys.tracebacklimit = 0
+
+lex.lex()
+
+
diff --git a/ext/ply/test/lex_dup3.exp b/ext/ply/test/lex_dup3.exp
new file mode 100644
index 000000000..916612aa1
--- /dev/null
+++ b/ext/ply/test/lex_dup3.exp
@@ -0,0 +1,2 @@
+./lex_dup3.py:17: Rule t_NUMBER redefined. Previously defined on line 15
+SyntaxError: lex: Unable to build lexer.
diff --git a/ext/ply/test/lex_dup3.py b/ext/ply/test/lex_dup3.py
new file mode 100644
index 000000000..424101823
--- /dev/null
+++ b/ext/ply/test/lex_dup3.py
@@ -0,0 +1,29 @@
+# lex_dup3.py
+#
+# Duplicated rule specifiers
+
+import lex
+
+tokens = [
+ "PLUS",
+ "MINUS",
+ "NUMBER",
+ ]
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_NUMBER = r'\d+'
+
+def t_NUMBER(t):
+ r'\d+'
+ pass
+
+def t_error(t):
+ pass
+
+import sys
+sys.tracebacklimit = 0
+
+lex.lex()
+
+
diff --git a/ext/ply/test/lex_empty.exp b/ext/ply/test/lex_empty.exp
new file mode 100644
index 000000000..af38602d5
--- /dev/null
+++ b/ext/ply/test/lex_empty.exp
@@ -0,0 +1 @@
+SyntaxError: lex: no rules of the form t_rulename are defined.
diff --git a/ext/ply/test/lex_empty.py b/ext/ply/test/lex_empty.py
new file mode 100644
index 000000000..6472832f1
--- /dev/null
+++ b/ext/ply/test/lex_empty.py
@@ -0,0 +1,18 @@
+# lex_empty.py
+#
+# No rules defined
+
+import lex
+
+tokens = [
+ "PLUS",
+ "MINUS",
+ "NUMBER",
+ ]
+
+import sys
+sys.tracebacklimit = 0
+
+lex.lex()
+
+
diff --git a/ext/ply/test/lex_error1.exp b/ext/ply/test/lex_error1.exp
new file mode 100644
index 000000000..baa19e5b3
--- /dev/null
+++ b/ext/ply/test/lex_error1.exp
@@ -0,0 +1 @@
+lex: Warning. no t_error rule is defined.
diff --git a/ext/ply/test/lex_error1.py b/ext/ply/test/lex_error1.py
new file mode 100644
index 000000000..ed7980346
--- /dev/null
+++ b/ext/ply/test/lex_error1.py
@@ -0,0 +1,22 @@
+# lex_error1.py
+#
+# Missing t_error() rule
+
+import lex
+
+tokens = [
+ "PLUS",
+ "MINUS",
+ "NUMBER",
+ ]
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_NUMBER = r'\d+'
+
+import sys
+sys.tracebacklimit = 0
+
+lex.lex()
+
+
diff --git a/ext/ply/test/lex_error2.exp b/ext/ply/test/lex_error2.exp
new file mode 100644
index 000000000..fb1b55c8b
--- /dev/null
+++ b/ext/ply/test/lex_error2.exp
@@ -0,0 +1 @@
+SyntaxError: lex: Rule 't_error' must be defined as a function
diff --git a/ext/ply/test/lex_error2.py b/ext/ply/test/lex_error2.py
new file mode 100644
index 000000000..80020f72b
--- /dev/null
+++ b/ext/ply/test/lex_error2.py
@@ -0,0 +1,24 @@
+# lex_error2.py
+#
+# t_error defined, but not function
+
+import lex
+
+tokens = [
+ "PLUS",
+ "MINUS",
+ "NUMBER",
+ ]
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_NUMBER = r'\d+'
+
+t_error = "foo"
+
+import sys
+sys.tracebacklimit = 0
+
+lex.lex()
+
+
diff --git a/ext/ply/test/lex_error3.exp b/ext/ply/test/lex_error3.exp
new file mode 100644
index 000000000..936828f93
--- /dev/null
+++ b/ext/ply/test/lex_error3.exp
@@ -0,0 +1,2 @@
+./lex_error3.py:17: Rule 't_error' requires an argument.
+SyntaxError: lex: Unable to build lexer.
diff --git a/ext/ply/test/lex_error3.py b/ext/ply/test/lex_error3.py
new file mode 100644
index 000000000..46facf589
--- /dev/null
+++ b/ext/ply/test/lex_error3.py
@@ -0,0 +1,25 @@
+# lex_error3.py
+#
+# t_error defined as function, but with wrong # args
+
+import lex
+
+tokens = [
+ "PLUS",
+ "MINUS",
+ "NUMBER",
+ ]
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_NUMBER = r'\d+'
+
+def t_error():
+ pass
+
+import sys
+sys.tracebacklimit = 0
+
+lex.lex()
+
+
diff --git a/ext/ply/test/lex_error4.exp b/ext/ply/test/lex_error4.exp
new file mode 100644
index 000000000..242516576
--- /dev/null
+++ b/ext/ply/test/lex_error4.exp
@@ -0,0 +1,2 @@
+./lex_error4.py:17: Rule 't_error' has too many arguments.
+SyntaxError: lex: Unable to build lexer.
diff --git a/ext/ply/test/lex_error4.py b/ext/ply/test/lex_error4.py
new file mode 100644
index 000000000..d777fee84
--- /dev/null
+++ b/ext/ply/test/lex_error4.py
@@ -0,0 +1,25 @@
+# lex_error4.py
+#
+# t_error defined as function, but too many args
+
+import lex
+
+tokens = [
+ "PLUS",
+ "MINUS",
+ "NUMBER",
+ ]
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_NUMBER = r'\d+'
+
+def t_error(t,s):
+ pass
+
+import sys
+sys.tracebacklimit = 0
+
+lex.lex()
+
+
diff --git a/ext/ply/test/lex_hedit.exp b/ext/ply/test/lex_hedit.exp
new file mode 100644
index 000000000..0b09827c6
--- /dev/null
+++ b/ext/ply/test/lex_hedit.exp
@@ -0,0 +1,3 @@
+(H_EDIT_DESCRIPTOR,'abc',1)
+(H_EDIT_DESCRIPTOR,'abcdefghij',1)
+(H_EDIT_DESCRIPTOR,'xy',1)
diff --git a/ext/ply/test/lex_hedit.py b/ext/ply/test/lex_hedit.py
new file mode 100644
index 000000000..68f9fcbd1
--- /dev/null
+++ b/ext/ply/test/lex_hedit.py
@@ -0,0 +1,44 @@
+# -----------------------------------------------------------------------------
+# hedit.py
+#
+# Parsing of Fortran H Edit descriptions (Contributed by Pearu Peterson)
+#
+# These tokens can't be easily tokenized because they are of the following
+# form:
+#
+# nHc1...cn
+#
+# where n is a positive integer and c1 ... cn are characters.
+#
+# This example shows how to modify the state of the lexer to parse
+# such tokens
+# -----------------------------------------------------------------------------
+
+tokens = (
+ 'H_EDIT_DESCRIPTOR',
+ )
+
+# Tokens
+t_ignore = " \t\n"
+
+def t_H_EDIT_DESCRIPTOR(t):
+ r"\d+H.*" # This grabs all of the remaining text
+ i = t.value.index('H')
+ n = eval(t.value[:i])
+
+ # Adjust the tokenizing position
+ t.lexer.lexpos -= len(t.value) - (i+1+n)
+ t.value = t.value[i+1:i+1+n]
+ return t
+
+def t_error(t):
+ print "Illegal character '%s'" % t.value[0]
+ t.skip(1)
+
+# Build the lexer
+import lex
+lex.lex()
+lex.runmain(data="3Habc 10Habcdefghij 2Hxy")
+
+
+
diff --git a/ext/ply/test/lex_ignore.exp b/ext/ply/test/lex_ignore.exp
new file mode 100644
index 000000000..c3b04a154
--- /dev/null
+++ b/ext/ply/test/lex_ignore.exp
@@ -0,0 +1,2 @@
+./lex_ignore.py:17: Rule 't_ignore' must be defined as a string.
+SyntaxError: lex: Unable to build lexer.
diff --git a/ext/ply/test/lex_ignore.py b/ext/ply/test/lex_ignore.py
new file mode 100644
index 000000000..49c303f81
--- /dev/null
+++ b/ext/ply/test/lex_ignore.py
@@ -0,0 +1,29 @@
+# lex_ignore.py
+#
+# Improperly specified ignore declaration
+
+import lex
+
+tokens = [
+ "PLUS",
+ "MINUS",
+ "NUMBER",
+ ]
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_NUMBER = r'\d+'
+
+def t_ignore(t):
+ ' \t'
+ pass
+
+def t_error(t):
+ pass
+
+import sys
+sys.tracebacklimit = 0
+
+lex.lex()
+
+
diff --git a/ext/ply/test/lex_re1.exp b/ext/ply/test/lex_re1.exp
new file mode 100644
index 000000000..634eefefe
--- /dev/null
+++ b/ext/ply/test/lex_re1.exp
@@ -0,0 +1,2 @@
+lex: Invalid regular expression for rule 't_NUMBER'. unbalanced parenthesis
+SyntaxError: lex: Unable to build lexer.
diff --git a/ext/ply/test/lex_re1.py b/ext/ply/test/lex_re1.py
new file mode 100644
index 000000000..4a055ad72
--- /dev/null
+++ b/ext/ply/test/lex_re1.py
@@ -0,0 +1,25 @@
+# lex_re1.py
+#
+# Bad regular expression in a string
+
+import lex
+
+tokens = [
+ "PLUS",
+ "MINUS",
+ "NUMBER",
+ ]
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_NUMBER = r'(\d+'
+
+def t_error(t):
+ pass
+
+import sys
+sys.tracebacklimit = 0
+
+lex.lex()
+
+
diff --git a/ext/ply/test/lex_rule1.exp b/ext/ply/test/lex_rule1.exp
new file mode 100644
index 000000000..0c23ca294
--- /dev/null
+++ b/ext/ply/test/lex_rule1.exp
@@ -0,0 +1,2 @@
+lex: t_NUMBER not defined as a function or string
+SyntaxError: lex: Unable to build lexer.
diff --git a/ext/ply/test/lex_rule1.py b/ext/ply/test/lex_rule1.py
new file mode 100644
index 000000000..ff3764ea1
--- /dev/null
+++ b/ext/ply/test/lex_rule1.py
@@ -0,0 +1,25 @@
+# lex_rule1.py
+#
+# Rule defined as some other type
+
+import lex
+
+tokens = [
+ "PLUS",
+ "MINUS",
+ "NUMBER",
+ ]
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_NUMBER = 1
+
+def t_error(t):
+ pass
+
+import sys
+sys.tracebacklimit = 0
+
+lex.lex()
+
+
diff --git a/ext/ply/test/lex_token1.exp b/ext/ply/test/lex_token1.exp
new file mode 100644
index 000000000..3792831fa
--- /dev/null
+++ b/ext/ply/test/lex_token1.exp
@@ -0,0 +1 @@
+SyntaxError: lex: module does not define 'tokens'
diff --git a/ext/ply/test/lex_token1.py b/ext/ply/test/lex_token1.py
new file mode 100644
index 000000000..e8eca2b63
--- /dev/null
+++ b/ext/ply/test/lex_token1.py
@@ -0,0 +1,19 @@
+# lex_token1.py
+#
+# Tests for absence of tokens variable
+
+import lex
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_NUMBER = r'\d+'
+
+def t_error(t):
+ pass
+
+import sys
+sys.tracebacklimit = 0
+
+lex.lex()
+
+
diff --git a/ext/ply/test/lex_token2.exp b/ext/ply/test/lex_token2.exp
new file mode 100644
index 000000000..3f98fe51d
--- /dev/null
+++ b/ext/ply/test/lex_token2.exp
@@ -0,0 +1 @@
+SyntaxError: lex: tokens must be a list or tuple.
diff --git a/ext/ply/test/lex_token2.py b/ext/ply/test/lex_token2.py
new file mode 100644
index 000000000..38b34dabe
--- /dev/null
+++ b/ext/ply/test/lex_token2.py
@@ -0,0 +1,21 @@
+# lex_token2.py
+#
+# Tests for tokens of wrong type
+
+import lex
+
+tokens = "PLUS MINUS NUMBER"
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_NUMBER = r'\d+'
+
+def t_error(t):
+ pass
+
+import sys
+sys.tracebacklimit = 0
+
+lex.lex()
+
+
diff --git a/ext/ply/test/lex_token3.exp b/ext/ply/test/lex_token3.exp
new file mode 100644
index 000000000..d991d3c37
--- /dev/null
+++ b/ext/ply/test/lex_token3.exp
@@ -0,0 +1,2 @@
+lex: Rule 't_MINUS' defined for an unspecified token MINUS.
+SyntaxError: lex: Unable to build lexer.
diff --git a/ext/ply/test/lex_token3.py b/ext/ply/test/lex_token3.py
new file mode 100644
index 000000000..909f9180d
--- /dev/null
+++ b/ext/ply/test/lex_token3.py
@@ -0,0 +1,24 @@
+# lex_token3.py
+#
+# tokens is right type, but is missing a token for one rule
+
+import lex
+
+tokens = [
+ "PLUS",
+ "NUMBER",
+ ]
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_NUMBER = r'\d+'
+
+def t_error(t):
+ pass
+
+import sys
+sys.tracebacklimit = 0
+
+lex.lex()
+
+
diff --git a/ext/ply/test/lex_token4.exp b/ext/ply/test/lex_token4.exp
new file mode 100644
index 000000000..3dd88e05a
--- /dev/null
+++ b/ext/ply/test/lex_token4.exp
@@ -0,0 +1,2 @@
+lex: Bad token name '-'
+SyntaxError: lex: Unable to build lexer.
diff --git a/ext/ply/test/lex_token4.py b/ext/ply/test/lex_token4.py
new file mode 100644
index 000000000..d77d1662c
--- /dev/null
+++ b/ext/ply/test/lex_token4.py
@@ -0,0 +1,26 @@
+# lex_token4.py
+#
+# Bad token name
+
+import lex
+
+tokens = [
+ "PLUS",
+ "MINUS",
+ "-",
+ "NUMBER",
+ ]
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_NUMBER = r'\d+'
+
+def t_error(t):
+ pass
+
+import sys
+sys.tracebacklimit = 0
+
+lex.lex()
+
+
diff --git a/ext/ply/test/lex_token5.exp b/ext/ply/test/lex_token5.exp
new file mode 100644
index 000000000..d7bcb2e7c
--- /dev/null
+++ b/ext/ply/test/lex_token5.exp
@@ -0,0 +1 @@
+lex.LexError: ./lex_token5.py:16: Rule 't_NUMBER' returned an unknown token type 'NUM'
diff --git a/ext/ply/test/lex_token5.py b/ext/ply/test/lex_token5.py
new file mode 100644
index 000000000..d9b0c96aa
--- /dev/null
+++ b/ext/ply/test/lex_token5.py
@@ -0,0 +1,31 @@
+# lex_token5.py
+#
+# Return a bad token name
+
+import lex
+
+tokens = [
+ "PLUS",
+ "MINUS",
+ "NUMBER",
+ ]
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+
+def t_NUMBER(t):
+ r'\d+'
+ t.type = "NUM"
+ return t
+
+def t_error(t):
+ pass
+
+import sys
+sys.tracebacklimit = 0
+
+lex.lex()
+lex.input("1234")
+t = lex.token()
+
+
diff --git a/ext/ply/test/testlex.py b/ext/ply/test/testlex.py
new file mode 100755
index 000000000..df000b83d
--- /dev/null
+++ b/ext/ply/test/testlex.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+# ----------------------------------------------------------------------
+# testlex.py
+#
+# Run tests for the lexing module
+# ----------------------------------------------------------------------
+
+import sys,os,glob
+
+if len(sys.argv) < 2:
+    print "Usage: python testlex.py [-make] directory"
+    raise SystemExit
+
+dirname = None
+make = 0        # set by -make: create missing .exp baselines instead of comparing
+
+for o in sys.argv[1:]:
+    if o == '-make':
+        make = 1
+    else:
+        dirname = o     # first non-option argument is the test directory
+        break
+
+if not dirname:
+    print "Usage: python testlex.py [-make] directory"
+    raise SystemExit
+
+f = glob.glob("%s/%s" % (dirname,"lex_*.py"))
+
+print "**** Running tests for lex ****"
+
+for t in f:
+    name = t[:-3]       # strip the ".py" suffix
+    print "Testing %-32s" % name,
+    if make:
+        if not os.path.exists("%s.exp" % name):
+            os.system("python %s.py >%s.exp 2>&1" % (name,name))
+        passed = 1      # baseline mode never fails
+    else:
+        os.system("python %s.py >%s.out 2>&1" % (name,name))
+        a = os.system("diff %s.out %s.exp >%s.dif" % (name,name,name))
+        if a == 0:      # zero status from diff is treated as a match
+            passed = 1
+        else:
+            passed = 0
+
+    if passed:
+        print "Passed"
+    else:
+        print "Failed. See %s.dif" % name
+
+
+
+
+
+
+
diff --git a/ext/ply/test/testyacc.py b/ext/ply/test/testyacc.py
new file mode 100644
index 000000000..a185cbb29
--- /dev/null
+++ b/ext/ply/test/testyacc.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+# ----------------------------------------------------------------------
+# testyacc.py
+#
+# Run tests for the yacc module
+# ----------------------------------------------------------------------
+
+import sys,os,glob
+
+if len(sys.argv) < 2:
+    print "Usage: python testyacc.py [-make] directory"
+    raise SystemExit
+
+dirname = None
+make = 0        # set by -make: create missing .exp baselines instead of comparing
+
+for o in sys.argv[1:]:
+    if o == '-make':
+        make = 1
+    else:
+        dirname = o     # first non-option argument is the test directory
+        break
+
+if not dirname:
+    print "Usage: python testyacc.py [-make] directory"
+    raise SystemExit
+
+f = glob.glob("%s/%s" % (dirname,"yacc_*.py"))
+
+print "**** Running tests for yacc ****"
+
+for t in f:
+    name = t[:-3]       # strip the ".py" suffix
+    print "Testing %-32s" % name,
+    os.system("rm -f %s/parsetab.*" % dirname)      # remove parsetab.* files before each test
+    if make:
+        if not os.path.exists("%s.exp" % name):
+            os.system("python %s.py >%s.exp 2>&1" % (name,name))
+        passed = 1      # baseline mode never fails
+    else:
+        os.system("python %s.py >%s.out 2>&1" % (name,name))
+        a = os.system("diff %s.out %s.exp >%s.dif" % (name,name,name))
+        if a == 0:      # zero status from diff is treated as a match
+            passed = 1
+        else:
+            passed = 0
+
+    if passed:
+        print "Passed"
+    else:
+        print "Failed. See %s.dif" % name
+
+
+
+
+
+
+
diff --git a/ext/ply/test/yacc_badargs.exp b/ext/ply/test/yacc_badargs.exp
new file mode 100644
index 000000000..b145c51f2
--- /dev/null
+++ b/ext/ply/test/yacc_badargs.exp
@@ -0,0 +1,3 @@
+./yacc_badargs.py:21: Rule 'p_statement_assign' has too many arguments.
+./yacc_badargs.py:25: Rule 'p_statement_expr' requires an argument.
+yacc.YaccError: Unable to construct parser.
diff --git a/ext/ply/test/yacc_badargs.py b/ext/ply/test/yacc_badargs.py
new file mode 100644
index 000000000..12075efcc
--- /dev/null
+++ b/ext/ply/test/yacc_badargs.py
@@ -0,0 +1,67 @@
+# -----------------------------------------------------------------------------
+# yacc_badargs.py
+#
+# Rules with wrong # args
+# -----------------------------------------------------------------------------
+import sys
+sys.tracebacklimit = 0
+
+from calclex import tokens
+
+# Parsing rules
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t,s):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr():
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[2] == '/': t[0] = t[1] / t[3]  # operator is t[2]; t[3] is the right operand
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_badprec.exp b/ext/ply/test/yacc_badprec.exp
new file mode 100644
index 000000000..7764b0246
--- /dev/null
+++ b/ext/ply/test/yacc_badprec.exp
@@ -0,0 +1 @@
+yacc.YaccError: precedence must be a list or tuple.
diff --git a/ext/ply/test/yacc_badprec.py b/ext/ply/test/yacc_badprec.py
new file mode 100644
index 000000000..55bf7720d
--- /dev/null
+++ b/ext/ply/test/yacc_badprec.py
@@ -0,0 +1,63 @@
+# -----------------------------------------------------------------------------
+# yacc_badprec.py
+#
+# Bad precedence specifier
+# -----------------------------------------------------------------------------
+import sys
+sys.tracebacklimit = 0
+
+from calclex import tokens
+
+# Parsing rules
+precedence = "blah"
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[2] == '/': t[0] = t[1] / t[3]  # operator is t[2]; t[3] is the right operand
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_badprec2.exp b/ext/ply/test/yacc_badprec2.exp
new file mode 100644
index 000000000..1df1427b2
--- /dev/null
+++ b/ext/ply/test/yacc_badprec2.exp
@@ -0,0 +1,3 @@
+yacc: Invalid precedence table.
+yacc: Generating SLR parsing table...
+yacc: 4 shift/reduce conflicts
diff --git a/ext/ply/test/yacc_badprec2.py b/ext/ply/test/yacc_badprec2.py
new file mode 100644
index 000000000..9cbc99827
--- /dev/null
+++ b/ext/ply/test/yacc_badprec2.py
@@ -0,0 +1,67 @@
+# -----------------------------------------------------------------------------
+# yacc_badprec2.py
+#
+# Bad precedence
+# -----------------------------------------------------------------------------
+import sys
+sys.tracebacklimit = 0
+
+from calclex import tokens
+
+# Parsing rules
+precedence = (
+ 42,
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[2] == '/': t[0] = t[1] / t[3]  # operator is t[2]; t[3] is the right operand
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_badrule.exp b/ext/ply/test/yacc_badrule.exp
new file mode 100644
index 000000000..553779778
--- /dev/null
+++ b/ext/ply/test/yacc_badrule.exp
@@ -0,0 +1,5 @@
+./yacc_badrule.py:22: Syntax error. Expected ':'
+./yacc_badrule.py:26: Syntax error in rule 'statement'
+./yacc_badrule.py:31: Syntax error. Expected ':'
+./yacc_badrule.py:40: Syntax error. Expected ':'
+yacc.YaccError: Unable to construct parser.
diff --git a/ext/ply/test/yacc_badrule.py b/ext/ply/test/yacc_badrule.py
new file mode 100644
index 000000000..cad3a967e
--- /dev/null
+++ b/ext/ply/test/yacc_badrule.py
@@ -0,0 +1,67 @@
+# -----------------------------------------------------------------------------
+# yacc_badrule.py
+#
+# Syntax problems in the rule strings
+# -----------------------------------------------------------------------------
+import sys
+sys.tracebacklimit = 0
+
+from calclex import tokens
+
+# Parsing rules
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[2] == '/': t[0] = t[1] / t[3]  # operator is t[2]; t[3] is the right operand
+
+def p_expression_uminus(t):
+ 'expression: MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_badtok.exp b/ext/ply/test/yacc_badtok.exp
new file mode 100644
index 000000000..f6e64726c
--- /dev/null
+++ b/ext/ply/test/yacc_badtok.exp
@@ -0,0 +1 @@
+yacc.YaccError: tokens must be a list or tuple.
diff --git a/ext/ply/test/yacc_badtok.py b/ext/ply/test/yacc_badtok.py
new file mode 100644
index 000000000..a17d26aaa
--- /dev/null
+++ b/ext/ply/test/yacc_badtok.py
@@ -0,0 +1,68 @@
+# -----------------------------------------------------------------------------
+# yacc_badtok.py
+#
+# A grammar, but tokens is a bad datatype
+# -----------------------------------------------------------------------------
+
+import sys
+sys.tracebacklimit = 0
+
+tokens = "Hello"
+
+# Parsing rules
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[2] == '/': t[0] = t[1] / t[3]  # operator is t[2]; t[3] is the right operand
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_dup.exp b/ext/ply/test/yacc_dup.exp
new file mode 100644
index 000000000..99f3fe22c
--- /dev/null
+++ b/ext/ply/test/yacc_dup.exp
@@ -0,0 +1,4 @@
+./yacc_dup.py:25: Function p_statement redefined. Previously defined on line 21
+yacc: Warning. Token 'EQUALS' defined, but not used.
+yacc: Warning. There is 1 unused token.
+yacc: Generating SLR parsing table...
diff --git a/ext/ply/test/yacc_dup.py b/ext/ply/test/yacc_dup.py
new file mode 100644
index 000000000..557cd0ae1
--- /dev/null
+++ b/ext/ply/test/yacc_dup.py
@@ -0,0 +1,67 @@
+# -----------------------------------------------------------------------------
+# yacc_dup.py
+#
+# Duplicated rule name
+# -----------------------------------------------------------------------------
+import sys
+sys.tracebacklimit = 0
+
+from calclex import tokens
+
+# Parsing rules
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[2] == '/': t[0] = t[1] / t[3]  # operator is t[2]; t[3] is the right operand
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_error1.exp b/ext/ply/test/yacc_error1.exp
new file mode 100644
index 000000000..980fc905c
--- /dev/null
+++ b/ext/ply/test/yacc_error1.exp
@@ -0,0 +1 @@
+yacc.YaccError: ./yacc_error1.py:59: p_error() requires 1 argument.
diff --git a/ext/ply/test/yacc_error1.py b/ext/ply/test/yacc_error1.py
new file mode 100644
index 000000000..413004520
--- /dev/null
+++ b/ext/ply/test/yacc_error1.py
@@ -0,0 +1,67 @@
+# -----------------------------------------------------------------------------
+# yacc_error1.py
+#
+# Bad p_error() function (takes two arguments instead of one)
+# -----------------------------------------------------------------------------
+import sys
+sys.tracebacklimit = 0
+
+from calclex import tokens
+
+# Parsing rules
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[2] == '/': t[0] = t[1] / t[3]  # operator is t[2]; t[3] is the right operand
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t,s):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_error2.exp b/ext/ply/test/yacc_error2.exp
new file mode 100644
index 000000000..d0573b4dd
--- /dev/null
+++ b/ext/ply/test/yacc_error2.exp
@@ -0,0 +1 @@
+yacc.YaccError: ./yacc_error2.py:59: p_error() requires 1 argument.
diff --git a/ext/ply/test/yacc_error2.py b/ext/ply/test/yacc_error2.py
new file mode 100644
index 000000000..d4fd1d219
--- /dev/null
+++ b/ext/ply/test/yacc_error2.py
@@ -0,0 +1,67 @@
+# -----------------------------------------------------------------------------
+# yacc_error2.py
+#
+# Bad p_error() function (takes no arguments)
+# -----------------------------------------------------------------------------
+import sys
+sys.tracebacklimit = 0
+
+from calclex import tokens
+
+# Parsing rules
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[2] == '/': t[0] = t[1] / t[3]  # operator is t[2]; t[3] is the right operand
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error():
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_error3.exp b/ext/ply/test/yacc_error3.exp
new file mode 100644
index 000000000..31eaee754
--- /dev/null
+++ b/ext/ply/test/yacc_error3.exp
@@ -0,0 +1 @@
+yacc.YaccError: 'p_error' defined, but is not a function.
diff --git a/ext/ply/test/yacc_error3.py b/ext/ply/test/yacc_error3.py
new file mode 100644
index 000000000..7093fab48
--- /dev/null
+++ b/ext/ply/test/yacc_error3.py
@@ -0,0 +1,66 @@
+# -----------------------------------------------------------------------------
+# yacc_error3.py
+#
+# p_error defined, but not as a function
+# -----------------------------------------------------------------------------
+import sys
+sys.tracebacklimit = 0
+
+from calclex import tokens
+
+# Parsing rules
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[2] == '/': t[0] = t[1] / t[3]  # operator is t[2]; t[3] is the right operand
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+p_error = "blah"
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_inf.exp b/ext/ply/test/yacc_inf.exp
new file mode 100644
index 000000000..a7f47dada
--- /dev/null
+++ b/ext/ply/test/yacc_inf.exp
@@ -0,0 +1,5 @@
+yacc: Warning. Token 'NUMBER' defined, but not used.
+yacc: Warning. There is 1 unused token.
+yacc: Infinite recursion detected for symbol 'statement'.
+yacc: Infinite recursion detected for symbol 'expression'.
+yacc.YaccError: Unable to construct parser.
diff --git a/ext/ply/test/yacc_inf.py b/ext/ply/test/yacc_inf.py
new file mode 100644
index 000000000..885e2c4df
--- /dev/null
+++ b/ext/ply/test/yacc_inf.py
@@ -0,0 +1,55 @@
+# -----------------------------------------------------------------------------
+# yacc_inf.py
+#
+# Infinite recursion
+# -----------------------------------------------------------------------------
+import sys
+sys.tracebacklimit = 0
+
+from calclex import tokens
+
+# Parsing rules
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[2] == '/': t[0] = t[1] / t[3]  # operator is t[2]; t[3] is the right operand
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_missing1.exp b/ext/ply/test/yacc_missing1.exp
new file mode 100644
index 000000000..065d6a54a
--- /dev/null
+++ b/ext/ply/test/yacc_missing1.exp
@@ -0,0 +1,2 @@
+./yacc_missing1.py:22: Symbol 'location' used, but not defined as a token or a rule.
+yacc.YaccError: Unable to construct parser.
diff --git a/ext/ply/test/yacc_missing1.py b/ext/ply/test/yacc_missing1.py
new file mode 100644
index 000000000..e63904d0e
--- /dev/null
+++ b/ext/ply/test/yacc_missing1.py
@@ -0,0 +1,67 @@
+# -----------------------------------------------------------------------------
+# yacc_missing1.py
+#
+# Grammar with a missing rule
+# -----------------------------------------------------------------------------
+import sys
+sys.tracebacklimit = 0
+
+from calclex import tokens
+
+# Parsing rules
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement : location EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[2] == '/': t[0] = t[1] / t[3]  # operator is t[2]; t[3] is the right operand
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_nodoc.exp b/ext/ply/test/yacc_nodoc.exp
new file mode 100644
index 000000000..3f52a3287
--- /dev/null
+++ b/ext/ply/test/yacc_nodoc.exp
@@ -0,0 +1,2 @@
+./yacc_nodoc.py:25: No documentation string specified in function 'p_statement_expr'
+yacc: Generating SLR parsing table...
diff --git a/ext/ply/test/yacc_nodoc.py b/ext/ply/test/yacc_nodoc.py
new file mode 100644
index 000000000..e3941bdaa
--- /dev/null
+++ b/ext/ply/test/yacc_nodoc.py
@@ -0,0 +1,66 @@
+# -----------------------------------------------------------------------------
+# yacc_nodoc.py
+#
+# Rule with a missing doc-string
+# -----------------------------------------------------------------------------
+import sys
+sys.tracebacklimit = 0
+
+from calclex import tokens
+
+# Parsing rules
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[2] == '/': t[0] = t[1] / t[3]  # operator is t[2]; t[3] is the right operand
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_noerror.exp b/ext/ply/test/yacc_noerror.exp
new file mode 100644
index 000000000..986fa31fa
--- /dev/null
+++ b/ext/ply/test/yacc_noerror.exp
@@ -0,0 +1,2 @@
+yacc: Warning. no p_error() function is defined.
+yacc: Generating SLR parsing table...
diff --git a/ext/ply/test/yacc_noerror.py b/ext/ply/test/yacc_noerror.py
new file mode 100644
index 000000000..d92f48ea6
--- /dev/null
+++ b/ext/ply/test/yacc_noerror.py
@@ -0,0 +1,64 @@
+# -----------------------------------------------------------------------------
+# yacc_noerror.py
+#
+# No p_error() rule defined.
+# -----------------------------------------------------------------------------
+import sys
+sys.tracebacklimit = 0
+
+from calclex import tokens
+
+# Parsing rules
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[2] == '/': t[0] = t[1] / t[3]  # operator is t[2]; t[3] is the right operand
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_nop.exp b/ext/ply/test/yacc_nop.exp
new file mode 100644
index 000000000..062878b9e
--- /dev/null
+++ b/ext/ply/test/yacc_nop.exp
@@ -0,0 +1,2 @@
+./yacc_nop.py:25: Warning. Possible grammar rule 'statement_expr' defined without p_ prefix.
+yacc: Generating SLR parsing table...
diff --git a/ext/ply/test/yacc_nop.py b/ext/ply/test/yacc_nop.py
new file mode 100644
index 000000000..c599ffd5d
--- /dev/null
+++ b/ext/ply/test/yacc_nop.py
@@ -0,0 +1,67 @@
+# -----------------------------------------------------------------------------
+# yacc_nop.py
+#
+# Possible grammar rule defined without p_ prefix
+# -----------------------------------------------------------------------------
+import sys
+sys.tracebacklimit = 0
+
+from calclex import tokens
+
+# Parsing rules
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[2] == '/': t[0] = t[1] / t[3]  # operator is t[2]; t[3] is the right operand
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_notfunc.exp b/ext/ply/test/yacc_notfunc.exp
new file mode 100644
index 000000000..271167341
--- /dev/null
+++ b/ext/ply/test/yacc_notfunc.exp
@@ -0,0 +1,4 @@
+yacc: Warning. 'p_statement_assign' not defined as a function
+yacc: Warning. Token 'EQUALS' defined, but not used.
+yacc: Warning. There is 1 unused token.
+yacc: Generating SLR parsing table...
diff --git a/ext/ply/test/yacc_notfunc.py b/ext/ply/test/yacc_notfunc.py
new file mode 100644
index 000000000..f61663d60
--- /dev/null
+++ b/ext/ply/test/yacc_notfunc.py
@@ -0,0 +1,65 @@
+# -----------------------------------------------------------------------------
+# yacc_notfunc.py
+#
+# p_rule not defined as a function
+# -----------------------------------------------------------------------------
+import sys
+sys.tracebacklimit = 0
+
+from calclex import tokens
+
+# Parsing rules
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+p_statement_assign = "Blah"
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[3] == '/': t[0] = t[1] / t[3]
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_notok.exp b/ext/ply/test/yacc_notok.exp
new file mode 100644
index 000000000..708f6f597
--- /dev/null
+++ b/ext/ply/test/yacc_notok.exp
@@ -0,0 +1 @@
+yacc.YaccError: module does not define a list 'tokens'
diff --git a/ext/ply/test/yacc_notok.py b/ext/ply/test/yacc_notok.py
new file mode 100644
index 000000000..dfa0059be
--- /dev/null
+++ b/ext/ply/test/yacc_notok.py
@@ -0,0 +1,66 @@
+# -----------------------------------------------------------------------------
+# yacc_notok.py
+#
+# A grammar, but we forgot to import the tokens list
+# -----------------------------------------------------------------------------
+
+import sys
+sys.tracebacklimit = 0
+
+# Parsing rules
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[3] == '/': t[0] = t[1] / t[3]
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_rr.exp b/ext/ply/test/yacc_rr.exp
new file mode 100644
index 000000000..0ec556d16
--- /dev/null
+++ b/ext/ply/test/yacc_rr.exp
@@ -0,0 +1,2 @@
+yacc: Generating SLR parsing table...
+yacc: 1 reduce/reduce conflict
diff --git a/ext/ply/test/yacc_rr.py b/ext/ply/test/yacc_rr.py
new file mode 100644
index 000000000..c061c2c17
--- /dev/null
+++ b/ext/ply/test/yacc_rr.py
@@ -0,0 +1,71 @@
+# -----------------------------------------------------------------------------
+# yacc_rr.py
+#
+# A grammar with a reduce/reduce conflict
+# -----------------------------------------------------------------------------
+import sys
+sys.tracebacklimit = 0
+
+from calclex import tokens
+
+# Parsing rules
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_assign_2(t):
+ 'statement : NAME EQUALS NUMBER'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[3] == '/': t[0] = t[1] / t[3]
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_simple.exp b/ext/ply/test/yacc_simple.exp
new file mode 100644
index 000000000..de7964b6f
--- /dev/null
+++ b/ext/ply/test/yacc_simple.exp
@@ -0,0 +1 @@
+yacc: Generating SLR parsing table...
diff --git a/ext/ply/test/yacc_simple.py b/ext/ply/test/yacc_simple.py
new file mode 100644
index 000000000..7b4b40b17
--- /dev/null
+++ b/ext/ply/test/yacc_simple.py
@@ -0,0 +1,67 @@
+# -----------------------------------------------------------------------------
+# yacc_simple.py
+#
+# A simple, properly specified grammar
+# -----------------------------------------------------------------------------
+import sys
+sys.tracebacklimit = 0
+
+from calclex import tokens
+
+# Parsing rules
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[3] == '/': t[0] = t[1] / t[3]
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_sr.exp b/ext/ply/test/yacc_sr.exp
new file mode 100644
index 000000000..7225ad94b
--- /dev/null
+++ b/ext/ply/test/yacc_sr.exp
@@ -0,0 +1,2 @@
+yacc: Generating SLR parsing table...
+yacc: 20 shift/reduce conflicts
diff --git a/ext/ply/test/yacc_sr.py b/ext/ply/test/yacc_sr.py
new file mode 100644
index 000000000..4341f6997
--- /dev/null
+++ b/ext/ply/test/yacc_sr.py
@@ -0,0 +1,62 @@
+# -----------------------------------------------------------------------------
+# yacc_sr.py
+#
+# A grammar with shift-reduce conflicts
+# -----------------------------------------------------------------------------
+import sys
+sys.tracebacklimit = 0
+
+from calclex import tokens
+
+# Parsing rules
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[3] == '/': t[0] = t[1] / t[3]
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_term1.exp b/ext/ply/test/yacc_term1.exp
new file mode 100644
index 000000000..422d2bacd
--- /dev/null
+++ b/ext/ply/test/yacc_term1.exp
@@ -0,0 +1,2 @@
+./yacc_term1.py:22: Illegal rule name 'NUMBER'. Already defined as a token.
+yacc.YaccError: Unable to construct parser.
diff --git a/ext/ply/test/yacc_term1.py b/ext/ply/test/yacc_term1.py
new file mode 100644
index 000000000..97a2e7a60
--- /dev/null
+++ b/ext/ply/test/yacc_term1.py
@@ -0,0 +1,67 @@
+# -----------------------------------------------------------------------------
+# yacc_term1.py
+#
+# Terminal used on the left-hand-side
+# -----------------------------------------------------------------------------
+import sys
+sys.tracebacklimit = 0
+
+from calclex import tokens
+
+# Parsing rules
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'NUMBER : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[3] == '/': t[0] = t[1] / t[3]
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_unused.exp b/ext/ply/test/yacc_unused.exp
new file mode 100644
index 000000000..390754de3
--- /dev/null
+++ b/ext/ply/test/yacc_unused.exp
@@ -0,0 +1,4 @@
+./yacc_unused.py:60: Symbol 'COMMA' used, but not defined as a token or a rule.
+yacc: Symbol 'COMMA' is unreachable.
+yacc: Symbol 'exprlist' is unreachable.
+yacc.YaccError: Unable to construct parser.
diff --git a/ext/ply/test/yacc_unused.py b/ext/ply/test/yacc_unused.py
new file mode 100644
index 000000000..4cbd63327
--- /dev/null
+++ b/ext/ply/test/yacc_unused.py
@@ -0,0 +1,76 @@
+# -----------------------------------------------------------------------------
+# yacc_unused.py
+#
+# A grammar with an unused rule
+# -----------------------------------------------------------------------------
+import sys
+sys.tracebacklimit = 0
+
+from calclex import tokens
+
+# Parsing rules
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[3] == '/': t[0] = t[1] / t[3]
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_expr_list(t):
+ 'exprlist : exprlist COMMA expression'
+ pass
+
+def p_expr_list_2(t):
+ 'exprlist : expression'
+ pass
+
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/test/yacc_uprec.exp b/ext/ply/test/yacc_uprec.exp
new file mode 100644
index 000000000..b1a71a250
--- /dev/null
+++ b/ext/ply/test/yacc_uprec.exp
@@ -0,0 +1,2 @@
+./yacc_uprec.py:35: Nothing known about the precedence of 'UMINUS'
+yacc.YaccError: Unable to construct parser.
diff --git a/ext/ply/test/yacc_uprec.py b/ext/ply/test/yacc_uprec.py
new file mode 100644
index 000000000..139ce6318
--- /dev/null
+++ b/ext/ply/test/yacc_uprec.py
@@ -0,0 +1,62 @@
+# -----------------------------------------------------------------------------
+# yacc_uprec.py
+#
+# A grammar with a bad %prec specifier
+# -----------------------------------------------------------------------------
+import sys
+sys.tracebacklimit = 0
+
+from calclex import tokens
+
+# Parsing rules
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[3] == '/': t[0] = t[1] / t[3]
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import yacc
+yacc.yacc()
+
+
+
+
diff --git a/ext/ply/yacc.py b/ext/ply/yacc.py
new file mode 100644
index 000000000..1041745ed
--- /dev/null
+++ b/ext/ply/yacc.py
@@ -0,0 +1,1846 @@
+#-----------------------------------------------------------------------------
+# ply: yacc.py
+#
+# Author: David M. Beazley (beazley@cs.uchicago.edu)
+# Department of Computer Science
+# University of Chicago
+# Chicago, IL 60637
+#
+# Copyright (C) 2001, David M. Beazley
+#
+# $Header: /home/stever/bk/newmem2/ext/ply/yacc.py 1.3 03/06/06 14:59:28-00:00 stever@ $
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# See the file COPYING for a complete copy of the LGPL.
+#
+#
+# This implements an LR parser that is constructed from grammar rules defined
+# as Python functions. Roughly speaking, this module is a cross between
+# John Aycock's Spark system and the GNU bison utility.
+#
+# Disclaimer: This is a work in progress. SLR parsing seems to work fairly
+# well and there is extensive error checking. LALR(1) is in progress. The
+# rest of this file is a bit of a mess. Please pardon the dust.
+#
+# The current implementation is only somewhat object-oriented. The
+# LR parser itself is defined in terms of an object (which allows multiple
+# parsers to co-exist). However, most of the variables used during table
+# construction are defined in terms of global variables. Users shouldn't
+# notice unless they are trying to define multiple parsers at the same
+# time using threads (in which case they should have their head examined).
+#-----------------------------------------------------------------------------
+
+__version__ = "1.3"
+
+#-----------------------------------------------------------------------------
+# === User configurable parameters ===
+#
+# Change these to modify the default behavior of yacc (if you wish)
+#-----------------------------------------------------------------------------
+
+yaccdebug = 1 # Debugging mode. If set, yacc generates a
+ # 'parser.out' file in the current directory
+
+debug_file = 'parser.out' # Default name of the debugging file
+tab_module = 'parsetab' # Default name of the table module
+default_lr = 'SLR' # Default LR table generation method
+
+error_count = 3 # Number of symbols that must be shifted to leave recovery mode
+
+import re, types, sys, cStringIO, md5, os.path
+
+# Exception raised for yacc-related errors
+class YaccError(Exception): pass
+
+#-----------------------------------------------------------------------------
+# === LR Parsing Engine ===
+#
+# The following classes are used for the LR parser itself. These are not
+# used during table construction and are independent of the actual LR
+# table generation algorithm
+#-----------------------------------------------------------------------------
+
+# This class is used to hold non-terminal grammar symbols during parsing.
+# It normally has the following attributes set:
+# .type = Grammar symbol type
+# .value = Symbol value
+# .lineno = Starting line number
+# .endlineno = Ending line number (optional, set automatically)
+
+class YaccSymbol:
+ def __str__(self): return self.type
+ def __repr__(self): return str(self)
+
+# This class is a wrapper around the objects actually passed to each
+# grammar rule. Index lookup and assignment actually read and write the
+# .value attribute of the underlying YaccSymbol object.
+# The lineno() method returns the line number of a given
+# item (or 0 if not defined). The linespan() method returns
+# a tuple of (startline,endline) representing the range of lines
+# for a symbol.
+
+class YaccSlice:
+ def __init__(self,s):
+ self.slice = s
+ self.pbstack = []
+
+ def __getitem__(self,n):
+ return self.slice[n].value
+
+ def __setitem__(self,n,v):
+ self.slice[n].value = v
+
+ def __len__(self):
+ return len(self.slice)
+
+ def lineno(self,n):
+ return getattr(self.slice[n],"lineno",0)
+
+ def linespan(self,n):
+ startline = getattr(self.slice[n],"lineno",0)
+ endline = getattr(self.slice[n],"endlineno",startline)
+ return startline,endline
+
+ def pushback(self,n):
+ if n <= 0:
+ raise ValueError, "Expected a positive value"
+ if n > (len(self.slice)-1):
+ raise ValueError, "Can't push %d tokens. Only %d are available." % (n,len(self.slice)-1)
+ for i in range(0,n):
+ self.pbstack.append(self.slice[-i-1])
+
+# The LR Parsing engine. This is defined as a class so that multiple parsers
+# can exist in the same process. A user never instantiates this directly.
+# Instead, the global yacc() function should be used to create a suitable Parser
+# object.
+
+class Parser:
+ def __init__(self,magic=None):
+
+ # This is a hack to keep users from trying to instantiate a Parser
+ # object directly.
+
+ if magic != "xyzzy":
+ raise YaccError, "Can't instantiate Parser. Use yacc() instead."
+
+ # Reset internal state
+ self.productions = None # List of productions
+ self.errorfunc = None # Error handling function
+ self.action = { } # LR Action table
+ self.goto = { } # LR goto table
+ self.require = { } # Attribute require table
+ self.method = "Unknown LR" # Table construction method used
+
+ def errok(self):
+ self.errorcount = 0
+
+ def restart(self):
+ del self.statestack[:]
+ del self.symstack[:]
+ sym = YaccSymbol()
+ sym.type = '$'
+ self.symstack.append(sym)
+ self.statestack.append(0)
+
+ def parse(self,input=None,lexer=None,debug=0):
+ lookahead = None # Current lookahead symbol
+ lookaheadstack = [ ] # Stack of lookahead symbols
+ actions = self.action # Local reference to action table
+ goto = self.goto # Local reference to goto table
+ prod = self.productions # Local reference to production list
+ pslice = YaccSlice(None) # Slice object passed to grammar rules
+ pslice.parser = self # Parser object
+ self.errorcount = 0 # Used during error recovery
+
+ # If no lexer was given, we will try to use the lex module
+ if not lexer:
+ import lex as lexer
+
+ pslice.lexer = lexer
+
+ # If input was supplied, pass to lexer
+ if input:
+ lexer.input(input)
+
+ # Tokenize function
+ get_token = lexer.token
+
+ statestack = [ ] # Stack of parsing states
+ self.statestack = statestack
+ symstack = [ ] # Stack of grammar symbols
+ self.symstack = symstack
+
+ errtoken = None # Err token
+
+ # The start state is assumed to be (0,$)
+ statestack.append(0)
+ sym = YaccSymbol()
+ sym.type = '$'
+ symstack.append(sym)
+
+ while 1:
+ # Get the next symbol on the input. If a lookahead symbol
+ # is already set, we just use that. Otherwise, we'll pull
+ # the next token off of the lookaheadstack or from the lexer
+ if not lookahead:
+ if not lookaheadstack:
+ lookahead = get_token() # Get the next token
+ else:
+ lookahead = lookaheadstack.pop()
+ if not lookahead:
+ lookahead = YaccSymbol()
+ lookahead.type = '$'
+ if debug:
+ print "%-20s : %s" % (lookahead, [xx.type for xx in symstack])
+
+ # Check the action table
+ s = statestack[-1]
+ ltype = lookahead.type
+ t = actions.get((s,ltype),None)
+
+ if t is not None:
+ if t > 0:
+ # shift a symbol on the stack
+ if ltype == '$':
+ # Error, end of input
+ print "yacc: Parse error. EOF"
+ return
+ statestack.append(t)
+ symstack.append(lookahead)
+ lookahead = None
+
+ # Decrease error count on successful shift
+ if self.errorcount > 0:
+ self.errorcount -= 1
+
+ continue
+
+ if t < 0:
+ # reduce a symbol on the stack, emit a production
+ p = prod[-t]
+ pname = p.name
+ plen = p.len
+
+ # Get production function
+ sym = YaccSymbol()
+ sym.type = pname # Production name
+ sym.value = None
+
+ if plen:
+ targ = symstack[-plen-1:]
+ targ[0] = sym
+ try:
+ sym.lineno = targ[1].lineno
+ sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno)
+ except AttributeError:
+ sym.lineno = 0
+ del symstack[-plen:]
+ del statestack[-plen:]
+ else:
+ sym.lineno = 0
+ targ = [ sym ]
+ pslice.slice = targ
+ pslice.pbstack = []
+ # Call the grammar rule with our special slice object
+ p.func(pslice)
+
+ # Validate attributes of the resulting value attribute
+# if require:
+# try:
+# t0 = targ[0]
+# r = Requires.get(t0.type,None)
+# t0d = t0.__dict__
+# if r:
+# for field in r:
+# tn = t0
+# for fname in field:
+# try:
+# tf = tn.__dict__
+# tn = tf.get(fname)
+# except StandardError:
+# tn = None
+# if not tn:
+# print "%s:%d: Rule %s doesn't set required attribute '%s'" % \
+# (p.file,p.line,p.name,".".join(field))
+# except TypeError,LookupError:
+# print "Bad requires directive " % r
+# pass
+
+
+ # If there was a pushback, put that on the stack
+ if pslice.pbstack:
+ lookaheadstack.append(lookahead)
+ for _t in pslice.pbstack:
+ lookaheadstack.append(_t)
+ lookahead = None
+
+ symstack.append(sym)
+ statestack.append(goto[statestack[-1],pname])
+ continue
+
+ if t == 0:
+ n = symstack[-1]
+ return getattr(n,"value",None)
+
+ if t == None:
+ # We have some kind of parsing error here. To handle this,
+ # we are going to push the current token onto the lookahead stack
+ # and replace it with an 'error' token. If there are any synchronization
+ # rules, they may catch it.
+ #
+ # In addition to pushing the error token, we call the user-defined p_error()
+ # function if this is the first syntax error. This function is only called
+ # if errorcount == 0.
+
+ if not self.errorcount:
+ self.errorcount = error_count
+ errtoken = lookahead
+ if errtoken.type == '$':
+ errtoken = None # End of file!
+ if self.errorfunc:
+ global errok,token,restart
+ errok = self.errok # Set some special functions available in error recovery
+ token = get_token
+ restart = self.restart
+ tok = self.errorfunc(errtoken)
+ del errok, token, restart # Delete special functions
+
+ if not self.errorcount:
+ # User must have done some kind of panic mode recovery on their own. The returned token
+ # is the next lookahead
+ lookahead = tok
+ errtoken = None
+ continue
+ else:
+ if errtoken:
+ if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
+ else: lineno = 0
+ if lineno:
+ print "yacc: Syntax error at line %d, token=%s" % (lineno, errtoken.type)
+ else:
+ print "yacc: Syntax error, token=%s" % errtoken.type
+ else:
+ print "yacc: Parse error in input. EOF"
+ return
+
+ else:
+ self.errorcount = error_count
+
+ # case 1: the statestack only has 1 entry on it. If we're in this state, the
+ # entire parse has been rolled back and we're completely hosed. The token is
+ # discarded and we just keep going.
+
+ if len(statestack) <= 1 and lookahead.type != '$':
+ lookahead = None
+ errtoken = None
+ # Nuke the pushback stack
+ del lookaheadstack[:]
+ continue
+
+ # case 2: the statestack has a couple of entries on it, but we're
+ # at the end of the file. nuke the top entry and generate an error token
+
+ # Start nuking entries on the stack
+ if lookahead.type == '$':
+ # Whoa. We're really hosed here. Bail out
+ return
+
+ if lookahead.type != 'error':
+ sym = symstack[-1]
+ if sym.type == 'error':
+ # Hmmm. Error is on top of stack, we'll just nuke input
+ # symbol and continue
+ lookahead = None
+ continue
+ t = YaccSymbol()
+ t.type = 'error'
+ if hasattr(lookahead,"lineno"):
+ t.lineno = lookahead.lineno
+ t.value = lookahead
+ lookaheadstack.append(lookahead)
+ lookahead = t
+ else:
+ symstack.pop()
+ statestack.pop()
+
+ continue
+
+ # Call an error function here
+ raise RuntimeError, "yacc: internal parser error!!!\n"
+
+# -----------------------------------------------------------------------------
+# === Parser Construction ===
+#
+# The following functions and variables are used to implement the yacc() function
+# itself. This is pretty hairy stuff involving lots of error checking,
+# construction of LR items, kernels, and so forth. Although a lot of
+# this work is done using global variables, the resulting Parser object
+# is completely self contained--meaning that it is safe to repeatedly
+# call yacc() with different grammars in the same application.
+# -----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
+# validate_file()
+#
+# This function checks to see if there are duplicated p_rulename() functions
+# in the parser module file. Without this function, it is really easy for
+# users to make mistakes by cutting and pasting code fragments (and it's a real
+# bugger to try and figure out why the resulting parser doesn't work). Therefore,
+# we just do a little regular expression pattern matching of def statements
+# to try and detect duplicates.
+# -----------------------------------------------------------------------------
+
+def validate_file(filename):
+ base,ext = os.path.splitext(filename)
+ if ext != '.py': return 1 # No idea. Assume it's okay.
+
+ try:
+ f = open(filename)
+ lines = f.readlines()
+ f.close()
+ except IOError:
+ return 1 # Oh well
+
+ # Match def p_funcname(
+ fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
+ counthash = { }
+ linen = 1
+ noerror = 1
+ for l in lines:
+ m = fre.match(l)
+ if m:
+ name = m.group(1)
+ prev = counthash.get(name)
+ if not prev:
+ counthash[name] = linen
+ else:
+ print "%s:%d: Function %s redefined. Previously defined on line %d" % (filename,linen,name,prev)
+ noerror = 0
+ linen += 1
+ return noerror
+
+# This function looks for functions that might be grammar rules, but which don't have the proper p_ prefix.
+def validate_dict(d):
+ for n,v in d.items():
+ if n[0:2] == 'p_' and isinstance(v,types.FunctionType): continue
+ if n[0:2] == 't_': continue
+
+ if n[0:2] == 'p_':
+ print "yacc: Warning. '%s' not defined as a function" % n
+ if isinstance(v,types.FunctionType) and v.func_code.co_argcount == 1:
+ try:
+ doc = v.__doc__.split(" ")
+ if doc[1] == ':':
+ print "%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix." % (v.func_code.co_filename, v.func_code.co_firstlineno,n)
+ except StandardError:
+ pass
+
+# -----------------------------------------------------------------------------
+# === GRAMMAR FUNCTIONS ===
+#
+# The following global variables and functions are used to store, manipulate,
+# and verify the grammar rules specified by the user.
+# -----------------------------------------------------------------------------
+
+# Initialize all of the global variables used during grammar construction
+def initialize_vars():
+ global Productions, Prodnames, Prodmap, Terminals
+ global Nonterminals, First, Follow, Precedence, LRitems
+ global Errorfunc, Signature, Requires
+
+ Productions = [None] # A list of all of the productions. The first
+ # entry is always reserved for the purpose of
+ # building an augmented grammar
+
+ Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all
+ # productions of that nonterminal.
+
+ Prodmap = { } # A dictionary that is only used to detect duplicate
+ # productions.
+
+ Terminals = { } # A dictionary mapping the names of terminal symbols to a
+ # list of the rules where they are used.
+
+ Nonterminals = { } # A dictionary mapping names of nonterminals to a list
+ # of rule numbers where they are used.
+
+ First = { } # A dictionary of precomputed FIRST(x) symbols
+
+ Follow = { } # A dictionary of precomputed FOLLOW(x) symbols
+
+ Precedence = { } # Precedence rules for each terminal. Contains tuples of the
+ # form ('right',level) or ('nonassoc', level) or ('left',level)
+
+ LRitems = [ ] # A list of all LR items for the grammar. These are the
+ # productions with the "dot" like E -> E . PLUS E
+
+ Errorfunc = None # User defined error handler
+
+ Signature = md5.new() # Digital signature of the grammar rules, precedence
+ # and other information. Used to determine when a
+ # parsing table needs to be regenerated.
+
+ Requires = { } # Requires list
+
+ # File objects used when creating the parser.out debugging file
+ global _vf, _vfc
+ _vf = cStringIO.StringIO()
+ _vfc = cStringIO.StringIO()
+
+# -----------------------------------------------------------------------------
+# class Production:
+#
+# This class stores the raw information about a single production or grammar rule.
+# It has a few required attributes:
+#
+# name - Name of the production (nonterminal)
+# prod - A list of symbols making up its production
+# number - Production number.
+#
+# In addition, a few additional attributes are used to help with debugging or
+# optimization of table generation.
+#
+# file - File where production action is defined.
+# lineno - Line number where action is defined
+# func - Action function
+# prec - Precedence level
+# lr_next - Next LR item. Example, if we are ' E -> E . PLUS E'
+# then lr_next refers to 'E -> E PLUS . E'
+# lr_index - LR item index (location of the ".") in the prod list.
+# len - Length of the production (number of symbols on right hand side)
+# -----------------------------------------------------------------------------
+
class Production:
    '''
    A single grammar rule, or (when built by lr_item) one LR item of a rule.

    Keyword arguments given to the constructor are stored as attributes
    (name, prod, number, ... as described in the header comment above).
    The bookkeeping attributes lr_index, lr0_added and usyms are always
    reset to their defaults afterwards, even if passed as keywords.
    '''
    def __init__(self, **kw):
        for attr in kw:
            setattr(self, attr, kw[attr])
        self.lr_index = -1      # Position of the "." (-1: not an LR item)
        self.lr0_added = 0      # Stamp used by the LR(0) closure computation
        self.usyms = []         # Unique symbols of the right hand side

    def __str__(self):
        if not self.prod:
            return "%s -> <empty>" % self.name
        return "%s -> %s" % (self.name, " ".join(self.prod))

    def __repr__(self):
        return str(self)

    def lr_item(self, n):
        '''
        Return a new Production representing this rule with the "." placed
        before symbol n, or None when n is past the end of the rule.
        '''
        if n > len(self.prod):
            return None

        dotted = list(self.prod)
        dotted.insert(n, ".")

        item = Production()
        item.name = self.name
        item.number = self.number
        item.lr_index = n
        item.prod = tuple(dotted)
        item.len = len(item.prod)       # Counts the "." as well
        item.usyms = self.usyms         # Shared with the parent rule, not copied

        # Productions of the symbol right after the "." (empty when the dot
        # is at the end or that symbol has no entry in Prodnames).
        try:
            item.lrafter = Prodnames[item.prod[n + 1]]
        except (IndexError, KeyError):
            item.lrafter = []

        # Symbol right before the ".".  NOTE: for n == 0 the index is -1,
        # which wraps around to the last element instead of raising
        # IndexError, so the None fallback is not reached in that case.
        try:
            item.lrbefore = item.prod[n - 1]
        except IndexError:
            item.lrbefore = None

        return item
+
class MiniProduction:
    '''
    Plain attribute holder with no behavior of its own.  yacc() fills in
    name, len, file, line (and func) when productions are restored from a
    previously written table module.
    '''
+
+# Utility function
def is_identifier(s):
    '''
    Return 1 if every character of s is alphanumeric or '_', else 0.
    The empty string yields 1, and a leading digit is not rejected.
    '''
    for ch in s:
        if ch == '_' or ch.isalnum():
            continue
        return 0
    return 1
+
+# -----------------------------------------------------------------------------
+# add_production()
+#
+# Given an action function, this function assembles a production rule.
+# The production rule is assumed to be found in the function's docstring.
+# This rule has the general syntax:
+#
+# name1 ::= production1
+# | production2
+# | production3
+# ...
+# | productionn
+# name2 ::= production1
+# | production2
+# ...
+# -----------------------------------------------------------------------------
+
def add_production(f,file,line,prodname,syms):
    '''
    Register the rule "prodname -> syms" whose action function is f.

    file and line locate the rule for error messages.  syms is the list of
    right hand side symbols; it is stored on the production and any
    "%prec NAME" pair is removed from it in place.  Returns 0 on success
    and -1 (after printing a message) on any error.
    '''
    if prodname in Terminals:
        print("%s:%d: Illegal rule name '%s'. Already defined as a token." % (file,line,prodname))
        return -1
    if prodname == 'error':
        print("%s:%d: Illegal rule name '%s'. error is a reserved word." % (file,line,prodname))
        return -1
    if not is_identifier(prodname):
        print("%s:%d: Illegal rule name '%s'" % (file,line,prodname))
        return -1

    for sym in syms:
        if sym != '%prec' and not is_identifier(sym):
            print("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,sym, prodname))
            return -1

    # Duplicate detection is keyed on the textual form of the rule
    rule_key = "%s -> %s" % (prodname,syms)
    if rule_key in Prodmap:
        previous = Prodmap[rule_key]
        print("%s:%d: Duplicate rule %s." % (file,line, previous))
        print("%s:%d: Previous definition at %s:%d" % (file,line, previous.file, previous.line))
        return -1

    rule = Production()
    rule.name = prodname
    rule.prod = syms
    rule.file = file
    rule.line = line
    rule.func = f
    rule.number = len(Productions)

    Productions.append(rule)
    Prodmap[rule_key] = rule
    if prodname not in Nonterminals:
        Nonterminals[prodname] = [ ]

    # Walk the right hand side: record where each symbol is used and
    # resolve any %prec override.
    pos = 0
    while pos < len(rule.prod):
        sym = rule.prod[pos]
        if sym == '%prec':
            try:
                precname = rule.prod[pos+1]
            except IndexError:
                print("%s:%d: Syntax error. Nothing follows %%prec." % (rule.file,rule.line))
                return -1

            prec = Precedence.get(precname,None)
            if not prec:
                print("%s:%d: Nothing known about the precedence of '%s'" % (rule.file,rule.line,precname))
                return -1
            rule.prec = prec
            # Drop both "%prec" and the name following it, then re-examine
            # whatever now sits at this position.
            del rule.prod[pos:pos+2]
            continue

        if sym in Terminals:
            Terminals[sym].append(rule.number)
            # The first terminal seen supplies the rule's precedence unless
            # one has already been assigned.
            if not hasattr(rule,"prec"):
                rule.prec = Precedence.get(sym,('right',0))
        else:
            if sym not in Nonterminals:
                Nonterminals[sym] = [ ]
            Nonterminals[sym].append(rule.number)
        pos += 1

    if not hasattr(rule,"prec"):
        rule.prec = ('right',0)

    # Freeze the right hand side now that %prec has been stripped
    rule.len = len(rule.prod)
    rule.prod = tuple(rule.prod)

    # Unique symbols of the production, in order of first appearance
    rule.usyms = [ ]
    for sym in rule.prod:
        if sym not in rule.usyms:
            rule.usyms.append(sym)

    # Index the production under its nonterminal name
    Prodnames.setdefault(rule.name, [ ]).append(rule)
    return 0
+
+# Given a raw rule function, this function rips out its doc string
+# and adds rules to the grammar
+
def add_function(f):
    '''
    Read the grammar rules from the docstring of action function f and
    register each of them with add_production().

    f must take exactly one argument.  Returns -1 immediately for a bad
    argument count, a misplaced '|' or a missing ':' / '::='; otherwise
    returns the accumulated (zero or negative) error count.  A missing
    docstring is reported but yields 0.
    '''
    code = f.func_code
    line = code.co_firstlineno
    file = code.co_filename

    if code.co_argcount > 1:
        print("%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__))
        return -1

    if code.co_argcount < 1:
        print("%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__))
        return -1

    if not f.__doc__:
        print("%s:%d: No documentation string specified in function '%s'" % (file,line,f.__name__))
        return 0

    error = 0
    lastp = None        # Name of the most recent rule, for '|' continuations
    dline = line        # Advanced once per docstring line
    for ps in f.__doc__.splitlines():
        dline += 1
        p = ps.split()
        if not p:
            continue
        try:
            if p[0] == '|':
                # Continuation of the previous rule
                if not lastp:
                    print("%s:%d: Misplaced '|'." % (file,dline))
                    return -1
                prodname = lastp
                syms = p[1:]
            else:
                prodname = lastp = p[0]
                assign = p[1]       # IndexError here is reported below
                syms = p[2:]
                if assign != ':' and assign != '::=':
                    print("%s:%d: Syntax error. Expected ':'" % (file,dline))
                    return -1
            error += add_production(f,file,dline,prodname,syms)
        except StandardError:
            print("%s:%d: Syntax error in rule '%s'" % (file,dline,ps))
            error -= 1
    return error
+
+
+# Cycle checking code (Michael Dyck)
+
def compute_reachable():
    '''
    Determine which symbols can be reached from the start symbol (the
    first right hand side symbol of production 0) and print a warning
    for every nonterminal that cannot.
    '''
    Reachable = { }
    for table in (Terminals, Nonterminals):
        for sym in table.keys():
            Reachable[sym] = 0

    mark_reachable_from( Productions[0].prod[0], Reachable )

    for sym in Nonterminals.keys():
        if Reachable[sym]:
            continue
        print("yacc: Symbol '%s' is unreachable." % sym)
+
def mark_reachable_from(s, Reachable):
    '''
    Set Reachable[x] = 1 for s and, recursively, for every symbol on the
    right hand side of any production of s.  Symbols that are already
    marked are not visited again.
    '''
    if not Reachable[s]:
        Reachable[s] = 1
        for rule in Prodnames.get(s,[]):
            for sym in rule.prod:
                mark_reachable_from(sym, Reachable)
+
+# -----------------------------------------------------------------------------
+# compute_terminates()
+#
+# This function looks at the various parsing rules and tries to detect
+# infinite recursion cycles (grammar rules where there is no possible way
+# to derive a string of only terminals).
+# -----------------------------------------------------------------------------
def compute_terminates():
    '''
    Report every symbol that can never derive a string of terminals.
    Returns 1 if at least one such symbol was reported, otherwise 0.
    '''
    Terminates = {}

    # Every terminal (and the end marker) trivially terminates
    for t in Terminals.keys():
        Terminates[t] = 1
    Terminates['$'] = 1

    # Nonterminals start out as non-terminating
    for n in Nonterminals.keys():
        Terminates[n] = 0

    # Propagate until a full pass changes nothing
    some_change = 1
    while some_change:
        some_change = 0
        for (n,pl) in Prodnames.items():
            # n terminates as soon as one of its productions does
            for p in pl:
                # p terminates iff all of its rhs symbols terminate
                # (an empty rhs therefore terminates).
                p_terminates = 1
                for s in p.prod:
                    if not Terminates[s]:
                        p_terminates = 0
                        break

                if p_terminates:
                    if not Terminates[n]:
                        Terminates[n] = 1
                        some_change = 1
                    # No need to look at further productions of n
                    break

    some_error = 0
    for (s,terminates) in Terminates.items():
        if terminates:
            continue
        if s != 'error' and s not in Prodnames and s not in Terminals:
            # Used-but-not-defined symbol; it is not reported again here.
            continue
        print("yacc: Infinite recursion detected for symbol '%s'." % s)
        some_error = 1

    return some_error
+
+# -----------------------------------------------------------------------------
+# verify_productions()
+#
+# This function examines all of the supplied rules to see if they seem valid.
+# -----------------------------------------------------------------------------
def verify_productions(cycle_check=1):
    '''
    Check the collected grammar: report undefined symbols, warn about
    unused tokens and rules, and (when yaccdebug is set) describe the
    grammar in the debug buffer _vf.  With cycle_check true, also run
    compute_reachable() and compute_terminates().

    Returns a non-zero value if an undefined symbol or a non-terminating
    symbol was found.
    '''
    error = 0
    for rule in Productions:
        if not rule:
            continue
        for sym in rule.prod:
            if sym == 'error' or sym in Prodnames or sym in Terminals:
                continue
            print("%s:%d: Symbol '%s' used, but not defined as a token or a rule." % (rule.file,rule.line,sym))
            error = 1

    # Tokens that never appear in a rule
    unused_tok = 0
    if yaccdebug:
        _vf.write("Unused terminals:\n\n")
    for tok,uses in Terminals.items():
        if tok == 'error' or uses:
            continue
        print("yacc: Warning. Token '%s' defined, but not used." % tok)
        if yaccdebug:
            _vf.write(" %s\n"% tok)
        unused_tok += 1

    # Listing of all productions except the augmented rule 0
    if yaccdebug:
        _vf.write("\nGrammar\n\n")
        for num in range(1,len(Productions)):
            _vf.write("Rule %-5d %s\n" % (num, Productions[num]))

    # Rules that are never referenced
    unused_prod = 0
    for name,uses in Nonterminals.items():
        if uses:
            continue
        first_rule = Prodnames[name][0]
        print("%s:%d: Warning. Rule '%s' defined, but not used." % (first_rule.file,first_rule.line, name))
        unused_prod += 1

    if unused_tok == 1:
        print("yacc: Warning. There is 1 unused token.")
    elif unused_tok > 1:
        print("yacc: Warning. There are %d unused tokens." % unused_tok)

    if unused_prod == 1:
        print("yacc: Warning. There is 1 unused rule.")
    elif unused_prod > 1:
        print("yacc: Warning. There are %d unused rules." % unused_prod)

    if yaccdebug:
        sections = (("\nTerminals, with rules where they appear\n\n", Terminals),
                    ("\nNonterminals, with rules where they appear\n\n", Nonterminals))
        for title,table in sections:
            _vf.write(title)
            names = list(table.keys())
            names.sort()
            for k in names:
                _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in table[k]])))

    if cycle_check:
        compute_reachable()
        error += compute_terminates()
    return error
+
+# -----------------------------------------------------------------------------
+# build_lritems()
+#
+# This function walks the list of productions and builds a complete set of the
+# LR items. The LR items are stored in two ways: First, they are uniquely
+# numbered and placed in the list LRitems. Second, a linked list of LR items
+# is built for each production. For example:
+#
+# E -> E PLUS E
+#
+# Creates the list
+#
+# [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
+# -----------------------------------------------------------------------------
+
def build_lritems():
    '''
    Create every LR item of every production.  Each item gets a unique
    lr_num and is appended to LRitems; the items of one production are
    chained through lr_next, starting at the production itself and ending
    with None.
    '''
    for rule in Productions:
        previous = rule
        dot = 0
        while 1:
            item = rule.lr_item(dot)
            previous.lr_next = item
            if not item:
                # lr_item returned None: the dot moved past the end
                break
            item.lr_num = len(LRitems)
            LRitems.append(item)
            previous = item
            dot += 1
+
+ # In order for the rest of the parser generator to work, we need to
+ # guarantee that no more lritems are generated. Therefore, we nuke
+ # the p.lr_item method. (Only used in debugging)
+ # Production.lr_item = None
+
+# -----------------------------------------------------------------------------
+# add_precedence()
+#
+# Given a list of precedence rules, add to the precedence table.
+# -----------------------------------------------------------------------------
+
def add_precedence(plist):
    '''
    Fill the Precedence table from plist.

    plist is a sequence of entries of the form (assoc, term1, term2, ...)
    where assoc is 'left', 'right' or 'nonassoc'.  The n-th entry (counting
    from 1) assigns precedence level n to its terminals.

    Returns -1 immediately when an entry has an unknown associativity;
    otherwise returns the number of problems found (terminals listed more
    than once, malformed entries), 0 meaning success.
    '''
    plevel = 0
    error = 0
    for p in plist:
        plevel += 1
        try:
            prec = p[0]
            terms = p[1:]
            if prec != 'left' and prec != 'right' and prec != 'nonassoc':
                print("yacc: Invalid precedence '%s'" % prec)
                return -1
            for t in terms:
                if t in Precedence:
                    print("yacc: Precedence already specified for terminal '%s'" % t)
                    error += 1
                    continue
                Precedence[t] = (prec,plevel)
        except Exception:
            # A malformed entry (not indexable, unhashable terminal, ...)
            # is counted as an error.  Unlike a bare "except:", this does
            # not intercept KeyboardInterrupt or SystemExit.
            print("yacc: Invalid precedence table.")
            error += 1

    return error
+
+# -----------------------------------------------------------------------------
+# augment_grammar()
+#
+# Compute the augmented grammar. This is just a rule S' -> start where start
+# is the starting symbol.
+# -----------------------------------------------------------------------------
+
def augment_grammar(start=None):
    '''
    Install the augmented rule S' -> start as production 0 and record
    that use in Nonterminals[start].  When start is not given, the name
    of production 1 is used.
    '''
    if not start:
        start = Productions[1].name
    root = Production(name="S'",prod=[start],number=0,len=1,prec=('right',0),func=None)
    root.usyms = [ start ]
    Productions[0] = root
    Nonterminals[start].append(0)
+
+
+# -------------------------------------------------------------------------
+# first()
+#
+# Compute the value of FIRST1(beta) where beta is a tuple of symbols.
+#
+# During execution of compute_first1, the result may be incomplete.
+# Afterward (e.g., when called from compute_follow()), it will be complete.
+# -------------------------------------------------------------------------
def first(beta):
    '''
    Return FIRST(beta) as a list, where beta is a sequence of symbols.
    The lookup table First must already contain an entry for each symbol.
    '<empty>' is included only if every symbol of beta can produce empty
    (which is also the case when beta itself is empty).
    '''
    result = [ ]
    for x in beta:
        x_produces_empty = 0

        # Copy the non-<empty> members of First[x], without duplicates
        for f in First[x]:
            if f == '<empty>':
                x_produces_empty = 1
            elif f not in result:
                result.append(f)

        if not x_produces_empty:
            # x always contributes a symbol, so nothing after it matters
            break
    else:
        # Loop ran to completion: every x in beta can vanish
        result.append('<empty>')

    return result
+
+
+# FOLLOW(x)
+# Given a non-terminal. This function computes the set of all symbols
+# that might follow it. Dragon book, p. 189.
+
def compute_follow(start=None):
    '''
    Fill the Follow table with FOLLOW(x) for every nonterminal x.
    start is the start symbol; when omitted, the name of production 1 is
    used.  Relies on first(), so the First table must be complete.
    '''
    for k in Nonterminals.keys():
        Follow[k] = [ ]

    if not start:
        start = Productions[1].name

    # The end marker can follow the start symbol
    Follow[start] = [ '$' ]

    didadd = 1
    while didadd:
        didadd = 0
        for p in Productions[1:]:
            last = len(p.prod) - 1
            for i in range(len(p.prod)):
                B = p.prod[i]
                if B not in Nonterminals:
                    continue

                # Everything that can start the remainder of the rule
                # can follow B.
                hasempty = 0
                for f in first(p.prod[i+1:]):
                    if f == '<empty>':
                        hasempty = 1
                    elif f not in Follow[B]:
                        Follow[B].append(f)
                        didadd = 1

                # If the remainder can vanish (or B is last), whatever
                # follows the rule's own nonterminal also follows B.
                if hasempty or i == last:
                    for f in Follow[p.name]:
                        if f not in Follow[B]:
                            Follow[B].append(f)
                            didadd = 1

    if 0 and yaccdebug:
        _vf.write('\nFollow:\n')
        for k in Nonterminals.keys():
            _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Follow[k]])))
+
+# -------------------------------------------------------------------------
+# compute_first1()
+#
+# Compute the value of FIRST1(X) for all symbols
+# -------------------------------------------------------------------------
def compute_first1():
    '''
    Fill the First table: First[t] = [t] for every terminal (plus the
    special symbols '$' and '#'), and for each nonterminal the list
    obtained by iterating first() over its productions until stable.
    '''
    # Terminals:
    for t in Terminals.keys():
        First[t] = [t]

    First['$'] = ['$']
    First['#'] = ['#'] # what's this for?

    # Nonterminals start with the empty set
    for n in Nonterminals.keys():
        First[n] = []

    # Propagate symbols until a full pass adds nothing
    some_change = 1
    while some_change:
        some_change = 0
        for n in Nonterminals.keys():
            known = First[n]
            for p in Prodnames[n]:
                for f in first(p.prod):
                    if f not in known:
                        known.append( f )
                        some_change = 1

    if 0 and yaccdebug:
        _vf.write('\nFirst:\n')
        for k in Nonterminals.keys():
            _vf.write("%-20s : %s\n" %
                (k, " ".join([str(s) for s in First[k]])))
+
+# -----------------------------------------------------------------------------
+# === SLR Generation ===
+#
+# The following functions are used to construct SLR (Simple LR) parsing tables
+# as described on p.221-229 of the dragon book.
+# -----------------------------------------------------------------------------
+
+# Global variables for the LR parsing engine
def lr_init_vars():
    '''
    (Re)create the module-level LR tables: empty action and goto tables,
    an empty goto cache, and the method name set to "Unknown".
    '''
    global _lr_action, _lr_goto, _lr_method, _lr_goto_cache

    _lr_goto_cache = { }
    _lr_method = "Unknown"          # LR method used
    _lr_goto = { }                  # Goto table
    _lr_action = { }                # Action table
+
+# Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
+# prodlist is a list of productions.
+
_add_count = 0       # Stamp identifying the current lr0_closure() call

def lr0_closure(I):
    '''
    Return the LR(0) closure of I as a new list: the items of I followed
    by the initial item (lr_next) of every production reachable through
    the lrafter lists of the items collected so far.

    Each production is added at most once per call; this is tracked by
    stamping production.lr0_added with the call counter _add_count, so
    no per-call reset of the productions is needed.
    '''
    global _add_count

    _add_count += 1
    stamp = _add_count

    J = I[:]
    pos = 0
    # Items appended below are themselves examined later in this same
    # scan, so a single pass over the growing list reaches the fixpoint.
    while pos < len(J):
        for x in J[pos].lrafter:
            if x.lr0_added != stamp:
                # Add B --> .G to J
                J.append(x.lr_next)
                x.lr0_added = stamp
        pos += 1

    return J
+
+# Compute the LR(0) goto function goto(I,X) where I is a set
+# of LR(0) items and X is a grammar symbol. This function is written
+# in a way that guarantees uniqueness of the generated goto sets
+# (i.e. the same goto set will never be returned as two different Python
+# objects). With uniqueness, we can later do fast set comparisons using
+# id(obj) instead of element-wise comparison.
+
def lr0_goto(I,x):
    '''
    Return goto(I,x): the closure of all items of I advanced over symbol x.

    Results are memoized in _lr_goto_cache twice: under (id(I), x) for a
    direct hit, and in a per-symbol trie keyed by the id() of each
    advanced item, so that equal kernels map to the very same list
    object.  An empty result is returned as a false value (None or an
    empty list).
    '''
    # Fast path: this exact item set and symbol were seen before
    hit = _lr_goto_cache.get((id(I),x),None)
    if hit:
        return hit

    # Walk (and extend) the trie for symbol x, one level per advanced item
    node = _lr_goto_cache.get(x,None)
    if not node:
        node = { }
        _lr_goto_cache[x] = node

    gs = [ ]
    for p in I:
        n = p.lr_next
        if not n or n.lrbefore != x:
            continue
        child = node.get(id(n),None)
        if not child:
            child = { }
            node[id(n)] = child
        gs.append(n)
        node = child

    # '$' marks the goto set stored at the end of the trie path
    g = node.get('$',None)
    if not g:
        if gs:
            g = lr0_closure(gs)
            node['$'] = g
        else:
            node['$'] = gs
    _lr_goto_cache[(id(I),x)] = g
    return g
+
+# Compute the kernel of a set of LR(0) items
def lr0_kernel(I):
    '''
    Return the items of I that belong to its kernel: items of the
    augmented rule S', items whose lr_index is greater than 0, and items
    whose len is 0.
    '''
    return [item for item in I
            if item.name == "S'" or item.lr_index > 0 or item.len == 0]
+
_lr0_cidhash = { }   # Maps id(item set) -> state number

# Compute the LR(0) sets of item function
def lr0_items():
    '''
    Build and return C, the list of LR(0) item sets; the index of a set
    in C is its state number.  _lr0_cidhash is filled with
    id(set) -> state number for every set in C.
    '''
    # Entries are keyed by id(); anything left over from an earlier table
    # build refers to lists that are no longer part of C and whose ids
    # may since have been reused, so start from an empty map.
    _lr0_cidhash.clear()

    C = [ lr0_closure([Productions[0].lr_next]) ]
    _lr0_cidhash[id(C[0])] = 0

    # C grows while it is being scanned; stop once every set is processed
    i = 0
    while i < len(C):
        I = C[i]
        i += 1

        # Collect all of the symbols that could possibly be in the goto(I,X) sets
        asyms = { }
        for ii in I:
            for s in ii.usyms:
                asyms[s] = None

        for x in asyms.keys():
            g = lr0_goto(I,x)
            if not g:
                continue
            if id(g) in _lr0_cidhash:
                # Already a known state (lr0_goto returns the same list
                # object for the same goto set).
                continue
            _lr0_cidhash[id(g)] = len(C)
            C.append(g)

    return C
+
+# -----------------------------------------------------------------------------
+# slr_parse_table()
+#
+# This function constructs an SLR table.
+# -----------------------------------------------------------------------------
def slr_parse_table():
    '''
    Build the SLR parsing tables.

    Fills the module-level tables _lr_action and _lr_goto (keyed by
    (state, symbol)) and sets _lr_method to "SLR".  Action values are:
    a positive state number for a shift, the negated rule number for a
    reduce, 0 for accept, and None for an entry disabled by a nonassoc
    precedence rule.  Conflicts are resolved with the precedence
    information, logged to the debug buffers _vf/_vfc, and summarized
    on standard output.

    Raises YaccError if an unexpected error occurs while processing an
    item.
    '''
    import sys
    global _lr_method
    goto = _lr_goto                 # Goto array
    action = _lr_action             # Action array
    actionp = { }                   # Action production array (temporary)

    _lr_method = "SLR"

    n_srconflict = 0
    n_rrconflict = 0

    if yaccdebug:
        _vf.write("\n\nParsing method: SLR\n\n")

    # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
    # This determines the number of states

    C = lr0_items()

    # Build the parser table, state by state
    st = 0
    for I in C:
        # Loop over each production in I
        actlist = [ ]               # List of (symbol, item, description)

        if yaccdebug:
            _vf.write("\nstate %d\n\n" % st)
            for p in I:
                _vf.write(" (%d) %s\n" % (p.number, str(p)))
            _vf.write("\n")

        for p in I:
            try:
                if p.prod[-1] == ".":
                    if p.name == "S'":
                        # Start symbol. Accept!
                        action[st,"$"] = 0
                        actionp[st,"$"] = p
                    else:
                        # We are at the end of a production. Reduce!
                        for a in Follow[p.name]:
                            actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
                            r = action.get((st,a),None)
                            if r is not None:
                                # Whoa. Have a shift/reduce or reduce/reduce conflict
                                if r > 0:
                                    # Need to decide on shift or reduce here
                                    # By default we favor shifting. Need to add
                                    # some precedence rules here.
                                    sprec,slevel = Productions[actionp[st,a].number].prec
                                    rprec,rlevel = Precedence.get(a,('right',0))
                                    if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                        # We really need to reduce here.
                                        action[st,a] = -p.number
                                        actionp[st,a] = p
                                        if not slevel and not rlevel:
                                            _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
                                            _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
                                            n_srconflict += 1
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        action[st,a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the shift
                                        if not slevel and not rlevel:
                                            _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
                                            _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
                                            n_srconflict +=1
                                elif r < 0:
                                    # Reduce/reduce conflict. In this case, we favor the rule
                                    # that was defined first in the grammar file
                                    oldp = Productions[-r]
                                    pp = Productions[p.number]
                                    if oldp.line > pp.line:
                                        action[st,a] = -p.number
                                        actionp[st,a] = p
                                    n_rrconflict += 1
                                    _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a]))
                                    _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a]))
                                else:
                                    print("Unknown conflict in state %d" % st)
                            else:
                                action[st,a] = -p.number
                                actionp[st,a] = p
                else:
                    i = p.lr_index
                    a = p.prod[i+1]     # Get symbol right after the "."
                    if a in Terminals:
                        g = lr0_goto(I,a)
                        j = _lr0_cidhash.get(id(g),-1)
                        if j >= 0:
                            # We are in a shift state
                            actlist.append((a,p,"shift and go to state %d" % j))
                            r = action.get((st,a),None)
                            if r is not None:
                                # Whoa have a shift/reduce or shift/shift conflict
                                if r > 0:
                                    if r != j:
                                        print("Shift/shift conflict in state %d" % st)
                                elif r < 0:
                                    # Do a precedence check.
                                    # - if precedence of reduce rule is higher, we reduce.
                                    # - if precedence of reduce is same and left assoc, we reduce.
                                    # - otherwise we shift
                                    rprec,rlevel = Productions[actionp[st,a].number].prec
                                    sprec,slevel = Precedence.get(a,('right',0))
                                    if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')):
                                        # We decide to shift here... highest precedence to shift
                                        action[st,a] = j
                                        actionp[st,a] = p
                                        if not slevel and not rlevel:
                                            n_srconflict += 1
                                            _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
                                            _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        # NOTE(review): this condition implies the one
                                        # above (rprec != 'left'), so this branch is
                                        # never taken as written.
                                        action[st,a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the reduce
                                        if not slevel and not rlevel:
                                            n_srconflict +=1
                                            _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
                                            _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)

                                else:
                                    print("Unknown conflict in state %d" % st)
                            else:
                                action[st,a] = j
                                actionp[st,a] = p

            except StandardError:
                # The third operand of a three-expression raise must be a
                # traceback object, so the caught exception cannot be
                # passed there; include its text in the message instead.
                e = sys.exc_info()[1]
                raise YaccError("Hosed in slr_parse_table: %s" % (e,))

        # Print the actions associated with each terminal
        if yaccdebug:
            # First the actions that made it into the table ...
            for a,p,m in actlist:
                if (st,a) in action:
                    if p is actionp[st,a]:
                        _vf.write(" %-15s %s\n" % (a,m))
            _vf.write("\n")
            # ... then the ones that lost a conflict
            for a,p,m in actlist:
                if (st,a) in action:
                    if p is not actionp[st,a]:
                        _vf.write(" ! %-15s [ %s ]\n" % (a,m))

        # Construct the goto table for this state
        if yaccdebug:
            _vf.write("\n")
        nkeys = { }
        for ii in I:
            for s in ii.usyms:
                if s in Nonterminals:
                    nkeys[s] = None
        for n in nkeys.keys():
            g = lr0_goto(I,n)
            j = _lr0_cidhash.get(id(g),-1)
            if j >= 0:
                goto[st,n] = j
                if yaccdebug:
                    _vf.write(" %-15s shift and go to state %d\n" % (n,j))

        st += 1

    if n_srconflict == 1:
        print("yacc: %d shift/reduce conflict" % n_srconflict)
    if n_srconflict > 1:
        print("yacc: %d shift/reduce conflicts" % n_srconflict)
    if n_rrconflict == 1:
        print("yacc: %d reduce/reduce conflict" % n_rrconflict)
    if n_rrconflict > 1:
        print("yacc: %d reduce/reduce conflicts" % n_rrconflict)
+
+
+# -----------------------------------------------------------------------------
+# ==== LALR(1) Parsing ====
+# **** UNFINISHED! 6/16/01
+# -----------------------------------------------------------------------------
+
+
+# Compute the lr1_closure of a set I. I is a list of tuples (p,a) where
+# p is a LR0 item and a is a terminal
+
_lr1_add_count = 0   # Stamp identifying the current lr1_closure() call

def lr1_closure(I):
    '''
    Return the LR(1) closure of I as a new list.  I is a list of tuples
    (p,a) where p is an LR item carrying the lr1_after / lr1_beta /
    lr_added attributes and a is a lookahead terminal.
    '''
    global _lr1_add_count

    _lr1_add_count += 1
    stamp = _lr1_add_count

    J = I[:]

    # J grows while it is being scanned, so appended pairs are processed too
    for p,a in J:
        # p = [ A -> alpha . B beta]

        # For each production B -> gamma
        for B in p.lr1_after:
            lookahead_src = tuple(p.lr1_beta + (a,))
            start_item = B.lr_next

            # For each terminal b in first(Beta a)
            for b in first(lookahead_src):
                # (B -> . gamma, b) is new unless it already carries
                # the stamp of this call.
                if start_item.lr_added.get(b,0) != stamp:
                    start_item.lr_added[b] = stamp
                    J.append((start_item,b))

    return J
+
def lalr_parse_table():
    '''
    Unfinished LALR(1) table construction.  It currently only annotates
    every LR item with the data lr1_closure() needs, computes the kernels
    of the LR(0) item sets and prints them.
    '''
    # Compute some lr1 information about all of the productions
    for item in LRitems:
        dot = item.lr_index
        try:
            after = item.prod[dot + 1]
            item.lr1_after = Prodnames[after]
            item.lr1_beta = item.prod[dot + 2:]
        except LookupError:
            # Dot at the end, or the next symbol has no productions
            item.lr1_after = [ ]
            item.lr1_beta = [ ]
        item.lr_added = { }

    # Kernels of the LR(0) item sets
    CK = [lr0_kernel(I) for I in lr0_items()]

    print(CK)
+
+# -----------------------------------------------------------------------------
+# ==== LR Utility functions ====
+# -----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
+# lr_write_tables()
+#
+# This function writes the LR parsing tables to a file
+# -----------------------------------------------------------------------------
+
def _lr_write_table(f, name, table, smaller):
    '''
    Write the Python source defining dictionary `name` to file f.

    table maps (state, symbol) -> value.  With smaller true, the entries
    are grouped per symbol into `name`_items and expanded again by a
    short loop emitted after the data; otherwise the dictionary is
    written out entry by entry.
    '''
    if smaller:
        # Factor out names to try and make smaller
        items = { }
        for k,v in table.items():
            i = items.get(k[1])
            if not i:
                i = ([],[])
                items[k[1]] = i
            i[0].append(k[0])
            i[1].append(v)

        f.write("\n%s_items = {" % name)
        for k,v in items.items():
            f.write("%r:([" % (k,))
            for i in v[0]:
                f.write("%r," % i)
            f.write("],[")
            for i in v[1]:
                f.write("%r," % i)
            f.write("]),")
        f.write("}\n")

        f.write("""
%(name)s = { }
for _k, _v in %(name)s_items.items():
   for _x,_y in zip(_v[0],_v[1]):
       %(name)s[(_x,_k)] = _y
del %(name)s_items
""" % {'name': name})
    else:
        f.write("\n%s = { " % name)
        for k,v in table.items():
            f.write("(%r,%r):%r," % (k[0],k[1],v))
        f.write("}\n")


def lr_write_tables(modulename=tab_module):
    '''
    Write the current parsing tables to the file modulename + ".py".

    The generated module defines _lr_method, _lr_signature, _lr_action,
    _lr_goto and _lr_productions.  An IOError is reported on standard
    output and otherwise ignored; nothing is returned.
    '''
    import sys
    filename = modulename + ".py"
    try:
        f = open(filename,"w")
        try:
            f.write("""
# %s
# This file is automatically generated. Do not edit.

_lr_method = %s

_lr_signature = %s
""" % (filename, repr(_lr_method), repr(Signature.digest())))

            # Change smaller to 0 to go back to original tables
            smaller = 1

            _lr_write_table(f, "_lr_action", _lr_action, smaller)
            _lr_write_table(f, "_lr_goto", _lr_goto, smaller)

            # Write production table
            f.write("_lr_productions = [\n")
            for p in Productions:
                if p:
                    if (p.func):
                        f.write(" (%r,%d,%r,%r,%d),\n" % (p.name, p.len, p.func.__name__,p.file,p.line))
                    else:
                        f.write(" (%r,%d,None,None,None),\n" % (p.name, p.len))
                else:
                    f.write(" None,\n")
            f.write("]\n")
        finally:
            # Close the file even when a write fails part way through
            f.close()

    except IOError:
        e = sys.exc_info()[1]
        print("Unable to create '%s'" % filename)
        print(e)
        return
+
def lr_read_tables(module=tab_module,optimize=0):
    '''
    Try to load previously generated tables from the module named by
    `module`.  The tables are accepted when optimize is true or when the
    stored signature equals the current grammar signature.

    Returns 1 if the tables were loaded into the module-level variables,
    0 otherwise (including when the module cannot be imported or lacks
    an expected attribute).
    '''
    global _lr_action, _lr_goto, _lr_productions, _lr_method
    try:
        # NOTE: the module name is interpolated into executed code, so it
        # has to come from a trusted caller.
        exec("import %s as parsetab" % module)

        if not optimize and not (Signature.digest() == parsetab._lr_signature):
            # Tables are stale with respect to the current grammar
            return 0

        _lr_action = parsetab._lr_action
        _lr_goto = parsetab._lr_goto
        _lr_productions = parsetab._lr_productions
        _lr_method = parsetab._lr_method
        return 1

    except (ImportError,AttributeError):
        return 0
+
+# -----------------------------------------------------------------------------
+# yacc(module)
+#
+# Build the parser module
+# -----------------------------------------------------------------------------
+
+def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, start=None, check_recursion=1, optimize=0):
+ global yaccdebug
+ yaccdebug = debug
+
+ initialize_vars()
+ files = { }
+ error = 0
+
+ # Add starting symbol to signature
+ if start:
+ Signature.update(start)
+
+ # Try to figure out what module we are working with
+ if module:
+ # User supplied a module object.
+ if not isinstance(module, types.ModuleType):
+ raise ValueError,"Expected a module"
+
+ ldict = module.__dict__
+
+ else:
+ # No module given. We might be able to get information from the caller.
+ # Throw an exception and unwind the traceback to get the globals
+
+ try:
+ raise RuntimeError
+ except RuntimeError:
+ e,b,t = sys.exc_info()
+ f = t.tb_frame
+ f = f.f_back # Walk out to our calling function
+ ldict = f.f_globals # Grab its globals dictionary
+
+ # If running in optimized mode. We're going to
+
+ if (optimize and lr_read_tables(tabmodule,1)):
+ # Read parse table
+ del Productions[:]
+ for p in _lr_productions:
+ if not p:
+ Productions.append(None)
+ else:
+ m = MiniProduction()
+ m.name = p[0]
+ m.len = p[1]
+ m.file = p[3]
+ m.line = p[4]
+ if p[2]:
+ m.func = ldict[p[2]]
+ Productions.append(m)
+
+ else:
+ # Get the tokens map
+ tokens = ldict.get("tokens",None)
+
+ if not tokens:
+ raise YaccError,"module does not define a list 'tokens'"
+ if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)):
+ raise YaccError,"tokens must be a list or tuple."
+
+ # Check to see if a requires dictionary is defined.
+ requires = ldict.get("require",None)
+ if requires:
+ if not (isinstance(requires,types.DictType)):
+ raise YaccError,"require must be a dictionary."
+
+ for r,v in requires.items():
+ try:
+ if not (isinstance(v,types.ListType)):
+ raise TypeError
+ v1 = [x.split(".") for x in v]
+ Requires[r] = v1
+ except StandardError:
+ print "Invalid specification for rule '%s' in require. Expected a list of strings" % r
+
+
+ # Build the dictionary of terminals. We a record a 0 in the
+ # dictionary to track whether or not a terminal is actually
+ # used in the grammar
+
+ if 'error' in tokens:
+ print "yacc: Illegal token 'error'. Is a reserved word."
+ raise YaccError,"Illegal token name"
+
+ for n in tokens:
+ if Terminals.has_key(n):
+ print "yacc: Warning. Token '%s' multiply defined." % n
+ Terminals[n] = [ ]
+
+ Terminals['error'] = [ ]
+
+ # Get the precedence map (if any)
+ prec = ldict.get("precedence",None)
+ if prec:
+ if not (isinstance(prec,types.ListType) or isinstance(prec,types.TupleType)):
+ raise YaccError,"precedence must be a list or tuple."
+ add_precedence(prec)
+ Signature.update(repr(prec))
+
+ for n in tokens:
+ if not Precedence.has_key(n):
+ Precedence[n] = ('right',0) # Default, right associative, 0 precedence
+
+ # Look for error handler
+ ef = ldict.get('p_error',None)
+ if ef:
+ if not isinstance(ef,types.FunctionType):
+ raise YaccError,"'p_error' defined, but is not a function."
+ eline = ef.func_code.co_firstlineno
+ efile = ef.func_code.co_filename
+ files[efile] = None
+
+ if (ef.func_code.co_argcount != 1):
+ raise YaccError,"%s:%d: p_error() requires 1 argument." % (efile,eline)
+ global Errorfunc
+ Errorfunc = ef
+ else:
+ print "yacc: Warning. no p_error() function is defined."
+
+ # Get the list of built-in functions with p_ prefix
+ symbols = [ldict[f] for f in ldict.keys()
+ if (isinstance(ldict[f],types.FunctionType) and ldict[f].__name__[:2] == 'p_'
+ and ldict[f].__name__ != 'p_error')]
+
+ # Check for non-empty symbols
+ if len(symbols) == 0:
+ raise YaccError,"no rules of the form p_rulename are defined."
+
+ # Sort the symbols by line number
+ symbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno))
+
+ # Add all of the symbols to the grammar
+ for f in symbols:
+ if (add_function(f)) < 0:
+ error += 1
+ else:
+ files[f.func_code.co_filename] = None
+
+ # Make a signature of the docstrings
+ for f in symbols:
+ if f.__doc__:
+ Signature.update(f.__doc__)
+
+ lr_init_vars()
+
+ if error:
+ raise YaccError,"Unable to construct parser."
+
+ if not lr_read_tables(tabmodule):
+
+ # Validate files
+ for filename in files.keys():
+ if not validate_file(filename):
+ error = 1
+
+ # Validate dictionary
+ validate_dict(ldict)
+
+ if start and not Prodnames.has_key(start):
+ raise YaccError,"Bad starting symbol '%s'" % start
+
+ augment_grammar(start)
+ error = verify_productions(cycle_check=check_recursion)
+ otherfunc = [ldict[f] for f in ldict.keys()
+ if (isinstance(ldict[f],types.FunctionType) and ldict[f].__name__[:2] != 'p_')]
+
+ if error:
+ raise YaccError,"Unable to construct parser."
+
+ build_lritems()
+ compute_first1()
+ compute_follow(start)
+
+ if method == 'SLR':
+ slr_parse_table()
+ elif method == 'LALR1':
+ lalr_parse_table()
+ return
+ else:
+ raise YaccError, "Unknown parsing method '%s'" % method
+
+ lr_write_tables(tabmodule)
+
+ if yaccdebug:
+ try:
+ f = open(debug_file,"w")
+ f.write(_vfc.getvalue())
+ f.write("\n\n")
+ f.write(_vf.getvalue())
+ f.close()
+ except IOError,e:
+ print "yacc: can't create '%s'" % debug_file,e
+
+ # Made it here. Create a parser object and set up its internal state.
+ # Set global parse() method to bound method of parser object.
+
+ p = Parser("xyzzy")
+ p.productions = Productions
+ p.errorfunc = Errorfunc
+ p.action = _lr_action
+ p.goto = _lr_goto
+ p.method = _lr_method
+ p.require = Requires
+
+ global parse
+ parse = p.parse
+
+ # Clean up all of the globals we created
+ if (not optimize):
+ yacc_cleanup()
+ return p
+
+# yacc_cleanup function. Delete all of the global variables
+# used during table construction
+
+def yacc_cleanup():  # Unbind the module-level names used during table construction; yacc() calls this when optimize is false.
+ global _lr_action, _lr_goto, _lr_method, _lr_goto_cache  # declared global so the del below removes the module-level names
+ del _lr_action, _lr_goto, _lr_method, _lr_goto_cache  # names are unbound left to right; NameError if one is not currently defined
+
+ global Productions, Prodnames, Prodmap, Terminals  # grammar/bookkeeping globals; yacc() fills in at least Terminals
+ global Nonterminals, First, Follow, Precedence, LRitems  # yacc() assigns Precedence defaults for every token
+ global Errorfunc, Signature, Requires  # yacc() sets Errorfunc from p_error, feeds Signature, fills Requires from 'require'
+
+ del Productions, Prodnames, Prodmap, Terminals  # only the global names go away; the Parser built by yacc() keeps its own reference to Productions
+ del Nonterminals, First, Follow, Precedence, LRitems
+ del Errorfunc, Signature, Requires  # Errorfunc and Requires remain reachable through the parser's errorfunc/require attributes
+
+ global _vf, _vfc  # buffers whose getvalue() output yacc() writes to the debug file
+ del _vf, _vfc  # NOTE(review): a second call without re-running yacc() would presumably fail with NameError -- confirm callers never do that
+
+
+# Stub that raises an error if parsing is attempted without first calling yacc()
+def parse(*args,**kwargs):  # placeholder accepting any arguments; yacc() rebinds the global name parse to the built parser's bound parse method
+ raise YaccError, "yacc: No parser built with yacc()"  # Python 2 two-expression raise: raises YaccError with this message
+