diff options
Diffstat (limited to 'ext/ply')
98 files changed, 8572 insertions, 0 deletions
diff --git a/ext/ply/CHANGES b/ext/ply/CHANGES new file mode 100644 index 000000000..9c7334066 --- /dev/null +++ b/ext/ply/CHANGES @@ -0,0 +1,158 @@ +Version 1.3 +------------------------------ +12/10/02: jmdyck + Various minor adjustments to the code that Dave checked in today. + Updated test/yacc_{inf,unused}.exp to reflect today's changes. + +12/10/02: beazley + Incorporated a variety of minor bug fixes to empty production + handling and infinite recursion checking. Contributed by + Michael Dyck. + +12/10/02: beazley + Removed bogus recover() method call in yacc.restart() + +Version 1.2 +------------------------------ +11/27/02: beazley + Lexer and parser objects are now available as an attribute + of tokens and slices respectively. For example: + + def t_NUMBER(t): + r'\d+' + print t.lexer + + def p_expr_plus(t): + 'expr: expr PLUS expr' + print t.lexer + print t.parser + + This can be used for state management (if needed). + +10/31/02: beazley + Modified yacc.py to work with Python optimize mode. To make + this work, you need to use + + yacc.yacc(optimize=1) + + Furthermore, you need to first run Python in normal mode + to generate the necessary parsetab.py files. After that, + you can use python -O or python -OO. + + Note: optimized mode turns off a lot of error checking. + Only use when you are sure that your grammar is working. + Make sure parsetab.py is up to date! + +10/30/02: beazley + Added cloning of Lexer objects. For example: + + import copy + l = lex.lex() + lc = copy.copy(l) + + l.input("Some text") + lc.input("Some other text") + ... + + This might be useful if the same "lexer" is meant to + be used in different contexts---or if multiple lexers + are running concurrently. + +10/30/02: beazley + Fixed subtle bug with first set computation and empty productions. + Patch submitted by Michael Dyck. + +10/30/02: beazley + Fixed error messages to use "filename:line: message" instead + of "filename:line. message". This makes error reporting more + friendly to emacs. Patch submitted by François Pinard. + +10/30/02: beazley + Improvements to parser.out file. Terminals and nonterminals + are sorted instead of being printed in random order. + Patch submitted by François Pinard. + +10/30/02: beazley + Improvements to parser.out file output. Rules are now printed + in a way that's easier to understand. Contributed by Russ Cox. + +10/30/02: beazley + Added 'nonassoc' associativity support. This can be used + to disable the chaining of operators like a < b < c. + To use, simply specify 'nonassoc' in the precedence table + + precedence = ( + ('nonassoc', 'LESSTHAN', 'GREATERTHAN'), # Nonassociative operators + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), + ('right', 'UMINUS'), # Unary minus operator + ) + + Patch contributed by Russ Cox. + +10/30/02: beazley + Modified the lexer to provide optional support for Python -O and -OO + modes. To make this work, Python *first* needs to be run in + unoptimized mode. This reads the lexing information and creates a + file "lextab.py". Then, run lex like this: + + # module foo.py + ... + ... + lex.lex(optimize=1) + + Once the lextab file has been created, subsequent calls to + lex.lex() will read data from the lextab file instead of using + introspection. In optimized mode (-O, -OO) everything should + work normally despite the loss of doc strings. + + To change the name of the file 'lextab.py' use the following: + + lex.lex(lextab="footab") + + (this creates a file footab.py) + + +Version 1.1 October 25, 2001 +------------------------------ + +10/25/01: beazley + Modified the table generator to produce much more compact data. + This should greatly reduce the size of the parsetab.py[c] file. + Caveat: the tables still need to be constructed so a little more + work is done in parsetab on import. + +10/25/01: beazley + There may be a possible bug in the cycle detector that reports errors + about infinite recursion. I'm having a little trouble tracking it + down, but if you get this problem, you can disable the cycle + detector as follows: + + yacc.yacc(check_recursion = 0) + +10/25/01: beazley + Fixed a bug in lex.py that sometimes caused illegal characters to be + reported incorrectly. Reported by Sverre Jørgensen. + +7/8/01 : beazley + Added a reference to the underlying lexer object when tokens are handled by + functions. The lexer is available as the 'lexer' attribute. This + was added to provide better lexing support for languages such as Fortran + where certain types of tokens can't be conveniently expressed as regular + expressions (and where the tokenizing function may want to perform a + little backtracking). Suggested by Pearu Peterson. + +6/20/01 : beazley + Modified yacc() function so that an optional starting symbol can be specified. + For example: + + yacc.yacc(start="statement") + + Normally yacc always treats the first production rule as the starting symbol. + However, if you are debugging your grammar it may be useful to specify + an alternative starting symbol. Idea suggested by Rich Salz. + +Version 1.0 June 18, 2001 +-------------------------- +Initial public offering + diff --git a/ext/ply/COPYING b/ext/ply/COPYING new file mode 100644 index 000000000..b1e3f5a26 --- /dev/null +++ b/ext/ply/COPYING @@ -0,0 +1,504 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + diff --git a/ext/ply/README b/ext/ply/README new file mode 100644 index 000000000..35b458d4c --- /dev/null +++ b/ext/ply/README @@ -0,0 +1,249 @@ +PLY (Python Lex-Yacc) Version 1.2 (November 27, 2002) + +David M. Beazley +Department of Computer Science +University of Chicago +Chicago, IL 60637 +beazley@cs.uchicago.edu + +Copyright (C) 2001 David M. Beazley + +$Header: /home/stever/bk/newmem2/ext/ply/README 1.1 03/06/06 14:53:34-00:00 stever@ $ + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +See the file COPYING for a complete copy of the LGPL. + +Introduction +============ + +PLY is a 100% Python implementation of the common parsing tools lex +and yacc. Although several other parsing tools are available for +Python, there are several reasons why you might want to consider PLY: + + - The tools are very closely modeled after traditional lex/yacc. + If you know how to use these tools in C, you will find PLY + to be similar. + + - PLY provides *very* extensive error reporting and diagnostic + information to assist in parser construction. The original + implementation was developed for instructional purposes. As + a result, the system tries to identify the most common types + of errors made by novice users. + + - PLY provides full support for empty productions, error recovery, + precedence specifiers, and moderately ambiguous grammars. + + - Parsing is based on LR-parsing which is fast, memory efficient, + better suited to large grammars, and which has a number of nice + properties when dealing with syntax errors and other parsing problems. + Currently, PLY builds its parsing tables using the SLR algorithm which + is slightly weaker than LALR(1) used in traditional yacc. + + - Like John Aycock's excellent SPARK toolkit, PLY uses Python + reflection to build lexers and parsers. This greatly simplifies + the task of parser construction since it reduces the number of files + and eliminates the need to run a separate lex/yacc tool before + running your program. + + - PLY can be used to build parsers for "real" programming languages. + Although it is not ultra-fast due to its Python implementation, + PLY can be used to parse grammars consisting of several hundred + rules (as might be found for a language like C). The lexer and LR + parser are also reasonably efficient when parsing typically + sized programs. + +The original version of PLY was developed for an Introduction to +Compilers course where students used it to build a compiler for a +simple Pascal-like language. Their compiler had to include lexical +analysis, parsing, type checking, type inference, and generation of +assembly code for the SPARC processor. Because of this, the current +implementation has been extensively tested and debugged. In addition, +most of the API and error checking steps have been adapted to address +common usability problems. + +How to Use +========== + +PLY consists of two files : lex.py and yacc.py. To use the system, +simply copy these files to your project and import them like standard +Python modules. + +The file doc/ply.html contains complete documentation on how to use +the system. + +The example directory contains several different examples including a +PLY specification for ANSI C as given in K&R 2nd Ed. Note: To use +the examples, you will need to copy the lex.py and yacc.py files to +the example directory. + +A simple example is found at the end of this document + +Requirements +============ +PLY requires the use of Python 2.0 or greater. It should work on +just about any platform. + +Resources +========= + +More information about PLY can be obtained on the PLY webpage at: + + http://systems.cs.uchicago.edu/ply + +For a detailed overview of parsing theory, consult the excellent +book "Compilers : Principles, Techniques, and Tools" by Aho, Sethi, and +Ullman. The topics found in "Lex & Yacc" by Levine, Mason, and Brown +may also be useful. + +Given that this is the first release, I welcome your comments on how +to improve the current implementation. See the TODO file for things that +still need to be done. + +Acknowledgments +=============== + +A special thanks is in order for all of the students in CS326 who +suffered through about 25 different versions of these tools :-). + +Example +======= + +Here is a simple example showing a PLY implementation of a calculator with variables. + +# ----------------------------------------------------------------------------- +# calc.py +# +# A simple calculator with variables. +# ----------------------------------------------------------------------------- + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Integer value too large", t.value + t.value = 0 + return t + +# Ignored characters +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lineno += t.value.count("\n") + +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +import lex +lex.lex() + +# Precedence rules for the arithmetic operators +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names (for storing variables) +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + +while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + yacc.parse(s) + + + + + + + + + + + + + + + + + diff --git a/ext/ply/TODO b/ext/ply/TODO new file mode 100644 index 000000000..b2978150d --- /dev/null +++ b/ext/ply/TODO @@ -0,0 +1,22 @@ +The PLY to-do list: + +$Header: /home/stever/bk/newmem2/ext/ply/TODO 1.1 03/06/06 14:53:34-00:00 stever@ $ + +1. Create a Python package using distutils + +2. More interesting parsing examples. + +3. Work on the ANSI C grammar so that it can actually parse C programs. To do this, + some extra code needs to be added to the lexer to deal with typedef names and enumeration + constants. + +4. Get LALR(1) to work. Hard, but not impossible. + +5. More tests in the test directory. + +6. Performance improvements and cleanup in yacc.py. + +7. More documentation. + +8. Lots and lots of cleanup. + diff --git a/ext/ply/doc/ply.html b/ext/ply/doc/ply.html new file mode 100644 index 000000000..2596066fe --- /dev/null +++ b/ext/ply/doc/ply.html @@ -0,0 +1,1642 @@ +<html> +<head> +<title>PLY (Python Lex-Yacc)</title> +</head> +<body bgcolor="#ffffff"> + +<h1>PLY (Python Lex-Yacc)</h1> + +<b> +David M. Beazley <br> +Department of Computer Science <br> +University of Chicago <br> +Chicago, IL 60637 <br> +beazley@cs.uchicago.edu <br> +</b> + +<p> +Documentation version: $Header: /home/stever/bk/newmem2/ext/ply/doc/ply.html 1.1 03/06/06 14:53:34-00:00 stever@ $ + +<h2>Introduction</h2> + +PLY is a Python-only implementation of the popular compiler +construction tools lex and yacc. The implementation borrows ideas +from a number of previous efforts; most notably John Aycock's SPARK +toolkit. However, the overall flavor of the implementation is more +closely modeled after the C version of lex and yacc. The other +significant feature of PLY is that it provides extensive input +validation and error reporting--much more so than other Python parsing +tools. + +<p> +Early versions of PLY were developed to support the Introduction to +Compilers Course at the University of Chicago. In this course, +students built a fully functional compiler for a simple Pascal-like +language. Their compiler, implemented entirely in Python, had to +include lexical analysis, parsing, type checking, type inference, +nested scoping, and code generation for the SPARC processor. +Approximately 30 different compiler implementations were completed in +this course. Most of PLY's interface and operation has been motivated by common +usability problems encountered by students. + +<p> +Because PLY was primarily developed as an instructional tool, you will +find it to be <em>MUCH</em> more picky about token and grammar rule +specification than most other Python parsing tools. In part, this +added formality is meant to catch common programming mistakes made by +novice users. However, advanced users will also find such features to +be useful when building complicated grammars for real programming +languages. It should also be noted that PLY does not provide much in the way +of bells and whistles (e.g., automatic construction of abstract syntax trees, +tree traversal, etc.). Instead, you will find a bare-bones, yet +fully capable lex/yacc implementation written entirely in Python. + +<p> +The rest of this document assumes that you are somewhat familar with +parsing theory, syntax directed translation, and automatic tools such +as lex and yacc. If you are unfamilar with these topics, you will +probably want to consult an introductory text such as "Compilers: +Principles, Techniques, and Tools", by Aho, Sethi, and Ullman. "Lex +and Yacc" by John Levine may also be handy. + +<h2>PLY Overview</h2> + +PLY consists of two separate tools; <tt>lex.py</tt> and +<tt>yacc.py</tt>. <tt>lex.py</tt> is used to break input text into a +collection of tokens specified by a collection of regular expression +rules. <tt>yacc.py</tt> is used to recognize language syntax that has +been specified in the form of a context free grammar. Currently, +<tt>yacc.py</tt> uses LR parsing and generates its parsing tables +using the SLR algorithm. LALR(1) parsing may be supported in a future +release. + +<p> +The two tools are meant to work together. Specifically, +<tt>lex.py</tt> provides an external interface in the form of a +<tt>token()</tt> function that returns the next valid token on the +input stream. <tt>yacc.py</tt> calls this repeatedly to retrieve +tokens and invoke grammar rules. The output of <tt>yacc.py</tt> is +often an Abstract Syntax Tree (AST). However, this is entirely up to +the user. If desired, <tt>yacc.py</tt> can also be used to implement +simple one-pass compilers. + +<p> +Like its Unix counterpart, <tt>yacc.py</tt> provides most of the +features you expect including extensive error checking, grammar +validation, support for empty productions, error tokens, and ambiguity +resolution via precedence rules. The primary difference between +<tt>yacc.py</tt> and <tt>yacc</tt> is the use of SLR parsing instead +of LALR(1). Although this slightly restricts the types of grammars +than can be successfully parsed, it is sufficiently powerful to handle most +kinds of normal programming language constructs. + +<p> +Finally, it is important to note that PLY relies on reflection +(introspection) to build its lexers and parsers. Unlike traditional +lex/yacc which require a special input file that is converted into a +separate source file, the specifications given to PLY <em>are</em> +valid Python programs. This means that there are no extra source +files nor is there a special compiler construction step (e.g., running +yacc to generate Python code for the compiler). + +<h2>Lex Example</h2> + +<tt>lex.py</tt> is used to write tokenizers. To do this, each token +must be defined by a regular expression rule. The following file +implements a very simple lexer for tokenizing simple integer expressions: + +<blockquote> +<pre> +# ------------------------------------------------------------ +# calclex.py +# +# tokenizer for a simple expression evaluator for +# numbers and +,-,*,/ +# ------------------------------------------------------------ +import lex + +# List of token names. This is always required +tokens = ( + 'NUMBER', + 'PLUS', + 'MINUS', + 'TIMES', + 'DIVIDE', + 'LPAREN', + 'RPAREN', +) + +# Regular expression rules for simple tokens +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_LPAREN = r'\(' +t_RPAREN = r'\)' + +# A regular expression rule with some action code +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Line %d: Number %s is too large!" % (t.lineno,t.value) + t.value = 0 + return t + +# Define a rule so we can track line numbers +def t_newline(t): + r'\n+' + t.lineno += len(t.value) + +# A string containing ignored characters (spaces and tabs) +t_ignore = ' \t' + +# Error handling rule +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +lex.lex() + +# Test it out +data = ''' +3 + 4 * 10 + + -20 *2 +''' + +# Give the lexer some input +lex.input(data) + +# Tokenize +while 1: + tok = lex.token() + if not tok: break # No more input + print tok +</pre> +</blockquote> + +In the example, the <tt>tokens</tt> list defines all of the possible +token names that can be produced by the lexer. This list is always required +and is used to perform a variety of validation checks. Following the <tt>tokens</tt> +list, regular expressions are written for each token. Each of these +rules are defined by making declarations with a special prefix <tt>t_</tt> to indicate that it +defines a token. For simple tokens, the regular expression can +be specified as strings such as this (note: Python raw strings are used since they are the +most convenient way to write regular expression strings): + +<blockquote> +<pre> +t_PLUS = r'\+' +</pre> +</blockquote> + +In this case, the name following the <tt>t_</tt> must exactly match one of the +names supplied in <tt>tokens</tt>. If some kind of action needs to be performed, +a token rule can be specified as a function. For example: + +<blockquote> +<pre> +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Number %s is too large!" % t.value + t.value = 0 + return t +</pre> +</blockquote> + +In this case, the regular expression rule is specified in the function documentation string. +The function always takes a single argument which is an instance of +<tt>LexToken</tt>. This object has attributes of <tt>t.type</tt> which is the token type, +<tt>t.value</tt> which is the lexeme, and <tt>t.lineno</tt> which is the current line number. +By default, <tt>t.type</tt> is set to the name following the <tt>t_</tt> prefix. The action +function can modify the contents of the <tt>LexToken</tt> object as appropriate. However, +when it is done, the resulting token should be returned. If no value is returned by the action +function, the token is simply discarded and the next token read. + +<p> +The rule <tt>t_newline()</tt> illustrates a regular expression rule +for a discarded token. In this case, a rule is written to match +newlines so that proper line number tracking can be performed. +By returning no value, the function causes the newline character to be +discarded. + +<p> +The special <tt>t_ignore</tt> rule is reserved by <tt>lex.py</tt> for characters +that should be completely ignored in the input stream. +Usually this is used to skip over whitespace and other non-essential characters. +Although it is possible to define a regular expression rule for whitespace in a manner +similar to <tt>t_newline()</tt>, the use of <tt>t_ignore</tt> provides substantially better +lexing performance because it is handled as a special case and is checked in a much +more efficient manner than the normal regular expression rules. + +<p> +Finally, the <tt>t_error()</tt> +function is used to handle lexing errors that occur when illegal +characters are detected. In this case, the <tt>t.value</tt> attribute contains the +rest of the input string that has not been tokenized. In the example, we simply print +the offending character and skip ahead one character by calling <tt>t.skip(1)</tt>. + +<p> +To build the lexer, the function <tt>lex.lex()</tt> is used. This function +uses Python reflection (or introspection) to read the the regular expression rules +out of the calling context and build the lexer. Once the lexer has been built, two functions can +be used to control the lexer. + +<ul> +<li><tt>lex.input(data)</tt>. Reset the lexer and store a new input string. +<li><tt>lex.token()</tt>. Return the next token. Returns a special <tt>LexToken</tt> instance on success or +None if the end of the input text has been reached. +</ul> + +The code at the bottom of the example shows how the lexer is actually used. When executed, +the following output will be produced: + +<blockquote> +<pre> +$ python example.py +LexToken(NUMBER,3,2) +LexToken(PLUS,'+',2) +LexToken(NUMBER,4,2) +LexToken(TIMES,'*',2) +LexToken(NUMBER,10,2) +LexToken(PLUS,'+',3) +LexToken(MINUS,'-',3) +LexToken(NUMBER,20,3) +LexToken(TIMES,'*',3) +LexToken(NUMBER,2,3) +</pre> +</blockquote> + +<h2>Lex Implementation Notes</h2> + +<ul> +<li><tt>lex.py</tt> uses the <tt>re</tt> module to do its patten matching. When building the master regular expression, +rules are added in the following order: +<p> +<ol> +<li>All tokens defined by functions are added in the same order as they appear in the lexer file. +<li>Tokens defined by strings are added by sorting them in order of decreasing regular expression length (longer expressions +are added first). +</ol> +<p> +Without this ordering, it can be difficult to correctly match certain types of tokens. For example, if you +wanted to have separate tokens for "=" and "==", you need to make sure that "==" is checked first. By sorting regular +expressions in order of decreasing length, this problem is solved for rules defined as strings. For functions, +the order can be explicitly controlled since rules appearing first are checked first. + +<P> +<li>The lexer requires input to be supplied as a single input string. Since most machines have more than enough memory, this +rarely presents a performance concern. However, it means that the lexer currently can't be used with streaming data +such as open files or sockets. This limitation is primarily a side-effect of using the <tt>re</tt> module. + +<p> +<li> +To handle reserved words, it is usually easier to just match an identifier and do a special name lookup in a function +like this: + +<blockquote> +<pre> +reserved = { + 'if' : 'IF', + 'then' : 'THEN', + 'else' : 'ELSE', + 'while' : 'WHILE', + ... +} + +def t_ID(t): + r'[a-zA-Z_][a-zA-Z_0-9]*' + t.type = reserved.get(t.value,'ID') # Check for reserved words + return t +</pre> +</blockquote> + +<p> +<li>The lexer requires tokens to be defined as class instances with <tt>t.type</tt>, <tt>t.value</tt>, and <tt>t.lineno</tt> +attributes. By default, tokens are created as instances of the <tt>LexToken</tt> class defined internally to <tt>lex.py</tt>. +If desired, you can create new kinds of tokens provided that they have the three required attributes. However, +in practice, it is probably safer to stick with the default. + +<p> +<li>The only safe attribute for assigning token properties is <tt>t.value</tt>. In some cases, you may want to attach +a number of different properties to a token (e.g., symbol table entries for identifiers). To do this, replace <tt>t.value</tt> +with a tuple or class instance. For example: + +<blockquote> +<pre> +def t_ID(t): + ... + # For identifiers, create a (lexeme, symtab) tuple + t.value = (t.value, symbol_lookup(t.value)) + ... + return t +</pre> +</blockquote> + +Although allowed, do NOT assign additional attributes to the token object. For example, +<blockquote> +<pre> +def t_ID(t): + ... + # Bad implementation of above + t.symtab = symbol_lookup(t.value) + ... +</pre> +</blockquote> + +The reason you don't want to do this is that the <tt>yacc.py</tt> +module only provides public access to the <tt>t.value</tt> attribute of each token. +Therefore, any other attributes you assign are inaccessible (if you are familiar +with the internals of C lex/yacc, <tt>t.value</tt> is the same as <tt>yylval.tok</tt>). + +<p> +<li>To track line numbers, the lexer internally maintains a line +number variable. Each token automatically gets the value of the +current line number in the <tt>t.lineno</tt> attribute. To modify the +current line number, simply change the <tt>t.lineno</tt> attribute +in a function rule (as previously shown for +<tt>t_newline()</tt>). Even if the resulting token is discarded, +changes to the line number remain in effect for subsequent tokens. + +<p> +<li>To support multiple scanners in the same application, the <tt>lex.lex()</tt> function +actually returns a special <tt>Lexer</tt> object. This object has two methods +<tt>input()</tt> and <tt>token()</tt> that can be used to supply input and get tokens. For example: + +<blockquote> +<pre> +lexer = lex.lex() +lexer.input(sometext) +while 1: + tok = lexer.token() + if not tok: break + print tok +</pre> +</blockquote> + +The functions <tt>lex.input()</tt> and <tt>lex.token()</tt> are bound to the <tt>input()</tt> +and <tt>token()</tt> methods of the last lexer created by the lex module. + + +<p> +<li>To reduce compiler startup time and improve performance, the lexer can be built in optimized mode as follows: + +<blockquote> +<pre> +lex.lex(optimize=1) +</pre> +</blockquote> + +When used, most error checking and validation is disabled. This provides a slight performance +gain while tokenizing and tends to chop a few tenths of a second off startup time. Since it disables +error checking, this mode is not the default and is not recommended during development. However, once +you have your compiler fully working, it is usually safe to disable the error checks. + +<p> +<li>You can enable some additional debugging by building the lexer like this: + +<blockquote> +<pre> +lex.lex(debug=1) +</pre> +</blockquote> + +<p> +<li>To help you debug your lexer, <tt>lex.py</tt> comes with a simple main program which will either +tokenize input read from standard input or from a file. To use it, simply put this in your lexer: + +<blockquote> +<pre> +if __name__ == '__main__': + lex.runmain() +</pre> +</blockquote> + +Then, run you lexer as a main program such as <tt>python mylex.py</tt> + +<p> +<li>Since the lexer is written entirely in Python, its performance is +largely determined by that of the Python <tt>re</tt> module. Although +the lexer has been written to be as efficient as possible, it's not +blazingly fast when used on very large input files. Sorry. If +performance is concern, you might consider upgrading to the most +recent version of Python, creating a hand-written lexer, or offloading +the lexer into a C extension module. In defense of <tt>lex.py</tt>, +it's performance is not <em>that</em> bad when used on reasonably +sized input files. For instance, lexing a 4700 line C program with +32000 input tokens takes about 20 seconds on a 200 Mhz PC. Obviously, +it will run much faster on a more speedy machine. + +</ul> + +<h2>Parsing basics</h2> + +<tt>yacc.py</tt> is used to parse language syntax. Before showing an +example, there are a few important bits of background that must be +mentioned. First, <tt>syntax</tt> is usually specified in terms of a +context free grammar (CFG). For example, if you wanted to parse +simple arithmetic expressions, you might first write an unambiguous +grammar specification like this: + +<blockquote> +<pre> +expression : expression + term + | expression - term + | term + +term : term * factor + | term / factor + | factor + +factor : NUMBER + | ( expression ) +</pre> +</blockquote> + +Next, the semantic behavior of a language is often specified using a +technique known as syntax directed translation. In syntax directed +translation, attributes are attached to each symbol in a given grammar +rule along with an action. Whenever a particular grammar rule is +recognized, the action describes what to do. For example, given the +expression grammar above, you might write the specification for a +simple calculator like this: + +<blockquote> +<pre> +Grammar Action +-------------------------------- -------------------------------------------- +expression0 : expression1 + term expression0.val = expression1.val + term.val + | expression1 - term expression0.val = expression1.val - term.val + | term expression0.val = term.val + +term0 : term1 * factor term0.val = term1.val * factor.val + | term1 / factor term0.val = term1.val / factor.val + | factor term0.val = factor.val + +factor : NUMBER factor.val = int(NUMBER.lexval) + | ( expression ) factor.val = expression.val +</pre> +</blockquote> + +Finally, Yacc uses a parsing technique known as LR-parsing or shift-reduce parsing. LR parsing is a +bottom up technique that tries to recognize the right-hand-side of various grammar rules. +Whenever a valid right-hand-side is found in the input, the appropriate action code is triggered and the +grammar symbols are replaced by the grammar symbol on the left-hand-side. + +<p> +LR parsing is commonly implemented by shifting grammar symbols onto a stack and looking at the stack and the next +input token for patterns. The details of the algorithm can be found in a compiler text, but the +following example illustrates the steps that are performed if you wanted to parse the expression +<tt>3 + 5 * (10 - 20)</tt> using the grammar defined above: + +<blockquote> +<pre> +Step Symbol Stack Input Tokens Action +---- --------------------- --------------------- ------------------------------- +1 $ 3 + 5 * ( 10 - 20 )$ Shift 3 +2 $ 3 + 5 * ( 10 - 20 )$ Reduce factor : NUMBER +3 $ factor + 5 * ( 10 - 20 )$ Reduce term : factor +4 $ term + 5 * ( 10 - 20 )$ Reduce expr : term +5 $ expr + 5 * ( 10 - 20 )$ Shift + +6 $ expr + 5 * ( 10 - 20 )$ Shift 5 +7 $ expr + 5 * ( 10 - 20 )$ Reduce factor : NUMBER +8 $ expr + factor * ( 10 - 20 )$ Reduce term : factor +9 $ expr + term * ( 10 - 20 )$ Shift * +10 $ expr + term * ( 10 - 20 )$ Shift ( +11 $ expr + term * ( 10 - 20 )$ Shift 10 +12 $ expr + term * ( 10 - 20 )$ Reduce factor : NUMBER +13 $ expr + term * ( factor - 20 )$ Reduce term : factor +14 $ expr + term * ( term - 20 )$ Reduce expr : term +15 $ expr + term * ( expr - 20 )$ Shift - +16 $ expr + term * ( expr - 20 )$ Shift 20 +17 $ expr + term * ( expr - 20 )$ Reduce factor : NUMBER +18 $ expr + term * ( expr - factor )$ Reduce term : factor +19 $ expr + term * ( expr - term )$ Reduce expr : expr - term +20 $ expr + term * ( expr )$ Shift ) +21 $ expr + term * ( expr ) $ Reduce factor : (expr) +22 $ expr + term * factor $ Reduce term : term * factor +23 $ expr + term $ Reduce expr : expr + term +24 $ expr $ Reduce expr +25 $ $ Success! +</pre> +</blockquote> + +When parsing the expression, an underlying state machine and the current input token determine what to do next. +If the next token looks like part of a valid grammar rule (based on other items on the stack), it is generally shifted +onto the stack. If the top of the stack contains a valid right-hand-side of a grammar rule, it is +usually "reduced" and the symbols replaced with the symbol on the left-hand-side. When this reduction occurs, the +appropriate action is triggered (if defined). If the input token can't be shifted and the top of stack doesn't match +any grammar rules, a syntax error has occurred and the parser must take some kind of recovery step (or bail out). + +<p> +It is important to note that the underlying implementation is actually built around a large finite-state machine +and some tables. The construction of these tables is quite complicated and beyond the scope of this discussion. +However, subtle details of this process explain why, in the example above, the parser chooses to shift a token +onto the stack in step 9 rather than reducing the rule <tt>expr : expr + term</tt>. + +<h2>Yacc example</h2> + +Suppose you wanted to make a grammar for simple arithmetic expressions as previously described. Here is +how you would do it with <tt>yacc.py</tt>: + +<blockquote> +<pre> +# Yacc example + +import yacc + +# Get the token map from the lexer. This is required. +from calclex import tokens + +def p_expression_plus(t): + 'expression : expression PLUS term' + t[0] = t[1] + t[3] + +def p_expression_minus(t): + 'expression : expression MINUS term' + t[0] = t[1] - t[3] + +def p_expression_term(t): + 'expression : term' + t[0] = t[1] + +def p_term_times(t): + 'term : term TIMES factor' + t[0] = t[1] * t[3] + +def p_term_div(t): + 'term : term DIVIDE factor' + t[0] = t[1] / t[3] + +def p_term_factor(t): + 'term : factor' + t[0] = t[1] + +def p_factor_num(t): + 'factor : NUMBER' + t[0] = t[1] + +def p_factor_expr(t): + 'factor : LPAREN expression RPAREN' + t[0] = t[2] + +# Error rule for syntax errors +def p_error(t): + print "Syntax error in input!" + +# Build the parser +yacc.yacc() + +while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + if not s: continue + result = yacc.parse(s) + print result +</pre> +</blockquote> + +In this example, each grammar rule is defined by a Python function where the docstring to that function contains the +appropriate context-free grammar specification (an idea borrowed from John Aycock's SPARK toolkit). Each function accepts a single +argument <tt>t</tt> that is a sequence containing the values of each grammar symbol in the corresponding rule. The values of +<tt>t[i]</tt> are mapped to grammar symbols as shown here: + +<blockquote> +<pre> +def p_expression_plus(t): + 'expression : expression PLUS term' + # ^ ^ ^ ^ + # t[0] t[1] t[2] t[3] + + t[0] = t[1] + t[3] +</pre> +</blockquote> + +For tokens, the "value" in the corresponding <tt>t[i]</tt> is the +<em>same</em> as the value of the <tt>t.value</tt> attribute assigned +in the lexer module. For non-terminals, the value is determined by +whatever is placed in <tt>t[0]</tt> when rules are reduced. This +value can be anything at all. However, it probably most common for +the value to be a simple Python type, a tuple, or an instance. In this example, we +are relying on the fact that the <tt>NUMBER</tt> token stores an integer value in its value +field. All of the other rules simply perform various types of integer operations and store +the result. + +<p> +The first rule defined in the yacc specification determines the starting grammar +symbol (in this case, a rule for <tt>expression</tt> appears first). Whenever +the starting rule is reduced by the parser and no more input is available, parsing +stops and the final value is returned (this value will be whatever the top-most rule +placed in <tt>t[0]</tt>). + +<p>The <tt>p_error(t)</tt> rule is defined to catch syntax errors. See the error handling section +below for more detail. + +<p> +To build the parser, call the <tt>yacc.yacc()</tt> function. This function +looks at the module and attempts to construct all of the LR parsing tables for the grammar +you have specified. The first time <tt>yacc.yacc()</tt> is invoked, you will get a message +such as this: + +<blockquote> +<pre> +$ python calcparse.py +yacc: Generating SLR parsing table... +calc > +</pre> +</blockquote> + +Since table construction is relatively expensive (especially for large +grammars), the resulting parsing table is written to the current +directory in a file called <tt>parsetab.py</tt>. In addition, a +debugging file called <tt>parser.out</tt> is created. On subsequent +executions, <tt>yacc</tt> will reload the table from +<tt>parsetab.py</tt> unless it has detected a change in the underlying +grammar (in which case the tables and <tt>parsetab.py</tt> file are +regenerated). + +<p> +If any errors are detected in your grammar specification, <tt>yacc.py</tt> will produce +diagnostic messages and possibly raise an exception. Some of the errors that can be detected include: + +<ul> +<li>Duplicated function names (if more than one rule function have the same name in the grammar file). +<li>Shift/reduce and reduce/reduce conflicts generated by ambiguous grammars. +<li>Badly specified grammar rules. +<li>Infinite recursion (rules that can never terminate). +<li>Unused rules and tokens +<li>Undefined rules and tokens +</ul> + +The next few sections now discuss a few finer points of grammar construction. + +<h2>Combining Grammar Rule Functions</h2> + +When grammar rules are similar, they can be combined into a single function. +For example, consider the two rules in our earlier example: + +<blockquote> +<pre> +def p_expression_plus(t): + 'expression : expression PLUS term' + t[0] = t[1] + t[3] + +def p_expression_minus(t): + 'expression : expression MINUS term' + t[0] = t[1] - t[3] +</pre> +</blockquote> + +Instead of writing two functions, you might write a single function like this: + +<blockquote> +<pre> +def p_expression(t): + '''expression : expression PLUS term + | expression MINUS term''' + if t[2] == '+': + t[0] = t[1] + t[3] + elif t[2] == '-': + t[0] = t[1] - t[3] +</pre> +</blockquote> + +In general, the doc string for any given function can contain multiple grammar rules. So, it would +have also been legal (although possibly confusing) to write this: + +<blockquote> +<pre> +def p_binary_operators(t): + '''expression : expression PLUS term + | expression MINUS term + term : term TIMES factor + | term DIVIDE factor''' + if t[2] == '+': + t[0] = t[1] + t[3] + elif t[2] == '-': + t[0] = t[1] - t[3] + elif t[2] == '*': + t[0] = t[1] * t[3] + elif t[2] == '/': + t[0] = t[1] / t[3] +</pre> +</blockquote> + +When combining grammar rules into a single function, it is usually a good idea for all of the rules to have +a similar structure (e.g., the same number of terms). Otherwise, the corresponding action code may be more +complicated than necessary. + +<h2>Empty Productions</h2> + +<tt>yacc.py</tt> can handle empty productions by defining a rule like this: + +<blockquote> +<pre> +def p_empty(t): + 'empty :' + pass +</pre> +</blockquote> + +Now to use the empty production, simply use 'empty' as a symbol. For example: + +<blockquote> +<pre> +def p_optitem(t): + 'optitem : item' + ' | empty' + ... +</pre> +</blockquote> + +<h2>Dealing With Ambiguous Grammars</h2> + +The expression grammar given in the earlier example has been written in a special format to eliminate ambiguity. +However, in many situations, it is extremely difficult or awkward to write grammars in this format. A +much more natural way to express the grammar is in a more compact form like this: + +<blockquote> +<pre> +expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression + | LPAREN expression RPAREN + | NUMBER +</pre> +</blockquote> + +Unfortunately, this grammar specification is ambiguous. For example, if you are parsing the string +"3 * 4 + 5", there is no way to tell how the operators are supposed to be grouped. +For example, does this expression mean "(3 * 4) + 5" or is it "3 * (4+5)"? + +<p> +When an ambiguous grammar is given to <tt>yacc.py</tt> it will print messages about "shift/reduce conflicts" +or a "reduce/reduce conflicts". A shift/reduce conflict is caused when the parser generator can't decide +whether or not to reduce a rule or shift a symbol on the parsing stack. For example, consider +the string "3 * 4 + 5" and the internal parsing stack: + +<blockquote> +<pre> +Step Symbol Stack Input Tokens Action +---- --------------------- --------------------- ------------------------------- +1 $ 3 * 4 + 5$ Shift 3 +2 $ 3 * 4 + 5$ Reduce : expression : NUMBER +3 $ expr * 4 + 5$ Shift * +4 $ expr * 4 + 5$ Shift 4 +5 $ expr * 4 + 5$ Reduce: expression : NUMBER +6 $ expr * expr + 5$ SHIFT/REDUCE CONFLICT ???? +</pre> +</blockquote> + +In this case, when the parser reaches step 6, it has two options. One is the reduce the +rule <tt>expr : expr * expr</tt> on the stack. The other option is to shift the +token <tt>+</tt> on the stack. Both options are perfectly legal from the rules +of the context-free-grammar. + +<p> +By default, all shift/reduce conflicts are resolved in favor of shifting. Therefore, in the above +example, the parser will always shift the <tt>+</tt> instead of reducing. Although this +strategy works in many cases (including the ambiguous if-then-else), it is not enough for arithmetic +expressions. In fact, in the above example, the decision to shift <tt>+</tt> is completely wrong---we should have +reduced <tt>expr * expr</tt> since multiplication has higher precedence than addition. + +<p>To resolve ambiguity, especially in expression grammars, <tt>yacc.py</tt> allows individual +tokens to be assigned a precedence level and associativity. This is done by adding a variable +<tt>precedence</tt> to the grammar file like this: + +<blockquote> +<pre> +precedence = ( + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), +) +</pre> +</blockquote> + +This declaration specifies that <tt>PLUS</tt>/<tt>MINUS</tt> have +the same precedence level and are left-associative and that +<tt>TIMES</tt>/<tt>DIVIDE</tt> have the same precedence and are left-associative. +Furthermore, the declaration specifies that <tt>TIMES</tt>/<tt>DIVIDE</tt> have higher +precedence than <tt>PLUS</tt>/<tt>MINUS</tt> (since they appear later in the +precedence specification). + +<p> +The precedence specification is used to attach a numerical precedence value and associativity direction +to each grammar rule. This is always determined by the precedence of the right-most terminal symbol. Therefore, +if PLUS/MINUS had a precedence of 1 and TIMES/DIVIDE had a precedence of 2, the grammar rules +would have precedence values as follows: + +<blockquote> +<pre> +expression : expression PLUS expression # prec = 1, left + | expression MINUS expression # prec = 1, left + | expression TIMES expression # prec = 2, left + | expression DIVIDE expression # prec = 2, left + | LPAREN expression RPAREN # prec = unknown + | NUMBER # prec = unknown +</pre> +</blockquote> + +When shift/reduce conflicts are encountered, the parser generator resolves the conflict by +looking at the precedence rules and associativity specifiers. + +<p> +<ol> +<li>If the current token has higher precedence, it is shifted. +<li>If the grammar rule on the stack has higher precedence, the rule is reduced. +<li>If the current token and the grammar rule have the same precedence, the +rule is reduced for left associativity, whereas the token is shifted for right associativity. +<li>If nothing is known about the precedence, shift/reduce conflicts are resolved in +favor of shifting (the default). +</ol> + +<p> +When shift/reduce conflicts are resolved using the first three techniques (with the help of +precedence rules), <tt>yacc.py</tt> will report no errors or conflicts in the grammar. + +<p> +One problem with the precedence specifier technique is that it is sometimes necessary to +change the precedence of an operator in certain contents. For example, consider a unary-minus operator +in "3 + 4 * -5". Normally, unary minus has a very high precedence--being evaluated before the multiply. +However, in our precedence specifier, MINUS has a lower precedence than TIMES. To deal with this, +precedence rules can be given for fictitious tokens like this: + +<blockquote> +<pre> +precedence = ( + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), + ('right', 'UMINUS'), # Unary minus operator +) +</pre> +</blockquote> + +Now, in the grammar file, we can write our unary minus rule like this: + +<blockquote> +<pre> +def p_expr_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] +</pre> +</blockquote> + +In this case, <tt>%prec UMINUS</tt> overrides the default rule precedence--setting it to that +of UMINUS in the precedence specifier. + +<p> +It is also possible to specify non-associativity in the <tt>precedence</tt> table. This would +be used when you <em>don't</em> want operations to chain together. For example, suppose +you wanted to support a comparison operators like <tt><</tt> and <tt>></tt> but you didn't want to allow +combinations like <tt>a < b < c</tt>. To do this, simply specify a rule like this: + +<blockquote> +<pre> +precedence = ( + ('nonassoc', 'LESSTHAN', 'GREATERTHAN'), # Nonassociative operators + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), + ('right', 'UMINUS'), # Unary minus operator +) +</pre> +</blockquote> + +<p> +Reduce/reduce conflicts are caused when there are multiple grammar +rules that can be applied to a given set of symbols. This kind of +conflict is almost always bad and is always resolved by picking the +rule that appears first in the grammar file. Reduce/reduce conflicts +are almost always caused when different sets of grammar rules somehow +generate the same set of symbols. For example: + +<blockquote> +<pre> +assignment : ID EQUALS NUMBER + | ID EQUALS expression + +expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression + | LPAREN expression RPAREN + | NUMBER +</pre> +</blockquote> + +In this case, a reduce/reduce conflict exists between these two rules: + +<blockquote> +<pre> +assignment : ID EQUALS NUMBER +expression : NUMBER +</pre> +</blockquote> + +For example, if you wrote "a = 5", the parser can't figure out if this +is supposed to reduced as <tt>assignment : ID EQUALS NUMBER</tt> or +whether it's supposed to reduce the 5 as an expression and then reduce +the rule <tt>assignment : ID EQUALS expression</tt>. + +<h2>The parser.out file</h2> + +Tracking down shift/reduce and reduce/reduce conflicts is one of the finer pleasures of using an LR +parsing algorithm. To assist in debugging, <tt>yacc.py</tt> creates a debugging file called +'parser.out' when it generates the parsing table. The contents of this file look like the following: + +<blockquote> +<pre> +Unused terminals: + + +Grammar + +Rule 1 expression -> expression PLUS expression +Rule 2 expression -> expression MINUS expression +Rule 3 expression -> expression TIMES expression +Rule 4 expression -> expression DIVIDE expression +Rule 5 expression -> NUMBER +Rule 6 expression -> LPAREN expression RPAREN + +Terminals, with rules where they appear + +TIMES : 3 +error : +MINUS : 2 +RPAREN : 6 +LPAREN : 6 +DIVIDE : 4 +PLUS : 1 +NUMBER : 5 + +Nonterminals, with rules where they appear + +expression : 1 1 2 2 3 3 4 4 6 0 + + +Parsing method: SLR + + +state 0 + + S' -> . expression + expression -> . expression PLUS expression + expression -> . expression MINUS expression + expression -> . expression TIMES expression + expression -> . expression DIVIDE expression + expression -> . NUMBER + expression -> . LPAREN expression RPAREN + + NUMBER shift and go to state 3 + LPAREN shift and go to state 2 + + +state 1 + + S' -> expression . + expression -> expression . PLUS expression + expression -> expression . MINUS expression + expression -> expression . TIMES expression + expression -> expression . DIVIDE expression + + PLUS shift and go to state 6 + MINUS shift and go to state 5 + TIMES shift and go to state 4 + DIVIDE shift and go to state 7 + + +state 2 + + expression -> LPAREN . expression RPAREN + expression -> . expression PLUS expression + expression -> . expression MINUS expression + expression -> . expression TIMES expression + expression -> . expression DIVIDE expression + expression -> . NUMBER + expression -> . LPAREN expression RPAREN + + NUMBER shift and go to state 3 + LPAREN shift and go to state 2 + + +state 3 + + expression -> NUMBER . + + $ reduce using rule 5 + PLUS reduce using rule 5 + MINUS reduce using rule 5 + TIMES reduce using rule 5 + DIVIDE reduce using rule 5 + RPAREN reduce using rule 5 + + +state 4 + + expression -> expression TIMES . expression + expression -> . expression PLUS expression + expression -> . expression MINUS expression + expression -> . expression TIMES expression + expression -> . expression DIVIDE expression + expression -> . NUMBER + expression -> . LPAREN expression RPAREN + + NUMBER shift and go to state 3 + LPAREN shift and go to state 2 + + +state 5 + + expression -> expression MINUS . expression + expression -> . expression PLUS expression + expression -> . expression MINUS expression + expression -> . expression TIMES expression + expression -> . expression DIVIDE expression + expression -> . NUMBER + expression -> . LPAREN expression RPAREN + + NUMBER shift and go to state 3 + LPAREN shift and go to state 2 + + +state 6 + + expression -> expression PLUS . expression + expression -> . expression PLUS expression + expression -> . expression MINUS expression + expression -> . expression TIMES expression + expression -> . expression DIVIDE expression + expression -> . NUMBER + expression -> . LPAREN expression RPAREN + + NUMBER shift and go to state 3 + LPAREN shift and go to state 2 + + +state 7 + + expression -> expression DIVIDE . expression + expression -> . expression PLUS expression + expression -> . expression MINUS expression + expression -> . expression TIMES expression + expression -> . expression DIVIDE expression + expression -> . NUMBER + expression -> . LPAREN expression RPAREN + + NUMBER shift and go to state 3 + LPAREN shift and go to state 2 + + +state 8 + + expression -> LPAREN expression . RPAREN + expression -> expression . PLUS expression + expression -> expression . MINUS expression + expression -> expression . TIMES expression + expression -> expression . DIVIDE expression + + RPAREN shift and go to state 13 + PLUS shift and go to state 6 + MINUS shift and go to state 5 + TIMES shift and go to state 4 + DIVIDE shift and go to state 7 + + +state 9 + + expression -> expression TIMES expression . + expression -> expression . PLUS expression + expression -> expression . MINUS expression + expression -> expression . TIMES expression + expression -> expression . DIVIDE expression + + $ reduce using rule 3 + PLUS reduce using rule 3 + MINUS reduce using rule 3 + TIMES reduce using rule 3 + DIVIDE reduce using rule 3 + RPAREN reduce using rule 3 + + ! PLUS [ shift and go to state 6 ] + ! MINUS [ shift and go to state 5 ] + ! TIMES [ shift and go to state 4 ] + ! DIVIDE [ shift and go to state 7 ] + +state 10 + + expression -> expression MINUS expression . + expression -> expression . PLUS expression + expression -> expression . MINUS expression + expression -> expression . TIMES expression + expression -> expression . DIVIDE expression + + $ reduce using rule 2 + PLUS reduce using rule 2 + MINUS reduce using rule 2 + RPAREN reduce using rule 2 + TIMES shift and go to state 4 + DIVIDE shift and go to state 7 + + ! TIMES [ reduce using rule 2 ] + ! DIVIDE [ reduce using rule 2 ] + ! PLUS [ shift and go to state 6 ] + ! MINUS [ shift and go to state 5 ] + +state 11 + + expression -> expression PLUS expression . + expression -> expression . PLUS expression + expression -> expression . MINUS expression + expression -> expression . TIMES expression + expression -> expression . DIVIDE expression + + $ reduce using rule 1 + PLUS reduce using rule 1 + MINUS reduce using rule 1 + RPAREN reduce using rule 1 + TIMES shift and go to state 4 + DIVIDE shift and go to state 7 + + ! TIMES [ reduce using rule 1 ] + ! DIVIDE [ reduce using rule 1 ] + ! PLUS [ shift and go to state 6 ] + ! MINUS [ shift and go to state 5 ] + +state 12 + + expression -> expression DIVIDE expression . + expression -> expression . PLUS expression + expression -> expression . MINUS expression + expression -> expression . TIMES expression + expression -> expression . DIVIDE expression + + $ reduce using rule 4 + PLUS reduce using rule 4 + MINUS reduce using rule 4 + TIMES reduce using rule 4 + DIVIDE reduce using rule 4 + RPAREN reduce using rule 4 + + ! PLUS [ shift and go to state 6 ] + ! MINUS [ shift and go to state 5 ] + ! TIMES [ shift and go to state 4 ] + ! DIVIDE [ shift and go to state 7 ] + +state 13 + + expression -> LPAREN expression RPAREN . + + $ reduce using rule 6 + PLUS reduce using rule 6 + MINUS reduce using rule 6 + TIMES reduce using rule 6 + DIVIDE reduce using rule 6 + RPAREN reduce using rule 6 +</pre> +</blockquote> + +In the file, each state of the grammar is described. Within each state the "." indicates the current +location of the parse within any applicable grammar rules. In addition, the actions for each valid +input token are listed. When a shift/reduce or reduce/reduce conflict arises, rules <em>not</em> selected +are prefixed with an !. For example: + +<blockquote> +<pre> + ! TIMES [ reduce using rule 2 ] + ! DIVIDE [ reduce using rule 2 ] + ! PLUS [ shift and go to state 6 ] + ! MINUS [ shift and go to state 5 ] +</pre> +</blockquote> + +By looking at these rules (and with a little practice), you can usually track down the source +of most parsing conflicts. It should also be stressed that not all shift-reduce conflicts are +bad. However, the only way to be sure that they are resolved correctly is to look at <tt>parser.out</tt>. + +<h2>Syntax Error Handling</h2> + +When a syntax error occurs during parsing, the error is immediately +detected (i.e., the parser does not read any more tokens beyond the +source of the error). Error recovery in LR parsers is a delicate +topic that involves ancient rituals and black-magic. The recovery mechanism +provided by <tt>yacc.py</tt> is comparable to Unix yacc so you may want +consult a book like O'Reilly's "Lex and Yacc" for some of the finer details. + +<p> +When a syntax error occurs, <tt>yacc.py</tt> performs the following steps: + +<ol> +<li>On the first occurrence of an error, the user-defined <tt>p_error()</tt> function +is called with the offending token as an argument. Afterwards, the parser enters +an "error-recovery" mode in which it will not make future calls to <tt>p_error()</tt> until it +has successfully shifted at least 3 tokens onto the parsing stack. + +<p> +<li>If no recovery action is taken in <tt>p_error()</tt>, the offending lookahead token is replaced +with a special <tt>error</tt> token. + +<p> +<li>If the offending lookahead token is already set to <tt>error</tt>, the top item of the parsing stack is +deleted. + +<p> +<li>If the entire parsing stack is unwound, the parser enters a restart state and attempts to start +parsing from its initial state. + +<p> +<li>If a grammar rule accepts <tt>error</tt> as a token, it will be +shifted onto the parsing stack. + +<p> +<li>If the top item of the parsing stack is <tt>error</tt>, lookahead tokens will be discarded until the +parser can successfully shift a new symbol or reduce a rule involving <tt>error</tt>. +</ol> + +<h4>Recovery and resynchronization with error rules</h4> + +The most well-behaved approach for handling syntax errors is to write grammar rules that include the <tt>error</tt> +token. For example, suppose your language had a grammar rule for a print statement like this: + +<blockquote> +<pre> +def p_statement_print(t): + 'statement : PRINT expr SEMI' + ... +</pre> +</blockquote> + +To account for the possibility of a bad expression, you might write an additional grammar rule like this: + +<blockquote> +<pre> +def p_statement_print_error(t): + 'statement : PRINT error SEMI' + print "Syntax error in print statement. Bad expression" + +</pre> +</blockquote> + +In this case, the <tt>error</tt> token will match any sequence of +tokens that might appear up to the first semicolon that is +encountered. Once the semicolon is reached, the rule will be +invoked and the <tt>error</tt> token will go away. + +<p> +This type of recovery is sometimes known as parser resynchronization. +The <tt>error</tt> token acts as a wildcard for any bad input text and +the token immediately following <tt>error</tt> acts as a +synchronization token. + +<p> +It is important to note that the <tt>error</tt> token usually does not appear as the last token +on the right in an error rule. For example: + +<blockquote> +<pre> +def p_statement_print_error(t): + 'statement : PRINT error' + print "Syntax error in print statement. Bad expression" +</pre> +</blockquote> + +This is because the first bad token encountered will cause the rule to +be reduced--which may make it difficult to recover if more bad tokens +immediately follow. + +<h4>Panic mode recovery</h4> + +An alternative error recovery scheme is to enter a panic mode recovery in which tokens are +discarded to a point where the parser might be able to recover in some sensible manner. + +<p> +Panic mode recovery is implemented entirely in the <tt>p_error()</tt> function. For example, this +function starts discarding tokens until it reaches a closing '}'. Then, it restarts the +parser in its initial state. + +<blockquote> +<pre> +def p_error(t): + print "Whoa. You are seriously hosed." + # Read ahead looking for a closing '}' + while 1: + tok = yacc.token() # Get the next token + if not tok or tok.type == 'RBRACE': break + yacc.restart() +</pre> +</blockquote> + +<p> +This function simply discards the bad token and tells the parser that the error was ok. + +<blockquote> +<pre> +def p_error(t): + print "Syntax error at token", t.type + # Just discard the token and tell the parser it's okay. + yacc.errok() +</pre> +</blockquote> + +<P> +Within the <tt>p_error()</tt> function, three functions are available to control the behavior +of the parser: +<p> +<ul> +<li><tt>yacc.errok()</tt>. This resets the parser state so it doesn't think it's in error-recovery +mode. This will prevent an <tt>error</tt> token from being generated and will reset the internal +error counters so that the next syntax error will call <tt>p_error()</tt> again. + +<p> +<li><tt>yacc.token()</tt>. This returns the next token on the input stream. + +<p> +<li><tt>yacc.restart()</tt>. This discards the entire parsing stack and resets the parser +to its initial state. +</ul> + +Note: these functions are only available when invoking <tt>p_error()</tt> and are not available +at any other time. + +<p> +To supply the next lookahead token to the parser, <tt>p_error()</tt> can return a token. This might be +useful if trying to synchronize on special characters. For example: + +<blockquote> +<pre> +def p_error(t): + # Read ahead looking for a terminating ";" + while 1: + tok = yacc.token() # Get the next token + if not tok or tok.type == 'SEMI': break + yacc.errok() + + # Return SEMI to the parser as the next lookahead token + return tok +</pre> +</blockquote> + +<h4>General comments on error handling</h4> + +For normal types of languages, error recovery with error rules and resynchronization characters is probably the most reliable +technique. This is because you can instrument the grammar to catch errors at selected places where it is relatively easy +to recover and continue parsing. Panic mode recovery is really only useful in certain specialized applications where you might want +to discard huge portions of the input text to find a valid restart point. + +<h2>Line Number Tracking</h2> + +<tt>yacc.py</tt> automatically tracks line numbers for all of the grammar symbols and tokens it processes. To retrieve the line +numbers, two functions are used in grammar rules: + +<ul> +<li><tt>t.lineno(num)</tt>. Return the starting line number for symbol <em>num</em> +<li><tt>t.linespan(num)</tt>. Return a tuple (startline,endline) with the starting and ending line number for symbol <em>num</em>. +</ul> + +For example: + +<blockquote> +<pre> +def t_expression(t): + 'expression : expression PLUS expression' + t.lineno(1) # Line number of the left expression + t.lineno(2) # line number of the PLUS operator + t.lineno(3) # line number of the right expression + ... + start,end = t.linespan(3) # Start,end lines of the right expression + +</pre> +</blockquote> + +Since line numbers are managed internally by the parser, there is usually no need to modify the line +numbers. However, if you want to save the line numbers in a parse-tree node, you will need to make your own +private copy. + +<h2>AST Construction</h2> + +<tt>yacc.py</tt> provides no special functions for constructing an abstract syntax tree. However, such +construction is easy enough to do on your own. Simply create a data structure for abstract syntax tree nodes +and assign nodes to <tt>t[0]</tt> in each rule. + +For example: + +<blockquote> +<pre> +class Expr: pass + +class BinOp(Expr): + def __init__(self,left,op,right): + self.type = "binop" + self.left = left + self.right = right + self.op = op + +class Number(Expr): + def __init__(self,value): + self.type = "number" + self.value = value + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + + t[0] = BinOp(t[1],t[2],t[3]) + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = Number(t[1]) +</pre> +</blockquote> + +To simplify tree traversal, it may make sense to pick a very generic tree structure for your parse tree nodes. +For example: + +<blockquote> +<pre> +class Node: + def __init__(self,type,children=None,leaf=None): + self.type = type + if children: + self.children = children + else: + self.children = [ ] + self.leaf = leaf + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + + t[0] = Node("binop", [t[1],t[3]], t[2]) +</pre> +</blockquote> + +<h2>Yacc implementation notes</h2> + +<ul> +<li>By default, <tt>yacc.py</tt> relies on <tt>lex.py</tt> for tokenizing. However, an alternative tokenizer +can be supplied as follows: + +<blockquote> +<pre> +yacc.parse(lexer=x) +</pre> +</blockquote> +in this case, <tt>x</tt> must be a Lexer object that minimally has a <tt>x.token()</tt> method for retrieving the next +token. If an input string is given to <tt>yacc.parse()</tt>, the lexer must also have an <tt>x.input()</tt> method. + +<p> +<li>By default, the yacc generates tables in debugging mode (which produces the parser.out file and other output). +To disable this, use + +<blockquote> +<pre> +yacc.yacc(debug=0) +</pre> +</blockquote> + +<p> +<li>To change the name of the <tt>parsetab.py</tt> file, use: + +<blockquote> +<pre> +yacc.yacc(tabmodule="foo") +</pre> +</blockquote> + +<P> +<li>To print copious amounts of debugging during parsing, use: + +<blockquote> +<pre> +yacc.parse(debug=1) +</pre> +</blockquote> + +<p> +<li>The <tt>yacc.yacc()</tt> function really returns a parser object. If you want to support multiple +parsers in the same application, do this: + +<blockquote> +<pre> +p = yacc.yacc() +... +p.parse() +</pre> +</blockquote> + +Note: The function <tt>yacc.parse()</tt> is bound to the last parser that was generated. + +<p> +<li>Since the generation of the SLR tables is relatively expensive, previously generated tables are +cached and reused if possible. The decision to regenerate the tables is determined by taking an MD5 +checksum of all grammar rules and precedence rules. Only in the event of a mismatch are the tables regenerated. + +<p> +It should be noted that table generation is reasonably efficient, even for grammars that involve around a 100 rules +and several hundred states. For more complex languages such as C, table generation may take 30-60 seconds on a slow +machine. Please be patient. + +<p> +<li>Since LR parsing is mostly driven by tables, the performance of the parser is largely independent of the +size of the grammar. The biggest bottlenecks will be the lexer and the complexity of your grammar rules. +</ul> + +<h2>Parser and Lexer State Management</h2> + +In advanced parsing applications, you may want to have multiple +parsers and lexers. Furthermore, the parser may want to control the +behavior of the lexer in some way. + +<p> +To do this, it is important to note that both the lexer and parser are +actually implemented as objects. These objects are returned by the +<tt>lex()</tt> and <tt>yacc()</tt> functions respectively. For example: + +<blockquote> +<pre> +lexer = lex.lex() # Return lexer object +parser = yacc.yacc() # Return parser object +</pre> +</blockquote> + +Within lexer and parser rules, these objects are also available. In the lexer, +the "lexer" attribute of a token refers to the lexer object in use. For example: + +<blockquote> +<pre> +def t_NUMBER(t): + r'\d+' + ... + print t.lexer # Show lexer object +</pre> +</blockquote> + +In the parser, the "lexer" and "parser" attributes refer to the lexer +and parser objects respectively. + +<blockquote> +<pre> +def p_expr_plus(t): + 'expr : expr PLUS expr' + ... + print t.parser # Show parser object + print t.lexer # Show lexer object +</pre> +</blockquote> + +If necessary, arbitrary attributes can be attached to the lexer or parser object. +For example, if you wanted to have different parsing modes, you could attach a mode +attribute to the parser object and look at it later. + +<h2>Using Python's Optimized Mode</h2> + +Because PLY uses information from doc-strings, parsing and lexing +information must be gathered while running the Python interpreter in +normal mode (i.e., not with the -O or -OO options). However, if you +specify optimized mode like this: + +<blockquote> +<pre> +lex.lex(optimize=1) +yacc.yacc(optimize=1) +</pre> +</blockquote> + +then PLY can later be used when Python runs in optimized mode. To make this work, +make sure you first run Python in normal mode. Once the lexing and parsing tables +have been generated the first time, run Python in optimized mode. PLY will use +the tables without the need for doc strings. + +<p> +Beware: running PLY in optimized mode disables a lot of error +checking. You should only do this when your project has stabilized +and you don't need to do any debugging. + +<h2>Where to go from here?</h2> + +The <tt>examples</tt> directory of the PLY distribution contains several simple examples. Please consult a +compilers textbook for the theory and underlying implementation details or LR parsing. + +</body> +</html> + + + + + + + diff --git a/ext/ply/example/ansic/README b/ext/ply/example/ansic/README new file mode 100644 index 000000000..e049d3b4e --- /dev/null +++ b/ext/ply/example/ansic/README @@ -0,0 +1,2 @@ +This example is incomplete. Was going to specify an ANSI C parser. +This is part of it. diff --git a/ext/ply/example/ansic/clex.py b/ext/ply/example/ansic/clex.py new file mode 100644 index 000000000..afd995208 --- /dev/null +++ b/ext/ply/example/ansic/clex.py @@ -0,0 +1,161 @@ +# ---------------------------------------------------------------------- +# clex.py +# +# A lexer for ANSI C. +# ---------------------------------------------------------------------- + +import lex + +# Reserved words +reserved = ( + 'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE', + 'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER', + 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF', + 'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE', + ) + +tokens = reserved + ( + # Literals (identifier, integer constant, float constant, string constant, char const) + 'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST', + + # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=) + 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD', + 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT', + 'LOR', 'LAND', 'LNOT', + 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE', + + # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=) + 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL', + 'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL', + + # Increment/decrement (++,--) + 'PLUSPLUS', 'MINUSMINUS', + + # Structure dereference (->) + 'ARROW', + + # Conditional operator (?) + 'CONDOP', + + # Delimeters ( ) [ ] { } , . ; : + 'LPAREN', 'RPAREN', + 'LBRACKET', 'RBRACKET', + 'LBRACE', 'RBRACE', + 'COMMA', 'PERIOD', 'SEMI', 'COLON', + + # Ellipsis (...) + 'ELLIPSIS', + ) + +# Completely ignored characters +t_ignore = ' \t\x0c' + +# Newlines +def t_NEWLINE(t): + r'\n+' + t.lineno += t.value.count("\n") + +# Operators +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_MOD = r'%' +t_OR = r'\|' +t_AND = r'&' +t_NOT = r'~' +t_XOR = r'^' +t_LSHIFT = r'<<' +t_RSHIFT = r'>>' +t_LOR = r'\|\|' +t_LAND = r'&&' +t_LNOT = r'!' +t_LT = r'<' +t_GT = r'>' +t_LE = r'<=' +t_GE = r'>=' +t_EQ = r'==' +t_NE = r'!=' + +# Assignment operators + +t_EQUALS = r'=' +t_TIMESEQUAL = r'\*=' +t_DIVEQUAL = r'/=' +t_MODEQUAL = r'%=' +t_PLUSEQUAL = r'\+=' +t_MINUSEQUAL = r'-=' +t_LSHIFTEQUAL = r'<<=' +t_RSHIFTEQUAL = r'>>=' +t_ANDEQUAL = r'&=' +t_OREQUAL = r'\|=' +t_XOREQUAL = r'^=' + +# Increment/decrement +t_PLUSPLUS = r'\+\+' +t_MINUSMINUS = r'--' + +# -> +t_ARROW = r'->' + +# ? +t_CONDOP = r'\?' + +# Delimeters +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_LBRACKET = r'\[' +t_RBRACKET = r'\]' +t_LBRACE = r'\{' +t_RBRACE = r'\}' +t_COMMA = r',' +t_PERIOD = r'\.' +t_SEMI = r';' +t_COLON = r':' +t_ELLIPSIS = r'\.\.\.' + +# Identifiers and reserved words + +reserved_map = { } +for r in reserved: + reserved_map[r.lower()] = r + +def t_ID(t): + r'[A-Za-z_][\w_]*' + t.type = reserved_map.get(t.value,"ID") + return t + +# Integer literal +t_ICONST = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?' + +# Floating literal +t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' + +# String literal +t_SCONST = r'\"([^\\\n]|(\\.))*?\"' + +# Character constant 'c' or L'c' +t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\'' + +# Comments +def t_comment(t): + r' /\*(.|\n)*?\*/' + t.lineno += t.value.count('\n') + +# Preprocessor directive (ignored) +def t_preprocessor(t): + r'\#(.)*?\n' + t.lineno += 1 + +def t_error(t): + print "Illegal character %s" % repr(t.value[0]) + t.skip(1) + +lexer = lex.lex(optimize=1) +if __name__ == "__main__": + lex.runmain(lexer) + + + + + diff --git a/ext/ply/example/ansic/cparse.py b/ext/ply/example/ansic/cparse.py new file mode 100644 index 000000000..ddfd5c72b --- /dev/null +++ b/ext/ply/example/ansic/cparse.py @@ -0,0 +1,859 @@ +# ----------------------------------------------------------------------------- +# cparse.py +# +# Simple parser for ANSI C. Based on the grammar in K&R, 2nd Ed. +# ----------------------------------------------------------------------------- + +import yacc +import clex + +# Get the token map +tokens = clex.tokens + +# translation-unit: + +def p_translation_unit_1(t): + 'translation_unit : external_declaration' + pass + +def p_translation_unit_2(t): + 'translation_unit : translation_unit external_declaration' + pass + +# external-declaration: + +def p_external_declaration_1(t): + 'external_declaration : function_definition' + pass + +def p_external_declaration_2(t): + 'external_declaration : declaration' + pass + +# function-definition: + +def p_function_definition_1(t): + 'function_definition : declaration_specifiers declarator declaration_list compound_statement' + pass + +def p_function_definition_2(t): + 'function_definition : declarator declaration_list compound_statement' + pass + +def p_function_definition_3(t): + 'function_definition : declarator compound_statement' + pass + +def p_function_definition_4(t): + 'function_definition : declaration_specifiers declarator compound_statement' + pass + +# declaration: + +def p_declaration_1(t): + 'declaration : declaration_specifiers init_declarator_list SEMI' + pass + +def p_declaration_2(t): + 'declaration : declaration_specifiers SEMI' + pass + +# declaration-list: + +def p_declaration_list_1(t): + 'declaration_list : declaration' + pass + +def p_declaration_list_2(t): + 'declaration_list : declaration_list declaration ' + pass + +# declaration-specifiers +def p_declaration_specifiers_1(t): + 'declaration_specifiers : storage_class_specifier declaration_specifiers' + pass + +def p_declaration_specifiers_2(t): + 'declaration_specifiers : type_specifier declaration_specifiers' + pass + +def p_declaration_specifiers_3(t): + 'declaration_specifiers : type_qualifier declaration_specifiers' + pass + +def p_declaration_specifiers_4(t): + 'declaration_specifiers : storage_class_specifier' + pass + +def p_declaration_specifiers_5(t): + 'declaration_specifiers : type_specifier' + pass + +def p_declaration_specifiers_6(t): + 'declaration_specifiers : type_qualifier' + pass + +# storage-class-specifier +def p_storage_class_specifier(t): + '''storage_class_specifier : AUTO + | REGISTER + | STATIC + | EXTERN + | TYPEDEF + ''' + pass + +# type-specifier: +def p_type_specifier(t): + '''type_specifier : VOID + | CHAR + | SHORT + | INT + | LONG + | FLOAT + | DOUBLE + | SIGNED + | UNSIGNED + | struct_or_union_specifier + | enum_specifier + | TYPEID + ''' + pass + +# type-qualifier: +def p_type_qualifier(t): + '''type_qualifier : CONST + | VOLATILE''' + pass + +# struct-or-union-specifier + +def p_struct_or_union_specifier_1(t): + 'struct_or_union_specifier : struct_or_union ID LBRACE struct_declaration_list RBRACE' + pass + +def p_struct_or_union_specifier_2(t): + 'struct_or_union_specifier : struct_or_union LBRACE struct_declaration_list RBRACE' + pass + +def p_struct_or_union_specifier_3(t): + 'struct_or_union_specifier : struct_or_union ID' + pass + +# struct-or-union: +def p_struct_or_union(t): + '''struct_or_union : STRUCT + | UNION + ''' + pass + +# struct-declaration-list: + +def p_struct_declaration_list_1(t): + 'struct_declaration_list : struct_declaration' + pass + +def p_struct_declaration_list_2(t): + 'struct_declaration_list : struct_declarator_list struct_declaration' + pass + +# init-declarator-list: + +def p_init_declarator_list_1(t): + 'init_declarator_list : init_declarator' + pass + +def p_init_declarator_list_2(t): + 'init_declarator_list : init_declarator_list COMMA init_declarator' + pass + +# init-declarator + +def p_init_declarator_1(t): + 'init_declarator : declarator' + pass + +def p_init_declarator_2(t): + 'init_declarator : declarator EQUALS initializer' + pass + +# struct-declaration: + +def p_struct_declaration(t): + 'struct_declaration : specifier_qualifier_list struct_declarator_list SEMI' + pass + +# specifier-qualifier-list: + +def p_specifier_qualifier_list_1(t): + 'specifier_qualifier_list : type_specifier specifier_qualifier_list' + pass + +def p_specifier_qualifier_list_2(t): + 'specifier_qualifier_list : type_specifier' + pass + +def p_specifier_qualifier_list_3(t): + 'specifier_qualifier_list : type_qualifier specifier_qualifier_list' + pass + +def p_specifier_qualifier_list_4(t): + 'specifier_qualifier_list : type_qualifier' + pass + +# struct-declarator-list: + +def p_struct_declarator_list_1(t): + 'struct_declarator_list : struct_declarator' + pass + +def p_struct_declarator_list_2(t): + 'struct_declarator_list : struct_declarator_list COMMA struct_declarator' + pass + +# struct-declarator: + +def p_struct_declarator_1(t): + 'struct_declarator : declarator' + pass + +def p_struct_declarator_2(t): + 'struct_declarator : declarator COLON constant_expression' + pass + +def p_struct_declarator_3(t): + 'struct_declarator : COLON constant_expression' + pass + +# enum-specifier: + +def p_enum_specifier_1(t): + 'enum_specifier : ENUM ID LBRACE enumerator_list RBRACE' + pass + +def p_enum_specifier_2(t): + 'enum_specifier : ENUM LBRACE enumerator_list RBRACE' + pass + +def p_enum_specifier_3(t): + 'enum_specifier : ENUM ID' + pass + +# enumerator_list: +def p_enumerator_list_1(t): + 'enumerator_list : enumerator' + pass + +def p_enumerator_list_2(t): + 'enumerator_list : enumerator_list COMMA enumerator' + pass + +# enumerator: +def p_enumerator_1(t): + 'enumerator : ID' + pass + +def p_enumerator_2(t): + 'enumerator : ID EQUALS constant_expression' + pass + +# declarator: + +def p_declarator_1(t): + 'declarator : pointer direct_declarator' + pass + +def p_declarator_2(t): + 'declarator : direct_declarator' + pass + +# direct-declarator: + +def p_direct_declarator_1(t): + 'direct_declarator : ID' + pass + +def p_direct_declarator_2(t): + 'direct_declarator : LPAREN declarator RPAREN' + pass + +def p_direct_declarator_3(t): + 'direct_declarator : direct_declarator LBRACKET constant_expression_opt RBRACKET' + pass + +def p_direct_declarator_4(t): + 'direct_declarator : direct_declarator LPAREN parameter_type_list RPAREN ' + pass + +def p_direct_declarator_5(t): + 'direct_declarator : direct_declarator LPAREN identifier_list RPAREN ' + pass + +def p_direct_declarator_6(t): + 'direct_declarator : direct_declarator LPAREN RPAREN ' + pass + +# pointer: +def p_pointer_1(t): + 'pointer : TIMES type_qualifier_list' + pass + +def p_pointer_2(t): + 'pointer : TIMES' + pass + +def p_pointer_3(t): + 'pointer : TIMES type_qualifier_list pointer' + pass + +def p_pointer_4(t): + 'pointer : TIMES pointer' + pass + +# type-qualifier-list: + +def p_type_qualifier_list_1(t): + 'type_qualifier_list : type_qualifier' + pass + +def p_type_qualifier_list_2(t): + 'type_qualifier_list : type_qualifier_list type_qualifier' + pass + +# parameter-type-list: + +def p_parameter_type_list_1(t): + 'parameter_type_list : parameter_list' + pass + +def p_parameter_type_list_2(t): + 'parameter_type_list : parameter_list COMMA ELLIPSIS' + pass + +# parameter-list: + +def p_parameter_list_1(t): + 'parameter_list : parameter_declaration' + pass + +def p_parameter_list_2(t): + 'parameter_list : parameter_list COMMA parameter_declaration' + pass + +# parameter-declaration: +def p_parameter_declaration_1(t): + 'parameter_declaration : declaration_specifiers declarator' + pass + +def p_parameter_declaration_2(t): + 'parameter_declaration : declaration_specifiers abstract_declarator_opt' + pass + +# identifier-list: +def p_identifier_list_1(t): + 'identifier_list : ID' + pass + +def p_identifier_list_2(t): + 'identifier_list : identifier_list COMMA ID' + pass + +# initializer: + +def p_initializer_1(t): + 'initializer : assignment_expression' + pass + +def p_initializer_2(t): + '''initializer : LBRACE initializer_list RBRACE + | LBRACE initializer_list COMMA RBRACE''' + pass + +# initializer-list: + +def p_initializer_list_1(t): + 'initializer_list : initializer' + pass + +def p_initializer_list_2(t): + 'initializer_list : initializer_list COMMA initializer' + pass + +# type-name: + +def p_type_name(t): + 'type_name : specifier_qualifier_list abstract_declarator_opt' + pass + +def p_abstract_declarator_opt_1(t): + 'abstract_declarator_opt : empty' + pass + +def p_abstract_declarator_opt_2(t): + 'abstract_declarator_opt : abstract_declarator' + pass + +# abstract-declarator: + +def p_abstract_declarator_1(t): + 'abstract_declarator : pointer ' + pass + +def p_abstract_declarator_2(t): + 'abstract_declarator : pointer direct_abstract_declarator' + pass + +def p_abstract_declarator_3(t): + 'abstract_declarator : direct_abstract_declarator' + pass + +# direct-abstract-declarator: + +def p_direct_abstract_declarator_1(t): + 'direct_abstract_declarator : LPAREN abstract_declarator RPAREN' + pass + +def p_direct_abstract_declarator_2(t): + 'direct_abstract_declarator : direct_abstract_declarator LBRACKET constant_expression_opt RBRACKET' + pass + +def p_direct_abstract_declarator_3(t): + 'direct_abstract_declarator : LBRACKET constant_expression_opt RBRACKET' + pass + +def p_direct_abstract_declarator_4(t): + 'direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN' + pass + +def p_direct_abstract_declarator_5(t): + 'direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN' + pass + +# Optional fields in abstract declarators + +def p_constant_expression_opt_1(t): + 'constant_expression_opt : empty' + pass + +def p_constant_expression_opt_2(t): + 'constant_expression_opt : constant_expression' + pass + +def p_parameter_type_list_opt_1(t): + 'parameter_type_list_opt : empty' + pass + +def p_parameter_type_list_opt_2(t): + 'parameter_type_list_opt : parameter_type_list' + pass + +# statement: + +def p_statement(t): + ''' + statement : labeled_statement + | expression_statement + | compound_statement + | selection_statement + | iteration_statement + | jump_statement + ''' + pass + +# labeled-statement: + +def p_labeled_statement_1(t): + 'labeled_statement : ID COLON statement' + pass + +def p_labeled_statement_2(t): + 'labeled_statement : CASE constant_expression COLON statement' + pass + +def p_labeled_statement_3(t): + 'labeled_statement : DEFAULT COLON statement' + pass + +# expression-statement: +def p_expression_statement(t): + 'expression_statement : expression_opt SEMI' + pass + +# compound-statement: + +def p_compound_statement_1(t): + 'compound_statement : LBRACE declaration_list statement_list RBRACE' + pass + +def p_compound_statement_2(t): + 'compound_statement : LBRACE statement_list RBRACE' + pass + +def p_compound_statement_3(t): + 'compound_statement : LBRACE declaration_list RBRACE' + pass + +def p_compound_statement_4(t): + 'compound_statement : LBRACE RBRACE' + pass + +# statement-list: + +def p_statement_list_1(t): + 'statement_list : statement' + pass + +def p_statement_list_2(t): + 'statement_list : statement_list statement' + pass + +# selection-statement + +def p_selection_statement_1(t): + 'selection_statement : IF LPAREN expression RPAREN statement' + pass + +def p_selection_statement_2(t): + 'selection_statement : IF LPAREN expression RPAREN statement ELSE statement ' + pass + +def p_selection_statement_3(t): + 'selection_statement : SWITCH LPAREN expression RPAREN statement ' + pass + +# iteration_statement: + +def p_iteration_statement_1(t): + 'iteration_statement : WHILE LPAREN expression RPAREN statement' + pass + +def p_iteration_statement_2(t): + 'iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN statement ' + pass + +def p_iteration_statement_3(t): + 'iteration_statement : DO statement WHILE LPAREN expression RPAREN SEMI' + pass + +# jump_statement: + +def p_jump_statement_1(t): + 'jump_statement : GOTO ID SEMI' + pass + +def p_jump_statement_2(t): + 'jump_statement : CONTINUE SEMI' + pass + +def p_jump_statement_3(t): + 'jump_statement : BREAK SEMI' + pass + +def p_jump_statement_4(t): + 'jump_statement : RETURN expression_opt SEMI' + pass + +def p_expression_opt_1(t): + 'expression_opt : empty' + pass + +def p_expression_opt_2(t): + 'expression_opt : expression' + pass + +# expression: +def p_expression_1(t): + 'expression : assignment_expression' + pass + +def p_expression_2(t): + 'expression : expression COMMA assignment_expression' + pass + +# assigment_expression: +def p_assignment_expression_1(t): + 'assignment_expression : conditional_expression' + pass + +def p_assignment_expression_2(t): + 'assignment_expression : unary_expression assignment_operator assignment_expression' + pass + +# assignment_operator: +def p_assignment_operator(t): + ''' + assignment_operator : EQUALS + | TIMESEQUAL + | DIVEQUAL + | MODEQUAL + | PLUSEQUAL + | MINUSEQUAL + | LSHIFTEQUAL + | RSHIFTEQUAL + | ANDEQUAL + | OREQUAL + | XOREQUAL + ''' + pass + +# conditional-expression +def p_conditional_expression_1(t): + 'conditional_expression : logical_or_expression' + pass + +def p_conditional_expression_2(t): + 'conditional_expression : logical_or_expression CONDOP expression COLON conditional_expression ' + pass + +# constant-expression + +def p_constant_expression(t): + 'constant_expression : conditional_expression' + pass + +# logical-or-expression + +def p_logical_or_expression_1(t): + 'logical_or_expression : logical_and_expression' + pass + +def p_logical_or_expression_2(t): + 'logical_or_expression : logical_or_expression LOR logical_and_expression' + pass + +# logical-and-expression + +def p_logical_and_expression_1(t): + 'logical_and_expression : inclusive_or_expression' + pass + +def p_logical_and_expression_2(t): + 'logical_and_expression : logical_and_expression LAND inclusive_or_expression' + pass + +# inclusive-or-expression: + +def p_inclusive_or_expression_1(t): + 'inclusive_or_expression : exclusive_or_expression' + pass + +def p_inclusive_or_expression_2(t): + 'inclusive_or_expression : inclusive_or_expression OR exclusive_or_expression' + pass + +# exclusive-or-expression: + +def p_exclusive_or_expression_1(t): + 'exclusive_or_expression : and_expression' + pass + +def p_exclusive_or_expression_2(t): + 'exclusive_or_expression : exclusive_or_expression XOR and_expression' + pass + +# AND-expression + +def p_and_expression_1(t): + 'and_expression : equality_expression' + pass + +def p_and_expression_2(t): + 'and_expression : and_expression AND equality_expression' + pass + + +# equality-expression: +def p_equality_expression_1(t): + 'equality_expression : relational_expression' + pass + +def p_equality_expression_2(t): + 'equality_expression : equality_expression EQ relational_expression' + pass + +def p_equality_expression_3(t): + 'equality_expression : equality_expression NE relational_expression' + pass + + +# relational-expression: +def p_relational_expression_1(t): + 'relational_expression : shift_expression' + pass + +def p_relational_expression_2(t): + 'relational_expression : relational_expression LT shift_expression' + pass + +def p_relational_expression_3(t): + 'relational_expression : relational_expression GT shift_expression' + pass + +def p_relational_expression_4(t): + 'relational_expression : relational_expression LE shift_expression' + pass + +def p_relational_expression_5(t): + 'relational_expression : relational_expression GE shift_expression' + pass + +# shift-expression + +def p_shift_expression_1(t): + 'shift_expression : additive_expression' + pass + +def p_shift_expression_2(t): + 'shift_expression : shift_expression LSHIFT additive_expression' + pass + +def p_shift_expression_3(t): + 'shift_expression : shift_expression RSHIFT additive_expression' + pass + +# additive-expression + +def p_additive_expression_1(t): + 'additive_expression : multiplicative_expression' + pass + +def p_additive_expression_2(t): + 'additive_expression : additive_expression PLUS multiplicative_expression' + pass + +def p_additive_expression_3(t): + 'additive_expression : additive_expression MINUS multiplicative_expression' + pass + +# multiplicative-expression + +def p_multiplicative_expression_1(t): + 'multiplicative_expression : cast_expression' + pass + +def p_multiplicative_expression_2(t): + 'multiplicative_expression : multiplicative_expression TIMES cast_expression' + pass + +def p_multiplicative_expression_3(t): + 'multiplicative_expression : multiplicative_expression DIVIDE cast_expression' + pass + +def p_multiplicative_expression_4(t): + 'multiplicative_expression : multiplicative_expression MOD cast_expression' + pass + +# cast-expression: + +def p_cast_expression_1(t): + 'cast_expression : unary_expression' + pass + +def p_cast_expression_2(t): + 'cast_expression : LPAREN type_name RPAREN cast_expression' + pass + +# unary-expression: +def p_unary_expression_1(t): + 'unary_expression : postfix_expression' + pass + +def p_unary_expression_2(t): + 'unary_expression : PLUSPLUS unary_expression' + pass + +def p_unary_expression_3(t): + 'unary_expression : MINUSMINUS unary_expression' + pass + +def p_unary_expression_4(t): + 'unary_expression : unary_operator cast_expression' + pass + +def p_unary_expression_5(t): + 'unary_expression : SIZEOF unary_expression' + pass + +def p_unary_expression_6(t): + 'unary_expression : SIZEOF LPAREN type_name RPAREN' + pass + +#unary-operator +def p_unary_operator(t): + '''unary_operator : AND + | TIMES + | PLUS + | MINUS + | NOT + | LNOT ''' + pass + +# postfix-expression: +def p_postfix_expression_1(t): + 'postfix_expression : primary_expression' + pass + +def p_postfix_expression_2(t): + 'postfix_expression : postfix_expression LBRACKET expression RBRACKET' + pass + +def p_postfix_expression_3(t): + 'postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN' + pass + +def p_postfix_expression_4(t): + 'postfix_expression : postfix_expression LPAREN RPAREN' + pass + +def p_postfix_expression_5(t): + 'postfix_expression : postfix_expression PERIOD ID' + pass + +def p_postfix_expression_6(t): + 'postfix_expression : postfix_expression ARROW ID' + pass + +def p_postfix_expression_7(t): + 'postfix_expression : postfix_expression PLUSPLUS' + pass + +def p_postfix_expression_8(t): + 'postfix_expression : postfix_expression MINUSMINUS' + pass + +# primary-expression: +def p_primary_expression(t): + '''primary_expression : ID + | constant + | SCONST + | LPAREN expression RPAREN''' + pass + +# argument-expression-list: +def p_argument_expression_list(t): + '''argument_expression_list : assignment_expression + | argument_expression_list COMMA assignment_expression''' + pass + +# constant: +def p_constant(t): + '''constant : ICONST + | FCONST + | CCONST''' + pass + + +def p_empty(t): + 'empty : ' + pass + +def p_error(t): + print "Whoa. We're hosed" + +import profile +# Build the grammar +profile.run("yacc.yacc()") + + + + diff --git a/ext/ply/example/calc/calc.py b/ext/ply/example/calc/calc.py new file mode 100644 index 000000000..aeb23c246 --- /dev/null +++ b/ext/ply/example/calc/calc.py @@ -0,0 +1,108 @@ +# ----------------------------------------------------------------------------- +# calc.py +# +# A simple calculator with variables. This is from O'Reilly's +# "Lex and Yacc", p. 63. +# ----------------------------------------------------------------------------- + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Integer value too large", t.value + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lineno += t.value.count("\n") + +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +import lex +lex.lex() + +# Parsing rules + +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + +while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + yacc.parse(s) diff --git a/ext/ply/example/hedit/hedit.py b/ext/ply/example/hedit/hedit.py new file mode 100644 index 000000000..f00427bf5 --- /dev/null +++ b/ext/ply/example/hedit/hedit.py @@ -0,0 +1,44 @@ +# ----------------------------------------------------------------------------- +# hedit.py +# +# Paring of Fortran H Edit descriptions (Contributed by Pearu Peterson) +# +# These tokens can't be easily tokenized because they are of the following +# form: +# +# nHc1...cn +# +# where n is a positive integer and c1 ... cn are characters. +# +# This example shows how to modify the state of the lexer to parse +# such tokens +# ----------------------------------------------------------------------------- + +tokens = ( + 'H_EDIT_DESCRIPTOR', + ) + +# Tokens +t_ignore = " \t\n" + +def t_H_EDIT_DESCRIPTOR(t): + r"\d+H.*" # This grabs all of the remaining text + i = t.value.index('H') + n = eval(t.value[:i]) + + # Adjust the tokenizing position + t.lexer.lexpos -= len(t.value) - (i+1+n) + + t.value = t.value[i+1:i+1+n] + return t + +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +import lex +lex.lex() +lex.runmain() + + diff --git a/ext/ply/example/optcalc/README b/ext/ply/example/optcalc/README new file mode 100644 index 000000000..6d196f0ee --- /dev/null +++ b/ext/ply/example/optcalc/README @@ -0,0 +1,9 @@ +An example showing how to use Python optimized mode. +To run: + + - First run 'python calc.py' + + - Then run 'python -OO calc.py' + +If working corretly, the second version should run the +same way. diff --git a/ext/ply/example/optcalc/calc.py b/ext/ply/example/optcalc/calc.py new file mode 100644 index 000000000..fa66cda5b --- /dev/null +++ b/ext/ply/example/optcalc/calc.py @@ -0,0 +1,110 @@ +# ----------------------------------------------------------------------------- +# calc.py +# +# A simple calculator with variables. This is from O'Reilly's +# "Lex and Yacc", p. 63. +# ----------------------------------------------------------------------------- + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Integer value too large", t.value + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lineno += t.value.count("\n") + +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +import lex +lex.lex(optimize=1) + +# Parsing rules + +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + elif t[2] == '<': t[0] = t[1] < t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc(optimize=1) + +while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + yacc.parse(s) + diff --git a/ext/ply/lex.py b/ext/ply/lex.py new file mode 100644 index 000000000..7ad7a394b --- /dev/null +++ b/ext/ply/lex.py @@ -0,0 +1,681 @@ +#----------------------------------------------------------------------------- +# ply: lex.py +# +# Author: David M. Beazley (beazley@cs.uchicago.edu) +# Department of Computer Science +# University of Chicago +# Chicago, IL 60637 +# +# Copyright (C) 2001, David M. Beazley +# +# $Header: /home/stever/bk/newmem2/ext/ply/lex.py 1.1 03/06/06 14:53:34-00:00 stever@ $ +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# See the file COPYING for a complete copy of the LGPL. +# +# +# This module automatically constructs a lexical analysis module from regular +# expression rules defined in a user-defined module. The idea is essentially the same +# as that used in John Aycock's Spark framework, but the implementation works +# at the module level rather than requiring the use of classes. +# +# This module tries to provide an interface that is closely modeled after +# the traditional lex interface in Unix. It also differs from Spark +# in that: +# +# - It provides more extensive error checking and reporting if +# the user supplies a set of regular expressions that can't +# be compiled or if there is any other kind of a problem in +# the specification. +# +# - The interface is geared towards LALR(1) and LR(1) parser +# generators. That is tokens are generated one at a time +# rather than being generated in advanced all in one step. +# +# There are a few limitations of this module +# +# - The module interface makes it somewhat awkward to support more +# than one lexer at a time. Although somewhat inelegant from a +# design perspective, this is rarely a practical concern for +# most compiler projects. +# +# - The lexer requires that the entire input text be read into +# a string before scanning. I suppose that most machines have +# enough memory to make this a minor issues, but it makes +# the lexer somewhat difficult to use in interactive sessions +# or with streaming data. +# +#----------------------------------------------------------------------------- + +r""" +lex.py + +This module builds lex-like scanners based on regular expression rules. +To use the module, simply write a collection of regular expression rules +and actions like this: + +# lexer.py +import lex + +# Define a list of valid tokens +tokens = ( + 'IDENTIFIER', 'NUMBER', 'PLUS', 'MINUS' + ) + +# Define tokens as functions +def t_IDENTIFIER(t): + r' ([a-zA-Z_](\w|_)* ' + return t + +def t_NUMBER(t): + r' \d+ ' + return t + +# Some simple tokens with no actions +t_PLUS = r'\+' +t_MINUS = r'-' + +# Initialize the lexer +lex.lex() + +The tokens list is required and contains a complete list of all valid +token types that the lexer is allowed to produce. Token types are +restricted to be valid identifiers. This means that 'MINUS' is a valid +token type whereas '-' is not. + +Rules are defined by writing a function with a name of the form +t_rulename. Each rule must accept a single argument which is +a token object generated by the lexer. This token has the following +attributes: + + t.type = type string of the token. This is initially set to the + name of the rule without the leading t_ + t.value = The value of the lexeme. + t.lineno = The value of the line number where the token was encountered + +For example, the t_NUMBER() rule above might be called with the following: + + t.type = 'NUMBER' + t.value = '42' + t.lineno = 3 + +Each rule returns the token object it would like to supply to the +parser. In most cases, the token t is returned with few, if any +modifications. To discard a token for things like whitespace or +comments, simply return nothing. For instance: + +def t_whitespace(t): + r' \s+ ' + pass + +For faster lexing, you can also define this in terms of the ignore set like this: + +t_ignore = ' \t' + +The characters in this string are ignored by the lexer. Use of this feature can speed +up parsing significantly since scanning will immediately proceed to the next token. + +lex requires that the token returned by each rule has an attribute +t.type. Other than this, rules are free to return any kind of token +object that they wish and may construct a new type of token object +from the attributes of t (provided the new object has the required +type attribute). + +If illegal characters are encountered, the scanner executes the +function t_error(t) where t is a token representing the rest of the +string that hasn't been matched. If this function isn't defined, a +LexError exception is raised. The .text attribute of this exception +object contains the part of the string that wasn't matched. + +The t.skip(n) method can be used to skip ahead n characters in the +input stream. This is usually only used in the error handling rule. +For instance, the following rule would print an error message and +continue: + +def t_error(t): + print "Illegal character in input %s" % t.value[0] + t.skip(1) + +Of course, a nice scanner might wish to skip more than one character +if the input looks very corrupted. + +The lex module defines a t.lineno attribute on each token that can be used +to track the current line number in the input. The value of this +variable is not modified by lex so it is up to your lexer module +to correctly update its value depending on the lexical properties +of the input language. To do this, you might write rules such as +the following: + +def t_newline(t): + r' \n+ ' + t.lineno += t.value.count("\n") + +To initialize your lexer so that it can be used, simply call the lex.lex() +function in your rule file. If there are any errors in your +specification, warning messages or an exception will be generated to +alert you to the problem. + +(dave: this needs to be rewritten) +To use the newly constructed lexer from another module, simply do +this: + + import lex + import lexer + plex.input("position = initial + rate*60") + + while 1: + token = plex.token() # Get a token + if not token: break # No more tokens + ... do whatever ... + +Assuming that the module 'lexer' has initialized plex as shown +above, parsing modules can safely import 'plex' without having +to import the rule file or any additional imformation about the +scanner you have defined. +""" + +# ----------------------------------------------------------------------------- + + +__version__ = "1.3" + +import re, types, sys, copy + +# Exception thrown when invalid token encountered and no default +class LexError(Exception): + def __init__(self,message,s): + self.args = (message,) + self.text = s + +# Token class +class LexToken: + def __str__(self): + return "LexToken(%s,%r,%d)" % (self.type,self.value,self.lineno) + def __repr__(self): + return str(self) + def skip(self,n): + try: + self._skipn += n + except AttributeError: + self._skipn = n + +# ----------------------------------------------------------------------------- +# Lexer class +# +# input() - Store a new string in the lexer +# token() - Get the next token +# ----------------------------------------------------------------------------- + +class Lexer: + def __init__(self): + self.lexre = None # Master regular expression + self.lexdata = None # Actual input data (as a string) + self.lexpos = 0 # Current position in input text + self.lexlen = 0 # Length of the input text + self.lexindexfunc = [ ] # Reverse mapping of groups to functions and types + self.lexerrorf = None # Error rule (if any) + self.lextokens = None # List of valid tokens + self.lexignore = None # Ignored characters + self.lineno = 1 # Current line number + self.debug = 0 # Debugging mode + self.optimize = 0 # Optimized mode + self.token = self.errtoken + + def __copy__(self): + c = Lexer() + c.lexre = self.lexre + c.lexdata = self.lexdata + c.lexpos = self.lexpos + c.lexlen = self.lexlen + c.lenindexfunc = self.lexindexfunc + c.lexerrorf = self.lexerrorf + c.lextokens = self.lextokens + c.lexignore = self.lexignore + c.lineno = self.lineno + c.optimize = self.optimize + c.token = c.realtoken + + # ------------------------------------------------------------ + # input() - Push a new string into the lexer + # ------------------------------------------------------------ + def input(self,s): + if not isinstance(s,types.StringType): + raise ValueError, "Expected a string" + self.lexdata = s + self.lexpos = 0 + self.lexlen = len(s) + self.token = self.realtoken + + # Change the token routine to point to realtoken() + global token + if token == self.errtoken: + token = self.token + + # ------------------------------------------------------------ + # errtoken() - Return error if token is called with no data + # ------------------------------------------------------------ + def errtoken(self): + raise RuntimeError, "No input string given with input()" + + # ------------------------------------------------------------ + # token() - Return the next token from the Lexer + # + # Note: This function has been carefully implemented to be as fast + # as possible. Don't make changes unless you really know what + # you are doing + # ------------------------------------------------------------ + def realtoken(self): + # Make local copies of frequently referenced attributes + lexpos = self.lexpos + lexlen = self.lexlen + lexignore = self.lexignore + lexdata = self.lexdata + + while lexpos < lexlen: + # This code provides some short-circuit code for whitespace, tabs, and other ignored characters + if lexdata[lexpos] in lexignore: + lexpos += 1 + continue + + # Look for a regular expression match + m = self.lexre.match(lexdata,lexpos) + if m: + i = m.lastindex + lexpos = m.end() + tok = LexToken() + tok.value = m.group() + tok.lineno = self.lineno + tok.lexer = self + func,tok.type = self.lexindexfunc[i] + if not func: + self.lexpos = lexpos + return tok + + # If token is processed by a function, call it + self.lexpos = lexpos + newtok = func(tok) + self.lineno = tok.lineno # Update line number + + # Every function must return a token, if nothing, we just move to next token + if not newtok: continue + + # Verify type of the token. If not in the token map, raise an error + if not self.optimize: + if not self.lextokens.has_key(newtok.type): + raise LexError, ("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( + func.func_code.co_filename, func.func_code.co_firstlineno, + func.__name__, newtok.type),lexdata[lexpos:]) + + return newtok + + # No match. Call t_error() if defined. + if self.lexerrorf: + tok = LexToken() + tok.value = self.lexdata[lexpos:] + tok.lineno = self.lineno + tok.type = "error" + tok.lexer = self + oldpos = lexpos + newtok = self.lexerrorf(tok) + lexpos += getattr(tok,"_skipn",0) + if oldpos == lexpos: + # Error method didn't change text position at all. This is an error. + self.lexpos = lexpos + raise LexError, ("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) + if not newtok: continue + self.lexpos = lexpos + return newtok + + self.lexpos = lexpos + raise LexError, ("No match found", lexdata[lexpos:]) + + # No more input data + self.lexpos = lexpos + 1 + return None + + +# ----------------------------------------------------------------------------- +# validate_file() +# +# This checks to see if there are duplicated t_rulename() functions or strings +# in the parser input file. This is done using a simple regular expression +# match on each line in the filename. +# ----------------------------------------------------------------------------- + +def validate_file(filename): + import os.path + base,ext = os.path.splitext(filename) + if ext != '.py': return 1 # No idea what the file is. Return OK + + try: + f = open(filename) + lines = f.readlines() + f.close() + except IOError: + return 1 # Oh well + + fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') + sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') + counthash = { } + linen = 1 + noerror = 1 + for l in lines: + m = fre.match(l) + if not m: + m = sre.match(l) + if m: + name = m.group(1) + prev = counthash.get(name) + if not prev: + counthash[name] = linen + else: + print "%s:%d: Rule %s redefined. Previously defined on line %d" % (filename,linen,name,prev) + noerror = 0 + linen += 1 + return noerror + +# ----------------------------------------------------------------------------- +# _read_lextab(module) +# +# Reads lexer table from a lextab file instead of using introspection. +# ----------------------------------------------------------------------------- + +def _read_lextab(lexer, fdict, module): + exec "import %s as lextab" % module + lexer.lexre = re.compile(lextab._lexre, re.VERBOSE) + lexer.lexindexfunc = lextab._lextab + for i in range(len(lextab._lextab)): + t = lexer.lexindexfunc[i] + if t: + if t[0]: + lexer.lexindexfunc[i] = (fdict[t[0]],t[1]) + lexer.lextokens = lextab._lextokens + lexer.lexignore = lextab._lexignore + if lextab._lexerrorf: + lexer.lexerrorf = fdict[lextab._lexerrorf] + +# ----------------------------------------------------------------------------- +# lex(module) +# +# Build all of the regular expression rules from definitions in the supplied module +# ----------------------------------------------------------------------------- +def lex(module=None,debug=0,optimize=0,lextab="lextab"): + ldict = None + regex = "" + error = 0 + files = { } + lexer = Lexer() + lexer.debug = debug + lexer.optimize = optimize + global token,input + + if module: + if not isinstance(module, types.ModuleType): + raise ValueError,"Expected a module" + + ldict = module.__dict__ + + else: + # No module given. We might be able to get information from the caller. + try: + raise RuntimeError + except RuntimeError: + e,b,t = sys.exc_info() + f = t.tb_frame + f = f.f_back # Walk out to our calling function + ldict = f.f_globals # Grab its globals dictionary + + if optimize and lextab: + try: + _read_lextab(lexer,ldict, lextab) + if not lexer.lexignore: lexer.lexignore = "" + token = lexer.token + input = lexer.input + return lexer + + except ImportError: + pass + + # Get the tokens map + tokens = ldict.get("tokens",None) + if not tokens: + raise SyntaxError,"lex: module does not define 'tokens'" + if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)): + raise SyntaxError,"lex: tokens must be a list or tuple." + + # Build a dictionary of valid token names + lexer.lextokens = { } + if not optimize: + + # Utility function for verifying tokens + def is_identifier(s): + for c in s: + if not (c.isalnum() or c == '_'): return 0 + return 1 + + for n in tokens: + if not is_identifier(n): + print "lex: Bad token name '%s'" % n + error = 1 + if lexer.lextokens.has_key(n): + print "lex: Warning. Token '%s' multiply defined." % n + lexer.lextokens[n] = None + else: + for n in tokens: lexer.lextokens[n] = None + + + if debug: + print "lex: tokens = '%s'" % lexer.lextokens.keys() + + # Get a list of symbols with the t_ prefix + tsymbols = [f for f in ldict.keys() if f[:2] == 't_'] + + # Now build up a list of functions and a list of strings + fsymbols = [ ] + ssymbols = [ ] + for f in tsymbols: + if isinstance(ldict[f],types.FunctionType): + fsymbols.append(ldict[f]) + elif isinstance(ldict[f],types.StringType): + ssymbols.append((f,ldict[f])) + else: + print "lex: %s not defined as a function or string" % f + error = 1 + + # Sort the functions by line number + fsymbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno)) + + # Sort the strings by regular expression length + ssymbols.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1]))) + + # Check for non-empty symbols + if len(fsymbols) == 0 and len(ssymbols) == 0: + raise SyntaxError,"lex: no rules of the form t_rulename are defined." + + # Add all of the rules defined with actions first + for f in fsymbols: + + line = f.func_code.co_firstlineno + file = f.func_code.co_filename + files[file] = None + + if not optimize: + if f.func_code.co_argcount > 1: + print "%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__) + error = 1 + continue + + if f.func_code.co_argcount < 1: + print "%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__) + error = 1 + continue + + if f.__name__ == 't_ignore': + print "%s:%d: Rule '%s' must be defined as a string." % (file,line,f.__name__) + error = 1 + continue + + if f.__name__ == 't_error': + lexer.lexerrorf = f + continue + + if f.__doc__: + if not optimize: + try: + c = re.compile(f.__doc__, re.VERBOSE) + except re.error,e: + print "%s:%d: Invalid regular expression for rule '%s'. %s" % (file,line,f.__name__,e) + error = 1 + continue + + if debug: + print "lex: Adding rule %s -> '%s'" % (f.__name__,f.__doc__) + + # Okay. The regular expression seemed okay. Let's append it to the master regular + # expression we're building + + if (regex): regex += "|" + regex += "(?P<%s>%s)" % (f.__name__,f.__doc__) + else: + print "%s:%d: No regular expression defined for rule '%s'" % (file,line,f.__name__) + + # Now add all of the simple rules + for name,r in ssymbols: + + if name == 't_ignore': + lexer.lexignore = r + continue + + if not optimize: + if name == 't_error': + raise SyntaxError,"lex: Rule 't_error' must be defined as a function" + error = 1 + continue + + if not lexer.lextokens.has_key(name[2:]): + print "lex: Rule '%s' defined for an unspecified token %s." % (name,name[2:]) + error = 1 + continue + try: + c = re.compile(r,re.VERBOSE) + except re.error,e: + print "lex: Invalid regular expression for rule '%s'. %s" % (name,e) + error = 1 + continue + if debug: + print "lex: Adding rule %s -> '%s'" % (name,r) + + if regex: regex += "|" + regex += "(?P<%s>%s)" % (name,r) + + if not optimize: + for f in files.keys(): + if not validate_file(f): + error = 1 + try: + if debug: + print "lex: regex = '%s'" % regex + lexer.lexre = re.compile(regex, re.VERBOSE) + + # Build the index to function map for the matching engine + lexer.lexindexfunc = [ None ] * (max(lexer.lexre.groupindex.values())+1) + for f,i in lexer.lexre.groupindex.items(): + handle = ldict[f] + if isinstance(handle,types.FunctionType): + lexer.lexindexfunc[i] = (handle,handle.__name__[2:]) + else: + # If rule was specified as a string, we build an anonymous + # callback function to carry out the action + lexer.lexindexfunc[i] = (None,f[2:]) + + # If a lextab was specified, we create a file containing the precomputed + # regular expression and index table + + if lextab and optimize: + lt = open(lextab+".py","w") + lt.write("# %s.py. This file automatically created by PLY. Don't edit.\n" % lextab) + lt.write("_lexre = %s\n" % repr(regex)) + lt.write("_lextab = [\n"); + for i in range(0,len(lexer.lexindexfunc)): + t = lexer.lexindexfunc[i] + if t: + if t[0]: + lt.write(" ('%s',%s),\n"% (t[0].__name__, repr(t[1]))) + else: + lt.write(" (None,%s),\n" % repr(t[1])) + else: + lt.write(" None,\n") + + lt.write("]\n"); + lt.write("_lextokens = %s\n" % repr(lexer.lextokens)) + lt.write("_lexignore = %s\n" % repr(lexer.lexignore)) + if (lexer.lexerrorf): + lt.write("_lexerrorf = %s\n" % repr(lexer.lexerrorf.__name__)) + else: + lt.write("_lexerrorf = None\n") + lt.close() + + except re.error,e: + print "lex: Fatal error. Unable to compile regular expression rules. %s" % e + error = 1 + if error: + raise SyntaxError,"lex: Unable to build lexer." + if not lexer.lexerrorf: + print "lex: Warning. no t_error rule is defined." + + if not lexer.lexignore: lexer.lexignore = "" + + # Create global versions of the token() and input() functions + token = lexer.token + input = lexer.input + + return lexer + +# ----------------------------------------------------------------------------- +# run() +# +# This runs the lexer as a main program +# ----------------------------------------------------------------------------- + +def runmain(lexer=None,data=None): + if not data: + try: + filename = sys.argv[1] + f = open(filename) + data = f.read() + f.close() + except IndexError: + print "Reading from standard input (type EOF to end):" + data = sys.stdin.read() + + if lexer: + _input = lexer.input + else: + _input = input + _input(data) + if lexer: + _token = lexer.token + else: + _token = token + + while 1: + tok = _token() + if not tok: break + print "(%s,'%s',%d)" % (tok.type, tok.value, tok.lineno) + + + + diff --git a/ext/ply/test/README b/ext/ply/test/README new file mode 100644 index 000000000..bca748497 --- /dev/null +++ b/ext/ply/test/README @@ -0,0 +1,9 @@ +This directory mostly contains tests for various types of error +conditions. To run: + + $ python testlex.py . + $ python testyacc.py . + +(make sure lex.py and yacc.py exist in this directory before +running the tests). + diff --git a/ext/ply/test/calclex.py b/ext/ply/test/calclex.py new file mode 100644 index 000000000..f8eb91a09 --- /dev/null +++ b/ext/ply/test/calclex.py @@ -0,0 +1,46 @@ +# ----------------------------------------------------------------------------- +# calclex.py +# ----------------------------------------------------------------------------- + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Integer value too large", t.value + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lineno += t.value.count("\n") + +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +import lex +lex.lex() + + + diff --git a/ext/ply/test/lex_doc1.exp b/ext/ply/test/lex_doc1.exp new file mode 100644 index 000000000..29381911d --- /dev/null +++ b/ext/ply/test/lex_doc1.exp @@ -0,0 +1 @@ +./lex_doc1.py:15: No regular expression defined for rule 't_NUMBER' diff --git a/ext/ply/test/lex_doc1.py b/ext/ply/test/lex_doc1.py new file mode 100644 index 000000000..fb0fb885e --- /dev/null +++ b/ext/ply/test/lex_doc1.py @@ -0,0 +1,27 @@ +# lex_token.py +# +# Missing documentation string + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +def t_NUMBER(t): + pass + +def t_error(t): + pass + + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_dup1.exp b/ext/ply/test/lex_dup1.exp new file mode 100644 index 000000000..22bca3190 --- /dev/null +++ b/ext/ply/test/lex_dup1.exp @@ -0,0 +1,2 @@ +./lex_dup1.py:17: Rule t_NUMBER redefined. Previously defined on line 15 +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_dup1.py b/ext/ply/test/lex_dup1.py new file mode 100644 index 000000000..88bbe00e9 --- /dev/null +++ b/ext/ply/test/lex_dup1.py @@ -0,0 +1,27 @@ +# lex_token.py +# +# Duplicated rule specifiers + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +t_NUMBER = r'\d+' + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_dup2.exp b/ext/ply/test/lex_dup2.exp new file mode 100644 index 000000000..883bdad46 --- /dev/null +++ b/ext/ply/test/lex_dup2.exp @@ -0,0 +1,2 @@ +./lex_dup2.py:19: Rule t_NUMBER redefined. Previously defined on line 15 +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_dup2.py b/ext/ply/test/lex_dup2.py new file mode 100644 index 000000000..65e0b21a2 --- /dev/null +++ b/ext/ply/test/lex_dup2.py @@ -0,0 +1,31 @@ +# lex_token.py +# +# Duplicated rule specifiers + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +def t_NUMBER(t): + r'\d+' + pass + +def t_NUMBER(t): + r'\d+' + pass + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_dup3.exp b/ext/ply/test/lex_dup3.exp new file mode 100644 index 000000000..916612aa1 --- /dev/null +++ b/ext/ply/test/lex_dup3.exp @@ -0,0 +1,2 @@ +./lex_dup3.py:17: Rule t_NUMBER redefined. Previously defined on line 15 +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_dup3.py b/ext/ply/test/lex_dup3.py new file mode 100644 index 000000000..424101823 --- /dev/null +++ b/ext/ply/test/lex_dup3.py @@ -0,0 +1,29 @@ +# lex_token.py +# +# Duplicated rule specifiers + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_NUMBER(t): + r'\d+' + pass + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_empty.exp b/ext/ply/test/lex_empty.exp new file mode 100644 index 000000000..af38602d5 --- /dev/null +++ b/ext/ply/test/lex_empty.exp @@ -0,0 +1 @@ +SyntaxError: lex: no rules of the form t_rulename are defined. diff --git a/ext/ply/test/lex_empty.py b/ext/ply/test/lex_empty.py new file mode 100644 index 000000000..6472832f1 --- /dev/null +++ b/ext/ply/test/lex_empty.py @@ -0,0 +1,18 @@ +# lex_token.py +# +# No rules defined + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_error1.exp b/ext/ply/test/lex_error1.exp new file mode 100644 index 000000000..baa19e5b3 --- /dev/null +++ b/ext/ply/test/lex_error1.exp @@ -0,0 +1 @@ +lex: Warning. no t_error rule is defined. diff --git a/ext/ply/test/lex_error1.py b/ext/ply/test/lex_error1.py new file mode 100644 index 000000000..ed7980346 --- /dev/null +++ b/ext/ply/test/lex_error1.py @@ -0,0 +1,22 @@ +# lex_token.py +# +# Missing t_error() rule + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_error2.exp b/ext/ply/test/lex_error2.exp new file mode 100644 index 000000000..fb1b55c8b --- /dev/null +++ b/ext/ply/test/lex_error2.exp @@ -0,0 +1 @@ +SyntaxError: lex: Rule 't_error' must be defined as a function diff --git a/ext/ply/test/lex_error2.py b/ext/ply/test/lex_error2.py new file mode 100644 index 000000000..80020f72b --- /dev/null +++ b/ext/ply/test/lex_error2.py @@ -0,0 +1,24 @@ +# lex_token.py +# +# t_error defined, but not function + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +t_error = "foo" + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_error3.exp b/ext/ply/test/lex_error3.exp new file mode 100644 index 000000000..936828f93 --- /dev/null +++ b/ext/ply/test/lex_error3.exp @@ -0,0 +1,2 @@ +./lex_error3.py:17: Rule 't_error' requires an argument. +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_error3.py b/ext/ply/test/lex_error3.py new file mode 100644 index 000000000..46facf589 --- /dev/null +++ b/ext/ply/test/lex_error3.py @@ -0,0 +1,25 @@ +# lex_token.py +# +# t_error defined as function, but with wrong # args + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_error4.exp b/ext/ply/test/lex_error4.exp new file mode 100644 index 000000000..242516576 --- /dev/null +++ b/ext/ply/test/lex_error4.exp @@ -0,0 +1,2 @@ +./lex_error4.py:17: Rule 't_error' has too many arguments. +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_error4.py b/ext/ply/test/lex_error4.py new file mode 100644 index 000000000..d777fee84 --- /dev/null +++ b/ext/ply/test/lex_error4.py @@ -0,0 +1,25 @@ +# lex_token.py +# +# t_error defined as function, but too many args + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(t,s): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_hedit.exp b/ext/ply/test/lex_hedit.exp new file mode 100644 index 000000000..0b09827c6 --- /dev/null +++ b/ext/ply/test/lex_hedit.exp @@ -0,0 +1,3 @@ +(H_EDIT_DESCRIPTOR,'abc',1) +(H_EDIT_DESCRIPTOR,'abcdefghij',1) +(H_EDIT_DESCRIPTOR,'xy',1) diff --git a/ext/ply/test/lex_hedit.py b/ext/ply/test/lex_hedit.py new file mode 100644 index 000000000..68f9fcbd1 --- /dev/null +++ b/ext/ply/test/lex_hedit.py @@ -0,0 +1,44 @@ +# ----------------------------------------------------------------------------- +# hedit.py +# +# Paring of Fortran H Edit descriptions (Contributed by Pearu Peterson) +# +# These tokens can't be easily tokenized because they are of the following +# form: +# +# nHc1...cn +# +# where n is a positive integer and c1 ... cn are characters. +# +# This example shows how to modify the state of the lexer to parse +# such tokens +# ----------------------------------------------------------------------------- + +tokens = ( + 'H_EDIT_DESCRIPTOR', + ) + +# Tokens +t_ignore = " \t\n" + +def t_H_EDIT_DESCRIPTOR(t): + r"\d+H.*" # This grabs all of the remaining text + i = t.value.index('H') + n = eval(t.value[:i]) + + # Adjust the tokenizing position + t.lexer.lexpos -= len(t.value) - (i+1+n) + t.value = t.value[i+1:i+1+n] + return t + +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.skip(1) + +# Build the lexer +import lex +lex.lex() +lex.runmain(data="3Habc 10Habcdefghij 2Hxy") + + + diff --git a/ext/ply/test/lex_ignore.exp b/ext/ply/test/lex_ignore.exp new file mode 100644 index 000000000..c3b04a154 --- /dev/null +++ b/ext/ply/test/lex_ignore.exp @@ -0,0 +1,2 @@ +./lex_ignore.py:17: Rule 't_ignore' must be defined as a string. +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_ignore.py b/ext/ply/test/lex_ignore.py new file mode 100644 index 000000000..49c303f81 --- /dev/null +++ b/ext/ply/test/lex_ignore.py @@ -0,0 +1,29 @@ +# lex_token.py +# +# Improperly specific ignore declaration + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_ignore(t): + ' \t' + pass + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_re1.exp b/ext/ply/test/lex_re1.exp new file mode 100644 index 000000000..634eefefe --- /dev/null +++ b/ext/ply/test/lex_re1.exp @@ -0,0 +1,2 @@ +lex: Invalid regular expression for rule 't_NUMBER'. unbalanced parenthesis +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_re1.py b/ext/ply/test/lex_re1.py new file mode 100644 index 000000000..4a055ad72 --- /dev/null +++ b/ext/ply/test/lex_re1.py @@ -0,0 +1,25 @@ +# lex_token.py +# +# Bad regular expression in a string + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'(\d+' + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_rule1.exp b/ext/ply/test/lex_rule1.exp new file mode 100644 index 000000000..0c23ca294 --- /dev/null +++ b/ext/ply/test/lex_rule1.exp @@ -0,0 +1,2 @@ +lex: t_NUMBER not defined as a function or string +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_rule1.py b/ext/ply/test/lex_rule1.py new file mode 100644 index 000000000..ff3764ea1 --- /dev/null +++ b/ext/ply/test/lex_rule1.py @@ -0,0 +1,25 @@ +# lex_token.py +# +# Rule defined as some other type + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = 1 + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_token1.exp b/ext/ply/test/lex_token1.exp new file mode 100644 index 000000000..3792831fa --- /dev/null +++ b/ext/ply/test/lex_token1.exp @@ -0,0 +1 @@ +SyntaxError: lex: module does not define 'tokens' diff --git a/ext/ply/test/lex_token1.py b/ext/ply/test/lex_token1.py new file mode 100644 index 000000000..e8eca2b63 --- /dev/null +++ b/ext/ply/test/lex_token1.py @@ -0,0 +1,19 @@ +# lex_token.py +# +# Tests for absence of tokens variable + +import lex + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_token2.exp b/ext/ply/test/lex_token2.exp new file mode 100644 index 000000000..3f98fe51d --- /dev/null +++ b/ext/ply/test/lex_token2.exp @@ -0,0 +1 @@ +SyntaxError: lex: tokens must be a list or tuple. diff --git a/ext/ply/test/lex_token2.py b/ext/ply/test/lex_token2.py new file mode 100644 index 000000000..38b34dabe --- /dev/null +++ b/ext/ply/test/lex_token2.py @@ -0,0 +1,21 @@ +# lex_token.py +# +# Tests for tokens of wrong type + +import lex + +tokens = "PLUS MINUS NUMBER" + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_token3.exp b/ext/ply/test/lex_token3.exp new file mode 100644 index 000000000..d991d3c37 --- /dev/null +++ b/ext/ply/test/lex_token3.exp @@ -0,0 +1,2 @@ +lex: Rule 't_MINUS' defined for an unspecified token MINUS. +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_token3.py b/ext/ply/test/lex_token3.py new file mode 100644 index 000000000..909f9180d --- /dev/null +++ b/ext/ply/test/lex_token3.py @@ -0,0 +1,24 @@ +# lex_token.py +# +# tokens is right type, but is missing a token for one rule + +import lex + +tokens = [ + "PLUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_token4.exp b/ext/ply/test/lex_token4.exp new file mode 100644 index 000000000..3dd88e05a --- /dev/null +++ b/ext/ply/test/lex_token4.exp @@ -0,0 +1,2 @@ +lex: Bad token name '-' +SyntaxError: lex: Unable to build lexer. diff --git a/ext/ply/test/lex_token4.py b/ext/ply/test/lex_token4.py new file mode 100644 index 000000000..d77d1662c --- /dev/null +++ b/ext/ply/test/lex_token4.py @@ -0,0 +1,26 @@ +# lex_token.py +# +# Bad token name + +import lex + +tokens = [ + "PLUS", + "MINUS", + "-", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() + + diff --git a/ext/ply/test/lex_token5.exp b/ext/ply/test/lex_token5.exp new file mode 100644 index 000000000..d7bcb2e7c --- /dev/null +++ b/ext/ply/test/lex_token5.exp @@ -0,0 +1 @@ +lex.LexError: ./lex_token5.py:16: Rule 't_NUMBER' returned an unknown token type 'NUM' diff --git a/ext/ply/test/lex_token5.py b/ext/ply/test/lex_token5.py new file mode 100644 index 000000000..d9b0c96aa --- /dev/null +++ b/ext/ply/test/lex_token5.py @@ -0,0 +1,31 @@ +# lex_token.py +# +# Return a bad token name + +import lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' + +def t_NUMBER(t): + r'\d+' + t.type = "NUM" + return t + +def t_error(t): + pass + +import sys +sys.tracebacklimit = 0 + +lex.lex() +lex.input("1234") +t = lex.token() + + diff --git a/ext/ply/test/testlex.py b/ext/ply/test/testlex.py new file mode 100755 index 000000000..df000b83d --- /dev/null +++ b/ext/ply/test/testlex.py @@ -0,0 +1,57 @@ +#!/usr/local/bin +# ---------------------------------------------------------------------- +# testlex.py +# +# Run tests for the lexing module +# ---------------------------------------------------------------------- + +import sys,os,glob + +if len(sys.argv) < 2: + print "Usage: python testlex.py directory" + raise SystemExit + +dirname = None +make = 0 + +for o in sys.argv[1:]: + if o == '-make': + make = 1 + else: + dirname = o + break + +if not dirname: + print "Usage: python testlex.py [-make] directory" + raise SystemExit + +f = glob.glob("%s/%s" % (dirname,"lex_*.py")) + +print "**** Running tests for lex ****" + +for t in f: + name = t[:-3] + print "Testing %-32s" % name, + if make: + if not os.path.exists("%s.exp" % name): + os.system("python %s.py >%s.exp 2>&1" % (name,name)) + passed = 1 + else: + os.system("python %s.py >%s.out 2>&1" % (name,name)) + a = os.system("diff %s.out %s.exp >%s.dif" % (name,name,name)) + if a == 0: + passed = 1 + else: + passed = 0 + + if passed: + print "Passed" + else: + print "Failed. See %s.dif" % name + + + + + + + diff --git a/ext/ply/test/testyacc.py b/ext/ply/test/testyacc.py new file mode 100644 index 000000000..a185cbb29 --- /dev/null +++ b/ext/ply/test/testyacc.py @@ -0,0 +1,58 @@ +#!/usr/local/bin +# ---------------------------------------------------------------------- +# testyacc.py +# +# Run tests for the yacc module +# ---------------------------------------------------------------------- + +import sys,os,glob + +if len(sys.argv) < 2: + print "Usage: python testyacc.py directory" + raise SystemExit + +dirname = None +make = 0 + +for o in sys.argv[1:]: + if o == '-make': + make = 1 + else: + dirname = o + break + +if not dirname: + print "Usage: python testyacc.py [-make] directory" + raise SystemExit + +f = glob.glob("%s/%s" % (dirname,"yacc_*.py")) + +print "**** Running tests for yacc ****" + +for t in f: + name = t[:-3] + print "Testing %-32s" % name, + os.system("rm -f %s/parsetab.*" % dirname) + if make: + if not os.path.exists("%s.exp" % name): + os.system("python %s.py >%s.exp 2>&1" % (name,name)) + passed = 1 + else: + os.system("python %s.py >%s.out 2>&1" % (name,name)) + a = os.system("diff %s.out %s.exp >%s.dif" % (name,name,name)) + if a == 0: + passed = 1 + else: + passed = 0 + + if passed: + print "Passed" + else: + print "Failed. See %s.dif" % name + + + + + + + diff --git a/ext/ply/test/yacc_badargs.exp b/ext/ply/test/yacc_badargs.exp new file mode 100644 index 000000000..b145c51f2 --- /dev/null +++ b/ext/ply/test/yacc_badargs.exp @@ -0,0 +1,3 @@ +./yacc_badargs.py:21: Rule 'p_statement_assign' has too many arguments. +./yacc_badargs.py:25: Rule 'p_statement_expr' requires an argument. +yacc.YaccError: Unable to construct parser. diff --git a/ext/ply/test/yacc_badargs.py b/ext/ply/test/yacc_badargs.py new file mode 100644 index 000000000..12075efcc --- /dev/null +++ b/ext/ply/test/yacc_badargs.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_badargs.py +# +# Rules with wrong # args +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t,s): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_badprec.exp b/ext/ply/test/yacc_badprec.exp new file mode 100644 index 000000000..7764b0246 --- /dev/null +++ b/ext/ply/test/yacc_badprec.exp @@ -0,0 +1 @@ +yacc.YaccError: precedence must be a list or tuple. diff --git a/ext/ply/test/yacc_badprec.py b/ext/ply/test/yacc_badprec.py new file mode 100644 index 000000000..55bf7720d --- /dev/null +++ b/ext/ply/test/yacc_badprec.py @@ -0,0 +1,63 @@ +# ----------------------------------------------------------------------------- +# yacc_badprec.py +# +# Bad precedence specifier +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = "blah" + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_badprec2.exp b/ext/ply/test/yacc_badprec2.exp new file mode 100644 index 000000000..1df1427b2 --- /dev/null +++ b/ext/ply/test/yacc_badprec2.exp @@ -0,0 +1,3 @@ +yacc: Invalid precedence table. +yacc: Generating SLR parsing table... +yacc: 4 shift/reduce conflicts diff --git a/ext/ply/test/yacc_badprec2.py b/ext/ply/test/yacc_badprec2.py new file mode 100644 index 000000000..9cbc99827 --- /dev/null +++ b/ext/ply/test/yacc_badprec2.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_badprec2.py +# +# Bad precedence +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + 42, + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_badrule.exp b/ext/ply/test/yacc_badrule.exp new file mode 100644 index 000000000..553779778 --- /dev/null +++ b/ext/ply/test/yacc_badrule.exp @@ -0,0 +1,5 @@ +./yacc_badrule.py:22: Syntax error. Expected ':' +./yacc_badrule.py:26: Syntax error in rule 'statement' +./yacc_badrule.py:31: Syntax error. Expected ':' +./yacc_badrule.py:40: Syntax error. Expected ':' +yacc.YaccError: Unable to construct parser. diff --git a/ext/ply/test/yacc_badrule.py b/ext/ply/test/yacc_badrule.py new file mode 100644 index 000000000..cad3a967e --- /dev/null +++ b/ext/ply/test/yacc_badrule.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_badrule.py +# +# Syntax problems in the rule strings +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression: MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_badtok.exp b/ext/ply/test/yacc_badtok.exp new file mode 100644 index 000000000..f6e64726c --- /dev/null +++ b/ext/ply/test/yacc_badtok.exp @@ -0,0 +1 @@ +yacc.YaccError: tokens must be a list or tuple. diff --git a/ext/ply/test/yacc_badtok.py b/ext/ply/test/yacc_badtok.py new file mode 100644 index 000000000..a17d26aaa --- /dev/null +++ b/ext/ply/test/yacc_badtok.py @@ -0,0 +1,68 @@ +# ----------------------------------------------------------------------------- +# yacc_badtok.py +# +# A grammar, but tokens is a bad datatype +# ----------------------------------------------------------------------------- + +import sys +sys.tracebacklimit = 0 + +tokens = "Hello" + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_dup.exp b/ext/ply/test/yacc_dup.exp new file mode 100644 index 000000000..99f3fe22c --- /dev/null +++ b/ext/ply/test/yacc_dup.exp @@ -0,0 +1,4 @@ +./yacc_dup.py:25: Function p_statement redefined. Previously defined on line 21 +yacc: Warning. Token 'EQUALS' defined, but not used. +yacc: Warning. There is 1 unused token. +yacc: Generating SLR parsing table... diff --git a/ext/ply/test/yacc_dup.py b/ext/ply/test/yacc_dup.py new file mode 100644 index 000000000..557cd0ae1 --- /dev/null +++ b/ext/ply/test/yacc_dup.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_dup.py +# +# Duplicated rule name +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_error1.exp b/ext/ply/test/yacc_error1.exp new file mode 100644 index 000000000..980fc905c --- /dev/null +++ b/ext/ply/test/yacc_error1.exp @@ -0,0 +1 @@ +yacc.YaccError: ./yacc_error1.py:59: p_error() requires 1 argument. diff --git a/ext/ply/test/yacc_error1.py b/ext/ply/test/yacc_error1.py new file mode 100644 index 000000000..413004520 --- /dev/null +++ b/ext/ply/test/yacc_error1.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_error1.py +# +# Bad p_error() function +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t,s): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_error2.exp b/ext/ply/test/yacc_error2.exp new file mode 100644 index 000000000..d0573b4dd --- /dev/null +++ b/ext/ply/test/yacc_error2.exp @@ -0,0 +1 @@ +yacc.YaccError: ./yacc_error2.py:59: p_error() requires 1 argument. diff --git a/ext/ply/test/yacc_error2.py b/ext/ply/test/yacc_error2.py new file mode 100644 index 000000000..d4fd1d219 --- /dev/null +++ b/ext/ply/test/yacc_error2.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_error1.py +# +# Bad p_error() function +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_error3.exp b/ext/ply/test/yacc_error3.exp new file mode 100644 index 000000000..31eaee754 --- /dev/null +++ b/ext/ply/test/yacc_error3.exp @@ -0,0 +1 @@ +yacc.YaccError: 'p_error' defined, but is not a function. diff --git a/ext/ply/test/yacc_error3.py b/ext/ply/test/yacc_error3.py new file mode 100644 index 000000000..7093fab48 --- /dev/null +++ b/ext/ply/test/yacc_error3.py @@ -0,0 +1,66 @@ +# ----------------------------------------------------------------------------- +# yacc_error1.py +# +# Bad p_error() function +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +p_error = "blah" + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_inf.exp b/ext/ply/test/yacc_inf.exp new file mode 100644 index 000000000..a7f47dada --- /dev/null +++ b/ext/ply/test/yacc_inf.exp @@ -0,0 +1,5 @@ +yacc: Warning. Token 'NUMBER' defined, but not used. +yacc: Warning. There is 1 unused token. +yacc: Infinite recursion detected for symbol 'statement'. +yacc: Infinite recursion detected for symbol 'expression'. +yacc.YaccError: Unable to construct parser. diff --git a/ext/ply/test/yacc_inf.py b/ext/ply/test/yacc_inf.py new file mode 100644 index 000000000..885e2c4df --- /dev/null +++ b/ext/ply/test/yacc_inf.py @@ -0,0 +1,55 @@ +# ----------------------------------------------------------------------------- +# yacc_inf.py +# +# Infinite recursion +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_missing1.exp b/ext/ply/test/yacc_missing1.exp new file mode 100644 index 000000000..065d6a54a --- /dev/null +++ b/ext/ply/test/yacc_missing1.exp @@ -0,0 +1,2 @@ +./yacc_missing1.py:22: Symbol 'location' used, but not defined as a token or a rule. +yacc.YaccError: Unable to construct parser. diff --git a/ext/ply/test/yacc_missing1.py b/ext/ply/test/yacc_missing1.py new file mode 100644 index 000000000..e63904d0e --- /dev/null +++ b/ext/ply/test/yacc_missing1.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_missing1.py +# +# Grammar with a missing rule +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : location EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_nodoc.exp b/ext/ply/test/yacc_nodoc.exp new file mode 100644 index 000000000..3f52a3287 --- /dev/null +++ b/ext/ply/test/yacc_nodoc.exp @@ -0,0 +1,2 @@ +./yacc_nodoc.py:25: No documentation string specified in function 'p_statement_expr' +yacc: Generating SLR parsing table... diff --git a/ext/ply/test/yacc_nodoc.py b/ext/ply/test/yacc_nodoc.py new file mode 100644 index 000000000..e3941bdaa --- /dev/null +++ b/ext/ply/test/yacc_nodoc.py @@ -0,0 +1,66 @@ +# ----------------------------------------------------------------------------- +# yacc_nodoc.py +# +# Rule with a missing doc-string +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_noerror.exp b/ext/ply/test/yacc_noerror.exp new file mode 100644 index 000000000..986fa31fa --- /dev/null +++ b/ext/ply/test/yacc_noerror.exp @@ -0,0 +1,2 @@ +yacc: Warning. no p_error() function is defined. +yacc: Generating SLR parsing table... diff --git a/ext/ply/test/yacc_noerror.py b/ext/ply/test/yacc_noerror.py new file mode 100644 index 000000000..d92f48ea6 --- /dev/null +++ b/ext/ply/test/yacc_noerror.py @@ -0,0 +1,64 @@ +# ----------------------------------------------------------------------------- +# yacc_noerror.py +# +# No p_error() rule defined. +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_nop.exp b/ext/ply/test/yacc_nop.exp new file mode 100644 index 000000000..062878b9e --- /dev/null +++ b/ext/ply/test/yacc_nop.exp @@ -0,0 +1,2 @@ +./yacc_nop.py:25: Warning. Possible grammar rule 'statement_expr' defined without p_ prefix. +yacc: Generating SLR parsing table... diff --git a/ext/ply/test/yacc_nop.py b/ext/ply/test/yacc_nop.py new file mode 100644 index 000000000..c599ffd5d --- /dev/null +++ b/ext/ply/test/yacc_nop.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_nop.py +# +# Possible grammar rule defined without p_ prefix +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_notfunc.exp b/ext/ply/test/yacc_notfunc.exp new file mode 100644 index 000000000..271167341 --- /dev/null +++ b/ext/ply/test/yacc_notfunc.exp @@ -0,0 +1,4 @@ +yacc: Warning. 'p_statement_assign' not defined as a function +yacc: Warning. Token 'EQUALS' defined, but not used. +yacc: Warning. There is 1 unused token. +yacc: Generating SLR parsing table... diff --git a/ext/ply/test/yacc_notfunc.py b/ext/ply/test/yacc_notfunc.py new file mode 100644 index 000000000..f61663d60 --- /dev/null +++ b/ext/ply/test/yacc_notfunc.py @@ -0,0 +1,65 @@ +# ----------------------------------------------------------------------------- +# yacc_notfunc.py +# +# p_rule not defined as a function +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +p_statement_assign = "Blah" + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_notok.exp b/ext/ply/test/yacc_notok.exp new file mode 100644 index 000000000..708f6f597 --- /dev/null +++ b/ext/ply/test/yacc_notok.exp @@ -0,0 +1 @@ +yacc.YaccError: module does not define a list 'tokens' diff --git a/ext/ply/test/yacc_notok.py b/ext/ply/test/yacc_notok.py new file mode 100644 index 000000000..dfa0059be --- /dev/null +++ b/ext/ply/test/yacc_notok.py @@ -0,0 +1,66 @@ +# ----------------------------------------------------------------------------- +# yacc_notok.py +# +# A grammar, but we forgot to import the tokens list +# ----------------------------------------------------------------------------- + +import sys +sys.tracebacklimit = 0 + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_rr.exp b/ext/ply/test/yacc_rr.exp new file mode 100644 index 000000000..0ec556d16 --- /dev/null +++ b/ext/ply/test/yacc_rr.exp @@ -0,0 +1,2 @@ +yacc: Generating SLR parsing table... +yacc: 1 reduce/reduce conflict diff --git a/ext/ply/test/yacc_rr.py b/ext/ply/test/yacc_rr.py new file mode 100644 index 000000000..c061c2c17 --- /dev/null +++ b/ext/ply/test/yacc_rr.py @@ -0,0 +1,71 @@ +# ----------------------------------------------------------------------------- +# yacc_rr.py +# +# A grammar with a reduce/reduce conflict +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_assign_2(t): + 'statement : NAME EQUALS NUMBER' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_simple.exp b/ext/ply/test/yacc_simple.exp new file mode 100644 index 000000000..de7964b6f --- /dev/null +++ b/ext/ply/test/yacc_simple.exp @@ -0,0 +1 @@ +yacc: Generating SLR parsing table... diff --git a/ext/ply/test/yacc_simple.py b/ext/ply/test/yacc_simple.py new file mode 100644 index 000000000..7b4b40b17 --- /dev/null +++ b/ext/ply/test/yacc_simple.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_simple.py +# +# A simple, properly specifier grammar +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_sr.exp b/ext/ply/test/yacc_sr.exp new file mode 100644 index 000000000..7225ad94b --- /dev/null +++ b/ext/ply/test/yacc_sr.exp @@ -0,0 +1,2 @@ +yacc: Generating SLR parsing table... +yacc: 20 shift/reduce conflicts diff --git a/ext/ply/test/yacc_sr.py b/ext/ply/test/yacc_sr.py new file mode 100644 index 000000000..4341f6997 --- /dev/null +++ b/ext/ply/test/yacc_sr.py @@ -0,0 +1,62 @@ +# ----------------------------------------------------------------------------- +# yacc_sr.py +# +# A grammar with shift-reduce conflicts +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_term1.exp b/ext/ply/test/yacc_term1.exp new file mode 100644 index 000000000..422d2bacd --- /dev/null +++ b/ext/ply/test/yacc_term1.exp @@ -0,0 +1,2 @@ +./yacc_term1.py:22: Illegal rule name 'NUMBER'. Already defined as a token. +yacc.YaccError: Unable to construct parser. diff --git a/ext/ply/test/yacc_term1.py b/ext/ply/test/yacc_term1.py new file mode 100644 index 000000000..97a2e7a60 --- /dev/null +++ b/ext/ply/test/yacc_term1.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_term1.py +# +# Terminal used on the left-hand-side +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'NUMBER : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_unused.exp b/ext/ply/test/yacc_unused.exp new file mode 100644 index 000000000..390754de3 --- /dev/null +++ b/ext/ply/test/yacc_unused.exp @@ -0,0 +1,4 @@ +./yacc_unused.py:60: Symbol 'COMMA' used, but not defined as a token or a rule. +yacc: Symbol 'COMMA' is unreachable. +yacc: Symbol 'exprlist' is unreachable. +yacc.YaccError: Unable to construct parser. diff --git a/ext/ply/test/yacc_unused.py b/ext/ply/test/yacc_unused.py new file mode 100644 index 000000000..4cbd63327 --- /dev/null +++ b/ext/ply/test/yacc_unused.py @@ -0,0 +1,76 @@ +# ----------------------------------------------------------------------------- +# yacc_unused.py +# +# A grammar with an unused rule +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_expr_list(t): + 'exprlist : exprlist COMMA expression' + pass + +def p_expr_list_2(t): + 'exprlist : expression' + pass + + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/test/yacc_uprec.exp b/ext/ply/test/yacc_uprec.exp new file mode 100644 index 000000000..b1a71a250 --- /dev/null +++ b/ext/ply/test/yacc_uprec.exp @@ -0,0 +1,2 @@ +./yacc_uprec.py:35: Nothing known about the precedence of 'UMINUS' +yacc.YaccError: Unable to construct parser. diff --git a/ext/ply/test/yacc_uprec.py b/ext/ply/test/yacc_uprec.py new file mode 100644 index 000000000..139ce6318 --- /dev/null +++ b/ext/ply/test/yacc_uprec.py @@ -0,0 +1,62 @@ +# ----------------------------------------------------------------------------- +# yacc_uprec.py +# +# A grammar with a bad %prec specifier +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +from calclex import tokens + +# Parsing rules + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[3] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print "Undefined name '%s'" % t[1] + t[0] = 0 + +def p_error(t): + print "Syntax error at '%s'" % t.value + +import yacc +yacc.yacc() + + + + diff --git a/ext/ply/yacc.py b/ext/ply/yacc.py new file mode 100644 index 000000000..1041745ed --- /dev/null +++ b/ext/ply/yacc.py @@ -0,0 +1,1846 @@ +#----------------------------------------------------------------------------- +# ply: yacc.py +# +# Author: David M. Beazley (beazley@cs.uchicago.edu) +# Department of Computer Science +# University of Chicago +# Chicago, IL 60637 +# +# Copyright (C) 2001, David M. Beazley +# +# $Header: /home/stever/bk/newmem2/ext/ply/yacc.py 1.3 03/06/06 14:59:28-00:00 stever@ $ +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# See the file COPYING for a complete copy of the LGPL. +# +# +# This implements an LR parser that is constructed from grammar rules defined +# as Python functions. Roughly speaking, this module is a cross between +# John Aycock's Spark system and the GNU bison utility. +# +# Disclaimer: This is a work in progress. SLR parsing seems to work fairly +# well and there is extensive error checking. LALR(1) is in progress. The +# rest of this file is a bit of a mess. Please pardon the dust. +# +# The current implementation is only somewhat object-oriented. The +# LR parser itself is defined in terms of an object (which allows multiple +# parsers to co-exist). However, most of the variables used during table +# construction are defined in terms of global variables. Users shouldn't +# notice unless they are trying to define multiple parsers at the same +# time using threads (in which case they should have their head examined). +#----------------------------------------------------------------------------- + +__version__ = "1.3" + +#----------------------------------------------------------------------------- +# === User configurable parameters === +# +# Change these to modify the default behavior of yacc (if you wish) +#----------------------------------------------------------------------------- + +yaccdebug = 1 # Debugging mode. If set, yacc generates a + # a 'parser.out' file in the current directory + +debug_file = 'parser.out' # Default name of the debugging file +tab_module = 'parsetab' # Default name of the table module +default_lr = 'SLR' # Default LR table generation method + +error_count = 3 # Number of symbols that must be shifted to leave recovery mode + +import re, types, sys, cStringIO, md5, os.path + +# Exception raised for yacc-related errors +class YaccError(Exception): pass + +#----------------------------------------------------------------------------- +# === LR Parsing Engine === +# +# The following classes are used for the LR parser itself. These are not +# used during table construction and are independent of the actual LR +# table generation algorithm +#----------------------------------------------------------------------------- + +# This class is used to hold non-terminal grammar symbols during parsing. +# It normally has the following attributes set: +# .type = Grammar symbol type +# .value = Symbol value +# .lineno = Starting line number +# .endlineno = Ending line number (optional, set automatically) + +class YaccSymbol: + def __str__(self): return self.type + def __repr__(self): return str(self) + +# This class is a wrapper around the objects actually passed to each +# grammar rule. Index lookup and assignment actually assign the +# .value attribute of the underlying YaccSymbol object. +# The lineno() method returns the line number of a given +# item (or 0 if not defined). The linespan() method returns +# a tuple of (startline,endline) representing the range of lines +# for a symbol. + +class YaccSlice: + def __init__(self,s): + self.slice = s + self.pbstack = [] + + def __getitem__(self,n): + return self.slice[n].value + + def __setitem__(self,n,v): + self.slice[n].value = v + + def __len__(self): + return len(self.slice) + + def lineno(self,n): + return getattr(self.slice[n],"lineno",0) + + def linespan(self,n): + startline = getattr(self.slice[n],"lineno",0) + endline = getattr(self.slice[n],"endlineno",startline) + return startline,endline + + def pushback(self,n): + if n <= 0: + raise ValueError, "Expected a positive value" + if n > (len(self.slice)-1): + raise ValueError, "Can't push %d tokens. Only %d are available." % (n,len(self.slice)-1) + for i in range(0,n): + self.pbstack.append(self.slice[-i-1]) + +# The LR Parsing engine. This is defined as a class so that multiple parsers +# can exist in the same process. A user never instantiates this directly. +# Instead, the global yacc() function should be used to create a suitable Parser +# object. + +class Parser: + def __init__(self,magic=None): + + # This is a hack to keep users from trying to instantiate a Parser + # object directly. + + if magic != "xyzzy": + raise YaccError, "Can't instantiate Parser. Use yacc() instead." + + # Reset internal state + self.productions = None # List of productions + self.errorfunc = None # Error handling function + self.action = { } # LR Action table + self.goto = { } # LR goto table + self.require = { } # Attribute require table + self.method = "Unknown LR" # Table construction method used + + def errok(self): + self.errorcount = 0 + + def restart(self): + del self.statestack[:] + del self.symstack[:] + sym = YaccSymbol() + sym.type = '$' + self.symstack.append(sym) + self.statestack.append(0) + + def parse(self,input=None,lexer=None,debug=0): + lookahead = None # Current lookahead symbol + lookaheadstack = [ ] # Stack of lookahead symbols + actions = self.action # Local reference to action table + goto = self.goto # Local reference to goto table + prod = self.productions # Local reference to production list + pslice = YaccSlice(None) # Slice object passed to grammar rules + pslice.parser = self # Parser object + self.errorcount = 0 # Used during error recovery + + # If no lexer was given, we will try to use the lex module + if not lexer: + import lex as lexer + + pslice.lexer = lexer + + # If input was supplied, pass to lexer + if input: + lexer.input(input) + + # Tokenize function + get_token = lexer.token + + statestack = [ ] # Stack of parsing states + self.statestack = statestack + symstack = [ ] # Stack of grammar symbols + self.symstack = symstack + + errtoken = None # Err token + + # The start state is assumed to be (0,$) + statestack.append(0) + sym = YaccSymbol() + sym.type = '$' + symstack.append(sym) + + while 1: + # Get the next symbol on the input. If a lookahead symbol + # is already set, we just use that. Otherwise, we'll pull + # the next token off of the lookaheadstack or from the lexer + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$' + if debug: + print "%-20s : %s" % (lookahead, [xx.type for xx in symstack]) + + # Check the action table + s = statestack[-1] + ltype = lookahead.type + t = actions.get((s,ltype),None) + + if t is not None: + if t > 0: + # shift a symbol on the stack + if ltype == '$': + # Error, end of input + print "yacc: Parse error. EOF" + return + statestack.append(t) + symstack.append(lookahead) + lookahead = None + + # Decrease error count on successful shift + if self.errorcount > 0: + self.errorcount -= 1 + + continue + + if t < 0: + # reduce a symbol on the stack, emit a production + p = prod[-t] + pname = p.name + plen = p.len + + # Get production function + sym = YaccSymbol() + sym.type = pname # Production name + sym.value = None + + if plen: + targ = symstack[-plen-1:] + targ[0] = sym + try: + sym.lineno = targ[1].lineno + sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno) + except AttributeError: + sym.lineno = 0 + del symstack[-plen:] + del statestack[-plen:] + else: + sym.lineno = 0 + targ = [ sym ] + pslice.slice = targ + pslice.pbstack = [] + # Call the grammar rule with our special slice object + p.func(pslice) + + # Validate attributes of the resulting value attribute +# if require: +# try: +# t0 = targ[0] +# r = Requires.get(t0.type,None) +# t0d = t0.__dict__ +# if r: +# for field in r: +# tn = t0 +# for fname in field: +# try: +# tf = tn.__dict__ +# tn = tf.get(fname) +# except StandardError: +# tn = None +# if not tn: +# print "%s:%d: Rule %s doesn't set required attribute '%s'" % \ +# (p.file,p.line,p.name,".".join(field)) +# except TypeError,LookupError: +# print "Bad requires directive " % r +# pass + + + # If there was a pushback, put that on the stack + if pslice.pbstack: + lookaheadstack.append(lookahead) + for _t in pslice.pbstack: + lookaheadstack.append(_t) + lookahead = None + + symstack.append(sym) + statestack.append(goto[statestack[-1],pname]) + continue + + if t == 0: + n = symstack[-1] + return getattr(n,"value",None) + + if t == None: + # We have some kind of parsing error here. To handle this, + # we are going to push the current token onto the tokenstack + # and replace it with an 'error' token. If there are any synchronization + # rules, they may catch it. + # + # In addition to pushing the error token, we call call the user defined p_error() + # function if this is the first syntax error. This function is only called + # if errorcount == 0. + + if not self.errorcount: + self.errorcount = error_count + errtoken = lookahead + if errtoken.type == '$': + errtoken = None # End of file! + if self.errorfunc: + global errok,token,restart + errok = self.errok # Set some special functions available in error recovery + token = get_token + restart = self.restart + tok = self.errorfunc(errtoken) + del errok, token, restart # Delete special functions + + if not self.errorcount: + # User must have done some kind of panic mode recovery on their own. The returned token + # is the next lookahead + lookahead = tok + errtoken = None + continue + else: + if errtoken: + if hasattr(errtoken,"lineno"): lineno = lookahead.lineno + else: lineno = 0 + if lineno: + print "yacc: Syntax error at line %d, token=%s" % (lineno, errtoken.type) + else: + print "yacc: Syntax error, token=%s" % errtoken.type + else: + print "yacc: Parse error in input. EOF" + return + + else: + self.errorcount = error_count + + # case 1: the statestack only has 1 entry on it. If we're in this state, the + # entire parse has been rolled back and we're completely hosed. The token is + # discarded and we just keep going. + + if len(statestack) <= 1 and lookahead.type != '$': + lookahead = None + errtoken = None + # Nuke the pushback stack + del lookaheadstack[:] + continue + + # case 2: the statestack has a couple of entries on it, but we're + # at the end of the file. nuke the top entry and generate an error token + + # Start nuking entries on the stack + if lookahead.type == '$': + # Whoa. We're really hosed here. Bail out + return + + if lookahead.type != 'error': + sym = symstack[-1] + if sym.type == 'error': + # Hmmm. Error is on top of stack, we'll just nuke input + # symbol and continue + lookahead = None + continue + t = YaccSymbol() + t.type = 'error' + if hasattr(lookahead,"lineno"): + t.lineno = lookahead.lineno + t.value = lookahead + lookaheadstack.append(lookahead) + lookahead = t + else: + symstack.pop() + statestack.pop() + + continue + + # Call an error function here + raise RuntimeError, "yacc: internal parser error!!!\n" + +# ----------------------------------------------------------------------------- +# === Parser Construction === +# +# The following functions and variables are used to implement the yacc() function +# itself. This is pretty hairy stuff involving lots of error checking, +# construction of LR items, kernels, and so forth. Although a lot of +# this work is done using global variables, the resulting Parser object +# is completely self contained--meaning that it is safe to repeatedly +# call yacc() with different grammars in the same application. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# validate_file() +# +# This function checks to see if there are duplicated p_rulename() functions +# in the parser module file. Without this function, it is really easy for +# users to make mistakes by cutting and pasting code fragments (and it's a real +# bugger to try and figure out why the resulting parser doesn't work). Therefore, +# we just do a little regular expression pattern matching of def statements +# to try and detect duplicates. +# ----------------------------------------------------------------------------- + +def validate_file(filename): + base,ext = os.path.splitext(filename) + if ext != '.py': return 1 # No idea. Assume it's okay. + + try: + f = open(filename) + lines = f.readlines() + f.close() + except IOError: + return 1 # Oh well + + # Match def p_funcname( + fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') + counthash = { } + linen = 1 + noerror = 1 + for l in lines: + m = fre.match(l) + if m: + name = m.group(1) + prev = counthash.get(name) + if not prev: + counthash[name] = linen + else: + print "%s:%d: Function %s redefined. Previously defined on line %d" % (filename,linen,name,prev) + noerror = 0 + linen += 1 + return noerror + +# This function looks for functions that might be grammar rules, but which don't have the proper p_suffix. +def validate_dict(d): + for n,v in d.items(): + if n[0:2] == 'p_' and isinstance(v,types.FunctionType): continue + if n[0:2] == 't_': continue + + if n[0:2] == 'p_': + print "yacc: Warning. '%s' not defined as a function" % n + if isinstance(v,types.FunctionType) and v.func_code.co_argcount == 1: + try: + doc = v.__doc__.split(" ") + if doc[1] == ':': + print "%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix." % (v.func_code.co_filename, v.func_code.co_firstlineno,n) + except StandardError: + pass + +# ----------------------------------------------------------------------------- +# === GRAMMAR FUNCTIONS === +# +# The following global variables and functions are used to store, manipulate, +# and verify the grammar rules specified by the user. +# ----------------------------------------------------------------------------- + +# Initialize all of the global variables used during grammar construction +def initialize_vars(): + global Productions, Prodnames, Prodmap, Terminals + global Nonterminals, First, Follow, Precedence, LRitems + global Errorfunc, Signature, Requires + + Productions = [None] # A list of all of the productions. The first + # entry is always reserved for the purpose of + # building an augmented grammar + + Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all + # productions of that nonterminal. + + Prodmap = { } # A dictionary that is only used to detect duplicate + # productions. + + Terminals = { } # A dictionary mapping the names of terminal symbols to a + # list of the rules where they are used. + + Nonterminals = { } # A dictionary mapping names of nonterminals to a list + # of rule numbers where they are used. + + First = { } # A dictionary of precomputed FIRST(x) symbols + + Follow = { } # A dictionary of precomputed FOLLOW(x) symbols + + Precedence = { } # Precedence rules for each terminal. Contains tuples of the + # form ('right',level) or ('nonassoc', level) or ('left',level) + + LRitems = [ ] # A list of all LR items for the grammar. These are the + # productions with the "dot" like E -> E . PLUS E + + Errorfunc = None # User defined error handler + + Signature = md5.new() # Digital signature of the grammar rules, precedence + # and other information. Used to determined when a + # parsing table needs to be regenerated. + + Requires = { } # Requires list + + # File objects used when creating the parser.out debugging file + global _vf, _vfc + _vf = cStringIO.StringIO() + _vfc = cStringIO.StringIO() + +# ----------------------------------------------------------------------------- +# class Production: +# +# This class stores the raw information about a single production or grammar rule. +# It has a few required attributes: +# +# name - Name of the production (nonterminal) +# prod - A list of symbols making up its production +# number - Production number. +# +# In addition, a few additional attributes are used to help with debugging or +# optimization of table generation. +# +# file - File where production action is defined. +# lineno - Line number where action is defined +# func - Action function +# prec - Precedence level +# lr_next - Next LR item. Example, if we are ' E -> E . PLUS E' +# then lr_next refers to 'E -> E PLUS . E' +# lr_index - LR item index (location of the ".") in the prod list. +# len - Length of the production (number of symbols on right hand side) +# ----------------------------------------------------------------------------- + +class Production: + def __init__(self,**kw): + for k,v in kw.items(): + setattr(self,k,v) + self.lr_index = -1 + self.lr0_added = 0 # Flag indicating whether or not added to LR0 closure + self.usyms = [ ] + + def __str__(self): + if self.prod: + s = "%s -> %s" % (self.name," ".join(self.prod)) + else: + s = "%s -> <empty>" % self.name + return s + + def __repr__(self): + return str(self) + + # Compute lr_items from the production + def lr_item(self,n): + if n > len(self.prod): return None + p = Production() + p.name = self.name + p.prod = list(self.prod) + p.number = self.number + p.lr_index = n + p.prod.insert(n,".") + p.prod = tuple(p.prod) + p.len = len(p.prod) + p.usyms = self.usyms + + # Precompute list of productions immediately following + try: + p.lrafter = Prodnames[p.prod[n+1]] + except (IndexError,KeyError),e: + p.lrafter = [] + try: + p.lrbefore = p.prod[n-1] + except IndexError: + p.lrbefore = None + + return p + +class MiniProduction: + pass + +# Utility function +def is_identifier(s): + for c in s: + if not (c.isalnum() or c == '_'): return 0 + return 1 + +# ----------------------------------------------------------------------------- +# add_production() +# +# Given an action function, this function assembles a production rule. +# The production rule is assumed to be found in the function's docstring. +# This rule has the general syntax: +# +# name1 ::= production1 +# | production2 +# | production3 +# ... +# | productionn +# name2 ::= production1 +# | production2 +# ... +# ----------------------------------------------------------------------------- + +def add_production(f,file,line,prodname,syms): + + if Terminals.has_key(prodname): + print "%s:%d: Illegal rule name '%s'. Already defined as a token." % (file,line,prodname) + return -1 + if prodname == 'error': + print "%s:%d: Illegal rule name '%s'. error is a reserved word." % (file,line,prodname) + return -1 + + if not is_identifier(prodname): + print "%s:%d: Illegal rule name '%s'" % (file,line,prodname) + return -1 + + for s in syms: + if not is_identifier(s) and s != '%prec': + print "%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname) + return -1 + + # See if the rule is already in the rulemap + map = "%s -> %s" % (prodname,syms) + if Prodmap.has_key(map): + m = Prodmap[map] + print "%s:%d: Duplicate rule %s." % (file,line, m) + print "%s:%d: Previous definition at %s:%d" % (file,line, m.file, m.line) + return -1 + + p = Production() + p.name = prodname + p.prod = syms + p.file = file + p.line = line + p.func = f + p.number = len(Productions) + + + Productions.append(p) + Prodmap[map] = p + if not Nonterminals.has_key(prodname): + Nonterminals[prodname] = [ ] + + # Add all terminals to Terminals + i = 0 + while i < len(p.prod): + t = p.prod[i] + if t == '%prec': + try: + precname = p.prod[i+1] + except IndexError: + print "%s:%d: Syntax error. Nothing follows %%prec." % (p.file,p.line) + return -1 + + prec = Precedence.get(precname,None) + if not prec: + print "%s:%d: Nothing known about the precedence of '%s'" % (p.file,p.line,precname) + return -1 + else: + p.prec = prec + del p.prod[i] + del p.prod[i] + continue + + if Terminals.has_key(t): + Terminals[t].append(p.number) + # Is a terminal. We'll assign a precedence to p based on this + if not hasattr(p,"prec"): + p.prec = Precedence.get(t,('right',0)) + else: + if not Nonterminals.has_key(t): + Nonterminals[t] = [ ] + Nonterminals[t].append(p.number) + i += 1 + + if not hasattr(p,"prec"): + p.prec = ('right',0) + + # Set final length of productions + p.len = len(p.prod) + p.prod = tuple(p.prod) + + # Calculate unique syms in the production + p.usyms = [ ] + for s in p.prod: + if s not in p.usyms: + p.usyms.append(s) + + # Add to the global productions list + try: + Prodnames[p.name].append(p) + except KeyError: + Prodnames[p.name] = [ p ] + return 0 + +# Given a raw rule function, this function rips out its doc string +# and adds rules to the grammar + +def add_function(f): + line = f.func_code.co_firstlineno + file = f.func_code.co_filename + error = 0 + + if f.func_code.co_argcount > 1: + print "%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__) + return -1 + + if f.func_code.co_argcount < 1: + print "%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__) + return -1 + + if f.__doc__: + # Split the doc string into lines + pstrings = f.__doc__.splitlines() + lastp = None + dline = line + for ps in pstrings: + dline += 1 + p = ps.split() + if not p: continue + try: + if p[0] == '|': + # This is a continuation of a previous rule + if not lastp: + print "%s:%d: Misplaced '|'." % (file,dline) + return -1 + prodname = lastp + if len(p) > 1: + syms = p[1:] + else: + syms = [ ] + else: + prodname = p[0] + lastp = prodname + assign = p[1] + if len(p) > 2: + syms = p[2:] + else: + syms = [ ] + if assign != ':' and assign != '::=': + print "%s:%d: Syntax error. Expected ':'" % (file,dline) + return -1 + e = add_production(f,file,dline,prodname,syms) + error += e + except StandardError: + print "%s:%d: Syntax error in rule '%s'" % (file,dline,ps) + error -= 1 + else: + print "%s:%d: No documentation string specified in function '%s'" % (file,line,f.__name__) + return error + + +# Cycle checking code (Michael Dyck) + +def compute_reachable(): + ''' + Find each symbol that can be reached from the start symbol. + Print a warning for any nonterminals that can't be reached. + (Unused terminals have already had their warning.) + ''' + Reachable = { } + for s in Terminals.keys() + Nonterminals.keys(): + Reachable[s] = 0 + + mark_reachable_from( Productions[0].prod[0], Reachable ) + + for s in Nonterminals.keys(): + if not Reachable[s]: + print "yacc: Symbol '%s' is unreachable." % s + +def mark_reachable_from(s, Reachable): + ''' + Mark all symbols that are reachable from symbol s. + ''' + if Reachable[s]: + # We've already reached symbol s. + return + Reachable[s] = 1 + for p in Prodnames.get(s,[]): + for r in p.prod: + mark_reachable_from(r, Reachable) + +# ----------------------------------------------------------------------------- +# compute_terminates() +# +# This function looks at the various parsing rules and tries to detect +# infinite recursion cycles (grammar rules where there is no possible way +# to derive a string of only terminals). +# ----------------------------------------------------------------------------- +def compute_terminates(): + ''' + Raise an error for any symbols that don't terminate. + ''' + Terminates = {} + + # Terminals: + for t in Terminals.keys(): + Terminates[t] = 1 + + Terminates['$'] = 1 + + # Nonterminals: + + # Initialize to false: + for n in Nonterminals.keys(): + Terminates[n] = 0 + + # Then propagate termination until no change: + while 1: + some_change = 0 + for (n,pl) in Prodnames.items(): + # Nonterminal n terminates iff any of its productions terminates. + for p in pl: + # Production p terminates iff all of its rhs symbols terminate. + for s in p.prod: + if not Terminates[s]: + # The symbol s does not terminate, + # so production p does not terminate. + p_terminates = 0 + break + else: + # didn't break from the loop, + # so every symbol s terminates + # so production p terminates. + p_terminates = 1 + + if p_terminates: + # symbol n terminates! + if not Terminates[n]: + Terminates[n] = 1 + some_change = 1 + # Don't need to consider any more productions for this n. + break + + if not some_change: + break + + some_error = 0 + for (s,terminates) in Terminates.items(): + if not terminates: + if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error': + # s is used-but-not-defined, and we've already warned of that, + # so it would be overkill to say that it's also non-terminating. + pass + else: + print "yacc: Infinite recursion detected for symbol '%s'." % s + some_error = 1 + + return some_error + +# ----------------------------------------------------------------------------- +# verify_productions() +# +# This function examines all of the supplied rules to see if they seem valid. +# ----------------------------------------------------------------------------- +def verify_productions(cycle_check=1): + error = 0 + for p in Productions: + if not p: continue + + for s in p.prod: + if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error': + print "%s:%d: Symbol '%s' used, but not defined as a token or a rule." % (p.file,p.line,s) + error = 1 + continue + + unused_tok = 0 + # Now verify all of the tokens + if yaccdebug: + _vf.write("Unused terminals:\n\n") + for s,v in Terminals.items(): + if s != 'error' and not v: + print "yacc: Warning. Token '%s' defined, but not used." % s + if yaccdebug: _vf.write(" %s\n"% s) + unused_tok += 1 + + # Print out all of the productions + if yaccdebug: + _vf.write("\nGrammar\n\n") + for i in range(1,len(Productions)): + _vf.write("Rule %-5d %s\n" % (i, Productions[i])) + + unused_prod = 0 + # Verify the use of all productions + for s,v in Nonterminals.items(): + if not v: + p = Prodnames[s][0] + print "%s:%d: Warning. Rule '%s' defined, but not used." % (p.file,p.line, s) + unused_prod += 1 + + + if unused_tok == 1: + print "yacc: Warning. There is 1 unused token." + if unused_tok > 1: + print "yacc: Warning. There are %d unused tokens." % unused_tok + + if unused_prod == 1: + print "yacc: Warning. There is 1 unused rule." + if unused_prod > 1: + print "yacc: Warning. There are %d unused rules." % unused_prod + + if yaccdebug: + _vf.write("\nTerminals, with rules where they appear\n\n") + ks = Terminals.keys() + ks.sort() + for k in ks: + _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Terminals[k]]))) + _vf.write("\nNonterminals, with rules where they appear\n\n") + ks = Nonterminals.keys() + ks.sort() + for k in ks: + _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Nonterminals[k]]))) + + if (cycle_check): + compute_reachable() + error += compute_terminates() +# error += check_cycles() + return error + +# ----------------------------------------------------------------------------- +# build_lritems() +# +# This function walks the list of productions and builds a complete set of the +# LR items. The LR items are stored in two ways: First, they are uniquely +# numbered and placed in the list _lritems. Second, a linked list of LR items +# is built for each production. For example: +# +# E -> E PLUS E +# +# Creates the list +# +# [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] +# ----------------------------------------------------------------------------- + +def build_lritems(): + for p in Productions: + lastlri = p + lri = p.lr_item(0) + i = 0 + while 1: + lri = p.lr_item(i) + lastlri.lr_next = lri + if not lri: break + lri.lr_num = len(LRitems) + LRitems.append(lri) + lastlri = lri + i += 1 + + # In order for the rest of the parser generator to work, we need to + # guarantee that no more lritems are generated. Therefore, we nuke + # the p.lr_item method. (Only used in debugging) + # Production.lr_item = None + +# ----------------------------------------------------------------------------- +# add_precedence() +# +# Given a list of precedence rules, add to the precedence table. +# ----------------------------------------------------------------------------- + +def add_precedence(plist): + plevel = 0 + error = 0 + for p in plist: + plevel += 1 + try: + prec = p[0] + terms = p[1:] + if prec != 'left' and prec != 'right' and prec != 'nonassoc': + print "yacc: Invalid precedence '%s'" % prec + return -1 + for t in terms: + if Precedence.has_key(t): + print "yacc: Precedence already specified for terminal '%s'" % t + error += 1 + continue + Precedence[t] = (prec,plevel) + except: + print "yacc: Invalid precedence table." + error += 1 + + return error + +# ----------------------------------------------------------------------------- +# augment_grammar() +# +# Compute the augmented grammar. This is just a rule S' -> start where start +# is the starting symbol. +# ----------------------------------------------------------------------------- + +def augment_grammar(start=None): + if not start: + start = Productions[1].name + Productions[0] = Production(name="S'",prod=[start],number=0,len=1,prec=('right',0),func=None) + Productions[0].usyms = [ start ] + Nonterminals[start].append(0) + + +# ------------------------------------------------------------------------- +# first() +# +# Compute the value of FIRST1(beta) where beta is a tuple of symbols. +# +# During execution of compute_first1, the result may be incomplete. +# Afterward (e.g., when called from compute_follow()), it will be complete. +# ------------------------------------------------------------------------- +def first(beta): + + # We are computing First(x1,x2,x3,...,xn) + result = [ ] + for x in beta: + x_produces_empty = 0 + + # Add all the non-<empty> symbols of First[x] to the result. + for f in First[x]: + if f == '<empty>': + x_produces_empty = 1 + else: + if f not in result: result.append(f) + + if x_produces_empty: + # We have to consider the next x in beta, + # i.e. stay in the loop. + pass + else: + # We don't have to consider any further symbols in beta. + break + else: + # There was no 'break' from the loop, + # so x_produces_empty was true for all x in beta, + # so beta produces empty as well. + result.append('<empty>') + + return result + + +# FOLLOW(x) +# Given a non-terminal. This function computes the set of all symbols +# that might follow it. Dragon book, p. 189. + +def compute_follow(start=None): + # Add '$' to the follow list of the start symbol + for k in Nonterminals.keys(): + Follow[k] = [ ] + + if not start: + start = Productions[1].name + + Follow[start] = [ '$' ] + + while 1: + didadd = 0 + for p in Productions[1:]: + # Here is the production set + for i in range(len(p.prod)): + B = p.prod[i] + if Nonterminals.has_key(B): + # Okay. We got a non-terminal in a production + fst = first(p.prod[i+1:]) + hasempty = 0 + for f in fst: + if f != '<empty>' and f not in Follow[B]: + Follow[B].append(f) + didadd = 1 + if f == '<empty>': + hasempty = 1 + if hasempty or i == (len(p.prod)-1): + # Add elements of follow(a) to follow(b) + for f in Follow[p.name]: + if f not in Follow[B]: + Follow[B].append(f) + didadd = 1 + if not didadd: break + + if 0 and yaccdebug: + _vf.write('\nFollow:\n') + for k in Nonterminals.keys(): + _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Follow[k]]))) + +# ------------------------------------------------------------------------- +# compute_first1() +# +# Compute the value of FIRST1(X) for all symbols +# ------------------------------------------------------------------------- +def compute_first1(): + + # Terminals: + for t in Terminals.keys(): + First[t] = [t] + + First['$'] = ['$'] + First['#'] = ['#'] # what's this for? + + # Nonterminals: + + # Initialize to the empty set: + for n in Nonterminals.keys(): + First[n] = [] + + # Then propagate symbols until no change: + while 1: + some_change = 0 + for n in Nonterminals.keys(): + for p in Prodnames[n]: + for f in first(p.prod): + if f not in First[n]: + First[n].append( f ) + some_change = 1 + if not some_change: + break + + if 0 and yaccdebug: + _vf.write('\nFirst:\n') + for k in Nonterminals.keys(): + _vf.write("%-20s : %s\n" % + (k, " ".join([str(s) for s in First[k]]))) + +# ----------------------------------------------------------------------------- +# === SLR Generation === +# +# The following functions are used to construct SLR (Simple LR) parsing tables +# as described on p.221-229 of the dragon book. +# ----------------------------------------------------------------------------- + +# Global variables for the LR parsing engine +def lr_init_vars(): + global _lr_action, _lr_goto, _lr_method + global _lr_goto_cache + + _lr_action = { } # Action table + _lr_goto = { } # Goto table + _lr_method = "Unknown" # LR method used + _lr_goto_cache = { } + +# Compute the LR(0) closure operation on I, where I is a set of LR(0) items. +# prodlist is a list of productions. + +_add_count = 0 # Counter used to detect cycles + +def lr0_closure(I): + global _add_count + + _add_count += 1 + prodlist = Productions + + # Add everything in I to J + J = I[:] + didadd = 1 + while didadd: + didadd = 0 + for j in J: + for x in j.lrafter: + if x.lr0_added == _add_count: continue + # Add B --> .G to J + J.append(x.lr_next) + x.lr0_added = _add_count + didadd = 1 + + return J + +# Compute the LR(0) goto function goto(I,X) where I is a set +# of LR(0) items and X is a grammar symbol. This function is written +# in a way that guarantees uniqueness of the generated goto sets +# (i.e. the same goto set will never be returned as two different Python +# objects). With uniqueness, we can later do fast set comparisons using +# id(obj) instead of element-wise comparison. + +def lr0_goto(I,x): + # First we look for a previously cached entry + g = _lr_goto_cache.get((id(I),x),None) + if g: return g + + # Now we generate the goto set in a way that guarantees uniqueness + # of the result + + s = _lr_goto_cache.get(x,None) + if not s: + s = { } + _lr_goto_cache[x] = s + + gs = [ ] + for p in I: + n = p.lr_next + if n and n.lrbefore == x: + s1 = s.get(id(n),None) + if not s1: + s1 = { } + s[id(n)] = s1 + gs.append(n) + s = s1 + g = s.get('$',None) + if not g: + if gs: + g = lr0_closure(gs) + s['$'] = g + else: + s['$'] = gs + _lr_goto_cache[(id(I),x)] = g + return g + +# Compute the kernel of a set of LR(0) items +def lr0_kernel(I): + KI = [ ] + for p in I: + if p.name == "S'" or p.lr_index > 0 or p.len == 0: + KI.append(p) + + return KI + +_lr0_cidhash = { } + +# Compute the LR(0) sets of item function +def lr0_items(): + + C = [ lr0_closure([Productions[0].lr_next]) ] + i = 0 + for I in C: + _lr0_cidhash[id(I)] = i + i += 1 + + # Loop over the items in C and each grammar symbols + i = 0 + while i < len(C): + I = C[i] + i += 1 + + # Collect all of the symbols that could possibly be in the goto(I,X) sets + asyms = { } + for ii in I: + for s in ii.usyms: + asyms[s] = None + + for x in asyms.keys(): + g = lr0_goto(I,x) + if not g: continue + if _lr0_cidhash.has_key(id(g)): continue + _lr0_cidhash[id(g)] = len(C) + C.append(g) + + return C + +# ----------------------------------------------------------------------------- +# slr_parse_table() +# +# This function constructs an SLR table. +# ----------------------------------------------------------------------------- +def slr_parse_table(): + global _lr_method + goto = _lr_goto # Goto array + action = _lr_action # Action array + actionp = { } # Action production array (temporary) + + _lr_method = "SLR" + + n_srconflict = 0 + n_rrconflict = 0 + + if yaccdebug: + _vf.write("\n\nParsing method: SLR\n\n") + + # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items + # This determines the number of states + + C = lr0_items() + + # Build the parser table, state by state + st = 0 + for I in C: + # Loop over each production in I + actlist = [ ] # List of actions + + if yaccdebug: + _vf.write("\nstate %d\n\n" % st) + for p in I: + _vf.write(" (%d) %s\n" % (p.number, str(p))) + _vf.write("\n") + + for p in I: + try: + if p.prod[-1] == ".": + if p.name == "S'": + # Start symbol. Accept! + action[st,"$"] = 0 + actionp[st,"$"] = p + else: + # We are at the end of a production. Reduce! + for a in Follow[p.name]: + actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) + r = action.get((st,a),None) + if r is not None: + # Whoa. Have a shift/reduce or reduce/reduce conflict + if r > 0: + # Need to decide on shift or reduce here + # By default we favor shifting. Need to add + # some precedence rules here. + sprec,slevel = Productions[actionp[st,a].number].prec + rprec,rlevel = Precedence.get(a,('right',0)) + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): + # We really need to reduce here. + action[st,a] = -p.number + actionp[st,a] = p + if not slevel and not rlevel: + _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st) + _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a) + n_srconflict += 1 + elif (slevel == rlevel) and (rprec == 'nonassoc'): + action[st,a] = None + else: + # Hmmm. Guess we'll keep the shift + if not slevel and not rlevel: + _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st) + _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a) + n_srconflict +=1 + elif r < 0: + # Reduce/reduce conflict. In this case, we favor the rule + # that was defined first in the grammar file + oldp = Productions[-r] + pp = Productions[p.number] + if oldp.line > pp.line: + action[st,a] = -p.number + actionp[st,a] = p + # print "Reduce/reduce conflict in state %d" % st + n_rrconflict += 1 + _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a])) + _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a])) + else: + print "Unknown conflict in state %d" % st + else: + action[st,a] = -p.number + actionp[st,a] = p + else: + i = p.lr_index + a = p.prod[i+1] # Get symbol right after the "." + if Terminals.has_key(a): + g = lr0_goto(I,a) + j = _lr0_cidhash.get(id(g),-1) + if j >= 0: + # We are in a shift state + actlist.append((a,p,"shift and go to state %d" % j)) + r = action.get((st,a),None) + if r is not None: + # Whoa have a shift/reduce or shift/shift conflict + if r > 0: + if r != j: + print "Shift/shift conflict in state %d" % st + elif r < 0: + # Do a precedence check. + # - if precedence of reduce rule is higher, we reduce. + # - if precedence of reduce is same and left assoc, we reduce. + # - otherwise we shift + rprec,rlevel = Productions[actionp[st,a].number].prec + sprec,slevel = Precedence.get(a,('right',0)) + if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')): + # We decide to shift here... highest precedence to shift + action[st,a] = j + actionp[st,a] = p + if not slevel and not rlevel: + n_srconflict += 1 + _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st) + _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a) + elif (slevel == rlevel) and (rprec == 'nonassoc'): + action[st,a] = None + else: + # Hmmm. Guess we'll keep the reduce + if not slevel and not rlevel: + n_srconflict +=1 + _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st) + _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a) + + else: + print "Unknown conflict in state %d" % st + else: + action[st,a] = j + actionp[st,a] = p + + except StandardError,e: + raise YaccError, "Hosed in slr_parse_table", e + + # Print the actions associated with each terminal + if yaccdebug: + for a,p,m in actlist: + if action.has_key((st,a)): + if p is actionp[st,a]: + _vf.write(" %-15s %s\n" % (a,m)) + _vf.write("\n") + for a,p,m in actlist: + if action.has_key((st,a)): + if p is not actionp[st,a]: + _vf.write(" ! %-15s [ %s ]\n" % (a,m)) + + # Construct the goto table for this state + if yaccdebug: + _vf.write("\n") + nkeys = { } + for ii in I: + for s in ii.usyms: + if Nonterminals.has_key(s): + nkeys[s] = None + for n in nkeys.keys(): + g = lr0_goto(I,n) + j = _lr0_cidhash.get(id(g),-1) + if j >= 0: + goto[st,n] = j + if yaccdebug: + _vf.write(" %-15s shift and go to state %d\n" % (n,j)) + + st += 1 + + if n_srconflict == 1: + print "yacc: %d shift/reduce conflict" % n_srconflict + if n_srconflict > 1: + print "yacc: %d shift/reduce conflicts" % n_srconflict + if n_rrconflict == 1: + print "yacc: %d reduce/reduce conflict" % n_rrconflict + if n_rrconflict > 1: + print "yacc: %d reduce/reduce conflicts" % n_rrconflict + + +# ----------------------------------------------------------------------------- +# ==== LALR(1) Parsing ==== +# **** UNFINISHED! 6/16/01 +# ----------------------------------------------------------------------------- + + +# Compute the lr1_closure of a set I. I is a list of tuples (p,a) where +# p is a LR0 item and a is a terminal + +_lr1_add_count = 0 + +def lr1_closure(I): + global _lr1_add_count + + _lr1_add_count += 1 + + J = I[:] + + # Loop over items (p,a) in I. + ji = 0 + while ji < len(J): + p,a = J[ji] + # p = [ A -> alpha . B beta] + + # For each production B -> gamma + for B in p.lr1_after: + f = tuple(p.lr1_beta + (a,)) + + # For each terminal b in first(Beta a) + for b in first(f): + # Check if (B -> . gamma, b) is in J + # Only way this can happen is if the add count mismatches + pn = B.lr_next + if pn.lr_added.get(b,0) == _lr1_add_count: continue + pn.lr_added[b] = _lr1_add_count + J.append((pn,b)) + ji += 1 + + return J + +def lalr_parse_table(): + + # Compute some lr1 information about all of the productions + for p in LRitems: + try: + after = p.prod[p.lr_index + 1] + p.lr1_after = Prodnames[after] + p.lr1_beta = p.prod[p.lr_index + 2:] + except LookupError: + p.lr1_after = [ ] + p.lr1_beta = [ ] + p.lr_added = { } + + # Compute the LR(0) items + C = lr0_items() + CK = [] + for I in C: + CK.append(lr0_kernel(I)) + + print CK + +# ----------------------------------------------------------------------------- +# ==== LR Utility functions ==== +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# _lr_write_tables() +# +# This function writes the LR parsing tables to a file +# ----------------------------------------------------------------------------- + +def lr_write_tables(modulename=tab_module): + filename = modulename + ".py" + try: + f = open(filename,"w") + + f.write(""" +# %s +# This file is automatically generated. Do not edit. + +_lr_method = %s + +_lr_signature = %s +""" % (filename, repr(_lr_method), repr(Signature.digest()))) + + # Change smaller to 0 to go back to original tables + smaller = 1 + + # Factor out names to try and make smaller + if smaller: + items = { } + + for k,v in _lr_action.items(): + i = items.get(k[1]) + if not i: + i = ([],[]) + items[k[1]] = i + i[0].append(k[0]) + i[1].append(v) + + f.write("\n_lr_action_items = {") + for k,v in items.items(): + f.write("%r:([" % k) + for i in v[0]: + f.write("%r," % i) + f.write("],[") + for i in v[1]: + f.write("%r," % i) + + f.write("]),") + f.write("}\n") + + f.write(""" +_lr_action = { } +for _k, _v in _lr_action_items.items(): + for _x,_y in zip(_v[0],_v[1]): + _lr_action[(_x,_k)] = _y +del _lr_action_items +""") + + else: + f.write("\n_lr_action = { "); + for k,v in _lr_action.items(): + f.write("(%r,%r):%r," % (k[0],k[1],v)) + f.write("}\n"); + + if smaller: + # Factor out names to try and make smaller + items = { } + + for k,v in _lr_goto.items(): + i = items.get(k[1]) + if not i: + i = ([],[]) + items[k[1]] = i + i[0].append(k[0]) + i[1].append(v) + + f.write("\n_lr_goto_items = {") + for k,v in items.items(): + f.write("%r:([" % k) + for i in v[0]: + f.write("%r," % i) + f.write("],[") + for i in v[1]: + f.write("%r," % i) + + f.write("]),") + f.write("}\n") + + f.write(""" +_lr_goto = { } +for _k, _v in _lr_goto_items.items(): + for _x,_y in zip(_v[0],_v[1]): + _lr_goto[(_x,_k)] = _y +del _lr_goto_items +""") + else: + f.write("\n_lr_goto = { "); + for k,v in _lr_goto.items(): + f.write("(%r,%r):%r," % (k[0],k[1],v)) + f.write("}\n"); + + # Write production table + f.write("_lr_productions = [\n") + for p in Productions: + if p: + if (p.func): + f.write(" (%r,%d,%r,%r,%d),\n" % (p.name, p.len, p.func.__name__,p.file,p.line)) + else: + f.write(" (%r,%d,None,None,None),\n" % (p.name, p.len)) + else: + f.write(" None,\n") + f.write("]\n") + f.close() + + except IOError,e: + print "Unable to create '%s'" % filename + print e + return + +def lr_read_tables(module=tab_module,optimize=0): + global _lr_action, _lr_goto, _lr_productions, _lr_method + try: + exec "import %s as parsetab" % module + + if (optimize) or (Signature.digest() == parsetab._lr_signature): + _lr_action = parsetab._lr_action + _lr_goto = parsetab._lr_goto + _lr_productions = parsetab._lr_productions + _lr_method = parsetab._lr_method + return 1 + else: + return 0 + + except (ImportError,AttributeError): + return 0 + +# ----------------------------------------------------------------------------- +# yacc(module) +# +# Build the parser module +# ----------------------------------------------------------------------------- + +def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, start=None, check_recursion=1, optimize=0): + global yaccdebug + yaccdebug = debug + + initialize_vars() + files = { } + error = 0 + + # Add starting symbol to signature + if start: + Signature.update(start) + + # Try to figure out what module we are working with + if module: + # User supplied a module object. + if not isinstance(module, types.ModuleType): + raise ValueError,"Expected a module" + + ldict = module.__dict__ + + else: + # No module given. We might be able to get information from the caller. + # Throw an exception and unwind the traceback to get the globals + + try: + raise RuntimeError + except RuntimeError: + e,b,t = sys.exc_info() + f = t.tb_frame + f = f.f_back # Walk out to our calling function + ldict = f.f_globals # Grab its globals dictionary + + # If running in optimized mode. We're going to + + if (optimize and lr_read_tables(tabmodule,1)): + # Read parse table + del Productions[:] + for p in _lr_productions: + if not p: + Productions.append(None) + else: + m = MiniProduction() + m.name = p[0] + m.len = p[1] + m.file = p[3] + m.line = p[4] + if p[2]: + m.func = ldict[p[2]] + Productions.append(m) + + else: + # Get the tokens map + tokens = ldict.get("tokens",None) + + if not tokens: + raise YaccError,"module does not define a list 'tokens'" + if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)): + raise YaccError,"tokens must be a list or tuple." + + # Check to see if a requires dictionary is defined. + requires = ldict.get("require",None) + if requires: + if not (isinstance(requires,types.DictType)): + raise YaccError,"require must be a dictionary." + + for r,v in requires.items(): + try: + if not (isinstance(v,types.ListType)): + raise TypeError + v1 = [x.split(".") for x in v] + Requires[r] = v1 + except StandardError: + print "Invalid specification for rule '%s' in require. Expected a list of strings" % r + + + # Build the dictionary of terminals. We a record a 0 in the + # dictionary to track whether or not a terminal is actually + # used in the grammar + + if 'error' in tokens: + print "yacc: Illegal token 'error'. Is a reserved word." + raise YaccError,"Illegal token name" + + for n in tokens: + if Terminals.has_key(n): + print "yacc: Warning. Token '%s' multiply defined." % n + Terminals[n] = [ ] + + Terminals['error'] = [ ] + + # Get the precedence map (if any) + prec = ldict.get("precedence",None) + if prec: + if not (isinstance(prec,types.ListType) or isinstance(prec,types.TupleType)): + raise YaccError,"precedence must be a list or tuple." + add_precedence(prec) + Signature.update(repr(prec)) + + for n in tokens: + if not Precedence.has_key(n): + Precedence[n] = ('right',0) # Default, right associative, 0 precedence + + # Look for error handler + ef = ldict.get('p_error',None) + if ef: + if not isinstance(ef,types.FunctionType): + raise YaccError,"'p_error' defined, but is not a function." + eline = ef.func_code.co_firstlineno + efile = ef.func_code.co_filename + files[efile] = None + + if (ef.func_code.co_argcount != 1): + raise YaccError,"%s:%d: p_error() requires 1 argument." % (efile,eline) + global Errorfunc + Errorfunc = ef + else: + print "yacc: Warning. no p_error() function is defined." + + # Get the list of built-in functions with p_ prefix + symbols = [ldict[f] for f in ldict.keys() + if (isinstance(ldict[f],types.FunctionType) and ldict[f].__name__[:2] == 'p_' + and ldict[f].__name__ != 'p_error')] + + # Check for non-empty symbols + if len(symbols) == 0: + raise YaccError,"no rules of the form p_rulename are defined." + + # Sort the symbols by line number + symbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno)) + + # Add all of the symbols to the grammar + for f in symbols: + if (add_function(f)) < 0: + error += 1 + else: + files[f.func_code.co_filename] = None + + # Make a signature of the docstrings + for f in symbols: + if f.__doc__: + Signature.update(f.__doc__) + + lr_init_vars() + + if error: + raise YaccError,"Unable to construct parser." + + if not lr_read_tables(tabmodule): + + # Validate files + for filename in files.keys(): + if not validate_file(filename): + error = 1 + + # Validate dictionary + validate_dict(ldict) + + if start and not Prodnames.has_key(start): + raise YaccError,"Bad starting symbol '%s'" % start + + augment_grammar(start) + error = verify_productions(cycle_check=check_recursion) + otherfunc = [ldict[f] for f in ldict.keys() + if (isinstance(ldict[f],types.FunctionType) and ldict[f].__name__[:2] != 'p_')] + + if error: + raise YaccError,"Unable to construct parser." + + build_lritems() + compute_first1() + compute_follow(start) + + if method == 'SLR': + slr_parse_table() + elif method == 'LALR1': + lalr_parse_table() + return + else: + raise YaccError, "Unknown parsing method '%s'" % method + + lr_write_tables(tabmodule) + + if yaccdebug: + try: + f = open(debug_file,"w") + f.write(_vfc.getvalue()) + f.write("\n\n") + f.write(_vf.getvalue()) + f.close() + except IOError,e: + print "yacc: can't create '%s'" % debug_file,e + + # Made it here. Create a parser object and set up its internal state. + # Set global parse() method to bound method of parser object. + + p = Parser("xyzzy") + p.productions = Productions + p.errorfunc = Errorfunc + p.action = _lr_action + p.goto = _lr_goto + p.method = _lr_method + p.require = Requires + + global parse + parse = p.parse + + # Clean up all of the globals we created + if (not optimize): + yacc_cleanup() + return p + +# yacc_cleanup function. Delete all of the global variables +# used during table construction + +def yacc_cleanup(): + global _lr_action, _lr_goto, _lr_method, _lr_goto_cache + del _lr_action, _lr_goto, _lr_method, _lr_goto_cache + + global Productions, Prodnames, Prodmap, Terminals + global Nonterminals, First, Follow, Precedence, LRitems + global Errorfunc, Signature, Requires + + del Productions, Prodnames, Prodmap, Terminals + del Nonterminals, First, Follow, Precedence, LRitems + del Errorfunc, Signature, Requires + + global _vf, _vfc + del _vf, _vfc + + +# Stub that raises an error if parsing is attempted without first calling yacc() +def parse(*args,**kwargs): + raise YaccError, "yacc: No parser built with yacc()" + |